Compare commits

...

22 Commits

Author SHA1 Message Date
Yingwen
5b5d953d56 ci: tolerate error while building arm64 releases (#1143)
* ci: allow failure while building arm64 docker

* ci: Remove continue-on-error on docker step
2023-03-08 21:11:40 +08:00
Yingwen
3f6cbc378d ci: Disable arm64 release temporarily (#1141) 2023-03-08 19:13:00 +08:00
Yingwen
9619940569 ci: Allow error when building release for non-x86 platform (#1140) 2023-03-08 18:12:06 +08:00
Weny Xu
ed8252157a chore: code styling (#1137)
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-08 08:10:12 +00:00
Ruihang Xia
3e0fb7e75b test: ignore two test cases due to arrow-datafusion#5513 (#1138)
* test: ignore two test cases due to arrow-datafusion#5513

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-08 07:29:34 +00:00
Bohan Wu
ba3ce436df refactor(SST): UUID as id in FileMeta (#1116)
* feat(SST): use a newType named FileId for FileMeta

* chore: rename some functions

* fix: compatible for previous FileMeta format

* fix: alias for file_id when getting deserialized
2023-03-08 14:27:20 +08:00
Eugene Tolbakov
b31a6cb506 refactor: replace tempdir with tempfile (#1123)
* refactor: replace tempdir with tempfile

* refactor(query): move tempfile dependency under the workspace's Cargo.toml

* refactor(tempfile): create common-test-util

* refactor(tempfile): fix toml format

* refactor(tempfile): remove tempfile out of dependencies

* refactor(tempfile): fix incorrect toml
2023-03-08 11:15:56 +08:00
SSebo
95090592f0 feat: mysql prepare replacing sql placeholder to param (#1086)
* feat: mysql prepare by replace ? in sql to param

* chore: mysql prepare statment support time param

* chore: prepare test more types

* chore: add TODO
2023-03-08 11:02:29 +08:00
Ruihang Xia
3a527c0fd5 feat: impl proc macro range_fn and some aggr_over_time functions (#1072)
* impl range_fn proc macro

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl some aggr_over_time fn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl present_over_time and absent_over_time

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* accomplish planner, and correct type cast

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* document the macro

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix styles

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update irate/idelta test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add test cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-07 23:39:45 +08:00
elijah
819b60ca13 feat(datatypes): implement VectorOp::take (#1115)
* feat: add take index method for VectorOp

* chore: make clippy happy

* chore: make clippy happy

* chore: improve the code

* chore: improve the code

* chore: add take null test

* chore: fix clippy
2023-03-07 19:27:33 +08:00
Weny Xu
7169fe2989 feat: implement Copy From (#1064) 2023-03-07 17:54:11 +08:00
Zheming Li
b70672be77 feat: track disk usage of regions (#1125)
* feat: track disk usage of regions

Signed-off-by: Zheming Li <nkdudu@126.com>

* calculate disk usage when call

* add default on file meta

---------

Signed-off-by: Zheming Li <nkdudu@126.com>
2023-03-07 17:13:12 +08:00
Lei, HUANG
a4c01f4a3a feat: memory profiling (#1124)
* feat: use jemalloc as default allocator

* feat: add feature for mem-prof

* feat: add errors

* make common-mem-prof optional dep

* fix: toml format

* doc: add profile doc

* fix: typo
2023-03-07 17:12:51 +08:00
Weny Xu
bd98a26cca chore: bump greptime-proto to latest(ad01872) (#1102) 2023-03-07 10:52:42 +08:00
shuiyisong
1b4236d698 refactor: use split instead of serde_urlencoded in http auth (#1110)
* refactor: change from urlencoded to regex

* refactor: change from urlencoded to regex

* chore: add unit test

* chore: update comment

* chore: remove local benchmark test

* chore: minor fix

* chore: remove unused dep
2023-03-07 10:51:47 +08:00
Lei, HUANG
e8cc9b4b29 test: add manifest compatibility tests (#1130)
* tests: add manifest compatibility tests

* fix: clippy
2023-03-06 19:31:54 +08:00
discord9
379f581780 test: add Integrated Test for Coprocessor& fix minor bugs (#1122)
* feat: cache `Runtime`

* fix: coprstream schema not set

* test: integrated tests for Coprocessor

* fix: UDF fixed

* style: remove unused import

* chore: remove more unused import

* feat: `filter`, (r)floordiv for Vector

* chore: CR advices

* feat: auto convert to `lit`

* chore: fix typo

* feat: from&to `pyarrow.array`

* feat: allow `pyarrow.array` as args to builtins

* chore: cargo fmt

* test: CI add `pyarrow`

* test: install Python&PyArrow in CI

* test: not cache depend for now

* chore: CR advices

* test: fix name

* style: rename
2023-03-06 19:20:59 +08:00
fys
ff6cfe8e70 refactor: move the batch_get to KvStore trait (#1029)
* move batch_get from KvStoreExt to KvStore

* add some unit tests

* add some unit test

* add some unit tests

* expose batch_get grpc method
2023-03-06 17:35:43 +08:00
Igor Morozov
5a397917c0 docs(contributingmd): add run tests commands (#1129)
* docs(contributingmd): add run tests commands

* docs(contributingmd): add link to nextest website

Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
2023-03-06 15:54:16 +08:00
fys
559880cb84 fix: can not find catalog when create table (#1118)
* fix: get catalog by name in RemoteCatalogManager

* cr

* cr

* cr

* fix: ut failed
2023-03-06 14:44:40 +08:00
Ruihang Xia
b76b27f3bf refactor: try to remove unnecessary tests in error mod (#750)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2023-03-06 12:31:30 +08:00
yuanbohan
d4e0dc3685 feat: specify prom server start addr (#1111)
* feat: specify promql server start addr

* refactor: rename promql to prom in Prometheus API server scenario
2023-03-06 11:07:21 +08:00
153 changed files with 4370 additions and 1174 deletions

View File

@@ -207,6 +207,12 @@ jobs:
uses: Swatinem/rust-cache@v2
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Collect coverage data

View File

@@ -30,16 +30,21 @@ jobs:
- arch: x86_64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-amd64
continue-on-error: false
- arch: aarch64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-arm64
continue-on-error: true
- arch: aarch64-apple-darwin
os: macos-latest
file: greptime-darwin-arm64
continue-on-error: true
- arch: x86_64-apple-darwin
os: macos-latest
file: greptime-darwin-amd64
continue-on-error: true
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.continue-on-error }}
if: github.repository == 'GreptimeTeam/greptimedb'
steps:
- name: Checkout sources
@@ -181,30 +186,6 @@ jobs:
- name: Checkout sources
uses: actions/checkout@v3
- name: Download amd64 binary
uses: actions/download-artifact@v3
with:
name: greptime-linux-amd64
path: amd64
- name: Unzip the amd64 artifacts
run: |
cd amd64
tar xvf greptime-linux-amd64.tgz
rm greptime-linux-amd64.tgz
- name: Download arm64 binary
uses: actions/download-artifact@v3
with:
name: greptime-linux-arm64
path: arm64
- name: Unzip the arm64 artifacts
run: |
cd arm64
tar xvf greptime-linux-arm64.tgz
rm greptime-linux-arm64.tgz
- name: Login to UCloud Container Registry
uses: docker/login-action@v2
with:
@@ -239,8 +220,37 @@ jobs:
- name: Set up buildx
uses: docker/setup-buildx-action@v2
- name: Build and push
- name: Download amd64 binary
uses: actions/download-artifact@v3
with:
name: greptime-linux-amd64
path: amd64
- name: Unzip the amd64 artifacts
id: unzip-amd64
run: |
cd amd64
tar xvf greptime-linux-amd64.tgz
rm greptime-linux-amd64.tgz
- name: Download arm64 binary
id: download-arm64
uses: actions/download-artifact@v3
with:
name: greptime-linux-arm64
path: arm64
- name: Unzip the arm64 artifacts
id: unzip-arm64
if: success() || steps.download-arm64.conclusion == 'success'
run: |
cd arm64
tar xvf greptime-linux-arm64.tgz
rm greptime-linux-arm64.tgz
- name: Build and push all
uses: docker/build-push-action@v3
if: success() || steps.unzip-arm64.conclusion == 'success' # Build and push all platform if unzip-arm64 succeeds
with:
context: .
file: ./docker/ci/Dockerfile
@@ -251,3 +261,17 @@ jobs:
greptime/greptimedb:${{ env.IMAGE_TAG }}
uhub.service.ucloud.cn/greptime/greptimedb:latest
uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }}
- name: Build and push amd64 only
uses: docker/build-push-action@v3
if: success() || steps.unzip-arm64.conclusion == 'failure' # Only build and push amd64 platform if unzip-arm64 fails
with:
context: .
file: ./docker/ci/Dockerfile
push: true
platforms: linux/amd64
tags: |
greptime/greptimedb:latest
greptime/greptimedb:${{ env.IMAGE_TAG }}
uhub.service.ucloud.cn/greptime/greptimedb:latest
uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }}

View File

@@ -50,7 +50,7 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
- To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
- Make sure all unit tests are passed.
- Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr`).
#### `pre-commit` Hooks

Cargo.lock (generated)
View File

@@ -209,6 +209,7 @@ dependencies = [
"arrow-select",
"arrow-string",
"comfy-table",
"pyo3",
]
[[package]]
@@ -390,6 +391,7 @@ version = "33.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb327717d87eb94be5eff3b0cb8987f54059d343ee5235abf7f143c85f54cfc8"
dependencies = [
"bitflags",
"serde",
]
@@ -677,7 +679,7 @@ dependencies = [
"getrandom",
"instant",
"pin-project-lite",
"rand 0.8.5",
"rand",
"tokio",
]
@@ -689,7 +691,7 @@ checksum = "6cd1a59bc091e593ee9ed62df4e4a07115e00a0e0a52fd7e0e04540773939b80"
dependencies = [
"futures",
"pin-project",
"rand 0.8.5",
"rand",
"tokio",
]
@@ -701,7 +703,7 @@ checksum = "f34fac4d7cdaefa2deded0eda2d5d59dbfd43370ff3f856209e72340ae84c294"
dependencies = [
"futures",
"pin-project",
"rand 0.8.5",
"rand",
"tokio",
]
@@ -1099,7 +1101,9 @@ dependencies = [
"common-recordbatch",
"common-runtime",
"common-telemetry",
"common-test-util",
"common-time",
"dashmap",
"datafusion",
"datatypes",
"futures",
@@ -1109,6 +1113,7 @@ dependencies = [
"meta-client",
"mito",
"object-store",
"parking_lot",
"regex",
"serde",
"serde_json",
@@ -1116,7 +1121,6 @@ dependencies = [
"snafu",
"storage",
"table",
"tempdir",
"tokio",
]
@@ -1353,7 +1357,7 @@ dependencies = [
"futures-util",
"parking_lot",
"prost",
"rand 0.8.5",
"rand",
"snafu",
"substrait 0.1.0",
"substrait 0.4.0",
@@ -1397,6 +1401,7 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-telemetry",
"common-test-util",
"datanode",
"either",
"frontend",
@@ -1413,8 +1418,8 @@ dependencies = [
"session",
"snafu",
"substrait 0.1.0",
"tempdir",
"tempfile",
"tikv-jemalloc-ctl",
"tikv-jemallocator",
"tokio",
"toml",
]
@@ -1474,7 +1479,6 @@ dependencies = [
"serde",
"serde_json",
"snafu",
"tempdir",
"tokio",
]
@@ -1516,6 +1520,7 @@ dependencies = [
"arc-swap",
"common-query",
"datatypes",
"proc-macro2",
"quote",
"snafu",
"static_assertions",
@@ -1541,7 +1546,7 @@ dependencies = [
"flatbuffers",
"futures",
"prost",
"rand 0.8.5",
"rand",
"snafu",
"tokio",
"tonic",
@@ -1566,6 +1571,19 @@ dependencies = [
"table",
]
[[package]]
name = "common-mem-prof"
version = "0.1.0"
dependencies = [
"common-error",
"snafu",
"tempfile",
"tikv-jemalloc-ctl",
"tikv-jemalloc-sys",
"tikv-jemallocator",
"tokio",
]
[[package]]
name = "common-procedure"
version = "0.1.0"
@@ -1574,6 +1592,7 @@ dependencies = [
"common-error",
"common-runtime",
"common-telemetry",
"common-test-util",
"futures",
"futures-util",
"object-store",
@@ -1581,7 +1600,6 @@ dependencies = [
"serde_json",
"smallvec",
"snafu",
"tempdir",
"tokio",
"uuid",
]
@@ -1656,13 +1674,20 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "common-test-util"
version = "0.1.0"
dependencies = [
"tempfile",
]
[[package]]
name = "common-time"
version = "0.1.0"
dependencies = [
"chrono",
"common-error",
"rand 0.8.5",
"rand",
"serde",
"serde_json",
"snafu",
@@ -2112,7 +2137,7 @@ dependencies = [
"paste",
"percent-encoding",
"pin-project-lite",
"rand 0.8.5",
"rand",
"smallvec",
"sqlparser",
"tempfile",
@@ -2189,7 +2214,7 @@ dependencies = [
"md-5",
"num-traits",
"paste",
"rand 0.8.5",
"rand",
"regex",
"sha2",
"unicode-segmentation",
@@ -2204,7 +2229,7 @@ dependencies = [
"arrow",
"datafusion-common",
"paste",
"rand 0.8.5",
"rand",
]
[[package]]
@@ -2224,6 +2249,7 @@ name = "datanode"
version = "0.1.0"
dependencies = [
"api",
"async-compat",
"async-stream",
"async-trait",
"axum",
@@ -2242,12 +2268,14 @@ dependencies = [
"common-recordbatch",
"common-runtime",
"common-telemetry",
"common-test-util",
"common-time",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datatypes",
"futures",
"futures-util",
"humantime-serde",
"hyper",
"log-store",
@@ -2259,6 +2287,7 @@ dependencies = [
"pin-project",
"prost",
"query",
"regex",
"script",
"serde",
"serde_json",
@@ -2271,13 +2300,13 @@ dependencies = [
"substrait 0.1.0",
"table",
"table-procedure",
"tempdir",
"tokio",
"tokio-stream",
"toml",
"tonic",
"tower",
"tower-http",
"url",
]
[[package]]
@@ -2630,7 +2659,7 @@ checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c"
dependencies = [
"log",
"once_cell",
"rand 0.8.5",
"rand",
]
[[package]]
@@ -2747,6 +2776,7 @@ dependencies = [
"common-recordbatch",
"common-runtime",
"common-telemetry",
"common-test-util",
"datafusion",
"datafusion-common",
"datafusion-expr",
@@ -2773,7 +2803,6 @@ dependencies = [
"strfmt",
"substrait 0.1.0",
"table",
"tempdir",
"tokio",
"toml",
"tonic",
@@ -2854,12 +2883,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "funty"
version = "2.0.0"
@@ -3039,7 +3062,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=1599ae2a0d1d8f42ee23ed26e4ad7a7b34134c60#1599ae2a0d1d8f42ee23ed26e4ad7a7b34134c60"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=ad0187295035e83f76272da553453e649b7570de#ad0187295035e83f76272da553453e649b7570de"
dependencies = [
"prost",
"tonic",
@@ -3186,9 +3209,9 @@ dependencies = [
[[package]]
name = "http"
version = "0.2.8"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399"
checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482"
dependencies = [
"bytes",
"fnv",
@@ -3707,6 +3730,7 @@ dependencies = [
"common-error",
"common-runtime",
"common-telemetry",
"common-test-util",
"crc",
"futures",
"futures-util",
@@ -3714,10 +3738,9 @@ dependencies = [
"protobuf",
"protobuf-build",
"raft-engine",
"rand 0.8.5",
"rand",
"snafu",
"store-api",
"tempdir",
"tokio",
"tokio-util",
]
@@ -3959,7 +3982,7 @@ dependencies = [
"etcd-client",
"futures",
"meta-srv",
"rand 0.8.5",
"rand",
"serde",
"serde_json",
"snafu",
@@ -4122,6 +4145,7 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-telemetry",
"common-test-util",
"common-time",
"datafusion",
"datafusion-common",
@@ -4135,7 +4159,6 @@ dependencies = [
"storage",
"store-api",
"table",
"tempdir",
"tokio",
]
@@ -4171,7 +4194,7 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12ca7f22ed370d5991a9caec16a83187e865bc8a532f889670337d5a5689e3a1"
dependencies = [
"rand_core 0.6.4",
"rand_core",
]
[[package]]
@@ -4239,7 +4262,7 @@ dependencies = [
"lexical",
"num-bigint",
"num-traits",
"rand 0.8.5",
"rand",
"regex",
"rust_decimal",
"saturating",
@@ -4266,7 +4289,7 @@ dependencies = [
"num-complex",
"num-rational",
"num-traits",
"rand 0.8.5",
"rand",
"rand_distr",
"simba",
"typenum",
@@ -4404,7 +4427,7 @@ dependencies = [
"num-integer",
"num-iter",
"num-traits",
"rand 0.8.5",
"rand",
"smallvec",
"zeroize",
]
@@ -4526,10 +4549,10 @@ dependencies = [
"anyhow",
"async-trait",
"common-telemetry",
"common-test-util",
"futures",
"lru 0.9.0",
"opendal",
"tempdir",
"tokio",
"uuid",
]
@@ -4648,7 +4671,7 @@ dependencies = [
"lazy_static",
"percent-encoding",
"pin-project",
"rand 0.8.5",
"rand",
"thiserror",
"tokio",
"tokio-stream",
@@ -4966,7 +4989,7 @@ dependencies = [
"log",
"md5",
"postgres-types",
"rand 0.8.5",
"rand",
"ring",
"stringprep",
"thiserror",
@@ -5022,7 +5045,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared 0.10.0",
"rand 0.8.5",
"rand",
]
[[package]]
@@ -5032,7 +5055,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf"
dependencies = [
"phf_shared 0.11.1",
"rand 0.8.5",
"rand",
]
[[package]]
@@ -5201,7 +5224,7 @@ dependencies = [
"hmac",
"md-5",
"memchr",
"rand 0.8.5",
"rand",
"sha2",
"stringprep",
]
@@ -5370,6 +5393,7 @@ dependencies = [
"catalog",
"common-catalog",
"common-error",
"common-function-macro",
"datafusion",
"datatypes",
"futures",
@@ -5628,7 +5652,7 @@ dependencies = [
"paste",
"promql",
"promql-parser",
"rand 0.8.5",
"rand",
"serde",
"serde_json",
"session",
@@ -5710,19 +5734,6 @@ dependencies = [
"thiserror",
]
[[package]]
name = "rand"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
dependencies = [
"fuchsia-cprng",
"libc",
"rand_core 0.3.1",
"rdrand",
"winapi",
]
[[package]]
name = "rand"
version = "0.8.5"
@@ -5731,7 +5742,7 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core 0.6.4",
"rand_core",
]
[[package]]
@@ -5741,24 +5752,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.4",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_core"
version = "0.6.4"
@@ -5775,7 +5771,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31"
dependencies = [
"num-traits",
"rand 0.8.5",
"rand",
]
[[package]]
@@ -5815,15 +5811,6 @@ dependencies = [
"num_cpus",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
@@ -5879,15 +5866,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
"winapi",
]
[[package]]
name = "rend"
version = "0.3.6"
@@ -5917,7 +5895,7 @@ dependencies = [
"once_cell",
"percent-encoding",
"quick-xml",
"rand 0.8.5",
"rand",
"rsa",
"rust-ini",
"serde",
@@ -6070,7 +6048,7 @@ dependencies = [
"num-traits",
"pkcs1",
"pkcs8",
"rand_core 0.6.4",
"rand_core",
"sha2",
"signature",
"subtle",
@@ -6099,7 +6077,7 @@ dependencies = [
"byteorder",
"bytes",
"num-traits",
"rand 0.8.5",
"rand",
"rkyv",
"serde",
"serde_json",
@@ -6238,7 +6216,7 @@ dependencies = [
"num-traits",
"once_cell",
"radium",
"rand 0.8.5",
"rand",
"siphasher",
"unic-ucd-category",
"volatile",
@@ -6383,8 +6361,8 @@ dependencies = [
"parking_lot",
"paste",
"puruspe",
"rand 0.8.5",
"rand_core 0.6.4",
"rand",
"rand_core",
"rustpython-common",
"rustpython-derive",
"rustpython-vm",
@@ -6450,7 +6428,7 @@ dependencies = [
"optional",
"parking_lot",
"paste",
"rand 0.8.5",
"rand",
"result-like",
"rustc_version 0.4.0",
"rustpython-ast",
@@ -6655,6 +6633,7 @@ checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2"
name = "script"
version = "0.1.0"
dependencies = [
"arrow",
"async-trait",
"catalog",
"common-catalog",
@@ -6663,6 +6642,7 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-telemetry",
"common-test-util",
"common-time",
"console",
"crossbeam-utils",
@@ -6695,7 +6675,6 @@ dependencies = [
"storage",
"store-api",
"table",
"tempdir",
"tokio",
"tokio-test",
]
@@ -6901,10 +6880,12 @@ dependencies = [
"common-error",
"common-grpc",
"common-grpc-expr",
"common-mem-prof",
"common-query",
"common-recordbatch",
"common-runtime",
"common-telemetry",
"common-test-util",
"common-time",
"datatypes",
"derive_builder 0.12.0",
@@ -6921,13 +6902,14 @@ dependencies = [
"once_cell",
"openmetrics-parser",
"opensrv-mysql",
"parking_lot",
"pgwire",
"pin-project",
"postgres-types",
"promql-parser",
"prost",
"query",
"rand 0.8.5",
"rand",
"regex",
"rustls",
"rustls-pemfile",
@@ -6935,7 +6917,6 @@ dependencies = [
"script",
"serde",
"serde_json",
"serde_urlencoded",
"session",
"sha1",
"snafu",
@@ -6943,7 +6924,6 @@ dependencies = [
"sql",
"strum",
"table",
"tempdir",
"tokio",
"tokio-postgres",
"tokio-postgres-rustls",
@@ -7038,7 +7018,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fe458c98333f9c8152221191a77e2a44e8325d0193484af2e9421a53019e57d"
dependencies = [
"digest",
"rand_core 0.6.4",
"rand_core",
]
[[package]]
@@ -7287,7 +7267,7 @@ dependencies = [
"lazy_static",
"nalgebra",
"num-traits",
"rand 0.8.5",
"rand",
]
[[package]]
@@ -7318,6 +7298,7 @@ dependencies = [
"common-recordbatch",
"common-runtime",
"common-telemetry",
"common-test-util",
"common-time",
"criterion 0.3.6",
"datafusion-common",
@@ -7332,7 +7313,7 @@ dependencies = [
"paste",
"planus",
"prost",
"rand 0.8.5",
"rand",
"regex",
"serde",
"serde_json",
@@ -7340,7 +7321,6 @@ dependencies = [
"snafu",
"store-api",
"table",
"tempdir",
"tokio",
"tokio-util",
"tonic",
@@ -7588,6 +7568,7 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-telemetry",
"common-test-util",
"common-time",
"datafusion",
"datafusion-common",
@@ -7604,7 +7585,6 @@ dependencies = [
"serde_json",
"snafu",
"store-api",
"tempdir",
"tokio",
"tokio-util",
]
@@ -7618,6 +7598,7 @@ dependencies = [
"common-error",
"common-procedure",
"common-telemetry",
"common-test-util",
"datatypes",
"log-store",
"mito",
@@ -7627,7 +7608,6 @@ dependencies = [
"snafu",
"storage",
"table",
"tempdir",
"tokio",
]
@@ -7649,28 +7629,17 @@ version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5"
[[package]]
name = "tempdir"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8"
dependencies = [
"rand 0.4.6",
"remove_dir_all",
]
[[package]]
name = "tempfile"
version = "3.3.0"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95"
dependencies = [
"cfg-if 1.0.0",
"fastrand",
"libc",
"redox_syscall",
"remove_dir_all",
"winapi",
"rustix",
"windows-sys",
]
[[package]]
@@ -7717,6 +7686,7 @@ dependencies = [
"common-query",
"common-runtime",
"common-telemetry",
"common-test-util",
"datanode",
"datatypes",
"dotenv",
@@ -7725,14 +7695,14 @@ dependencies = [
"object-store",
"once_cell",
"paste",
"rand 0.8.5",
"rand",
"serde",
"serde_json",
"servers",
"snafu",
"sql",
"table",
"tempdir",
"tempfile",
"tokio",
"uuid",
]
@@ -7831,6 +7801,37 @@ dependencies = [
"ordered-float 2.10.0",
]
[[package]]
name = "tikv-jemalloc-ctl"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e37706572f4b151dff7a0146e040804e9c26fe3a3118591112f05cf12a4216c1"
dependencies = [
"libc",
"paste",
"tikv-jemalloc-sys",
]
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.3+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a678df20055b43e57ef8cddde41cdfda9a3c1a060b67f4c5836dfb1d78543ba8"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20612db8a13a6c06d57ec83953694185a367e16945f66565e8028d2c0bd76979"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]]
name = "time"
version = "0.1.45"
@@ -8137,7 +8138,7 @@ dependencies = [
"indexmap",
"pin-project",
"pin-project-lite",
"rand 0.8.5",
"rand",
"slab",
"tokio",
"tokio-util",
@@ -8334,7 +8335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if 1.0.0",
"rand 0.8.5",
"rand",
"static_assertions",
]
@@ -8648,7 +8649,7 @@ checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c"
dependencies = [
"atomic",
"getrandom",
"rand 0.8.5",
"rand",
"serde",
"uuid-macro-internal",
]

View File

@@ -12,12 +12,14 @@ members = [
"src/common/function-macro",
"src/common/grpc",
"src/common/grpc-expr",
"src/common/mem-prof",
"src/common/procedure",
"src/common/query",
"src/common/recordbatch",
"src/common/runtime",
"src/common/substrait",
"src/common/telemetry",
"src/common/test-util",
"src/common/time",
"src/datanode",
"src/datatypes",
@@ -48,7 +50,7 @@ edition = "2021"
license = "Apache-2.0"
[workspace.dependencies]
arrow = "33.0"
arrow = { version = "33.0", features = ["pyarrow"] }
arrow-array = "33.0"
arrow-flight = "33.0"
arrow-schema = { version = "33.0", features = ["serde"] }

View File

@@ -46,8 +46,8 @@ enable = true
[prometheus_options]
enable = true
# PromQL protocol options, see `standalone.example.toml`.
[promql_options]
# Prometheus protocol options, see `standalone.example.toml`.
[prom_options]
addr = "127.0.0.1:4004"
# Metasrv client options, see `datanode.example.toml`.

View File

@@ -71,9 +71,9 @@ enable = true
# Whether to enable Prometheus remote write and read in HTTP API, true by default.
enable = true
# PromQL protocol options.
[promql_options]
# PromQL server address, "127.0.0.1:4004" by default.
# Prom protocol options.
[prom_options]
# Prometheus API server address, "127.0.0.1:4004" by default.
addr = "127.0.0.1:4004"
# WAL options.

View File

@@ -10,7 +10,9 @@ RUN apt-get update && apt-get install -y \
curl \
build-essential \
pkg-config \
python3-dev
python3 \
python3-dev \
&& pip install pyarrow
# Install Rust.
SHELL ["/bin/bash", "-c"]

View File

@@ -10,7 +10,7 @@ common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1599ae2a0d1d8f42ee23ed26e4ad7a7b34134c60" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "ad0187295035e83f76272da553453e649b7570de" }
prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }
tonic.workspace = true

View File

@@ -18,12 +18,14 @@ common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
dashmap = "5.4"
datafusion.workspace = true
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util.workspace = true
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
parking_lot = "0.12"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
@@ -34,10 +36,10 @@ table = { path = "../table" }
tokio.workspace = true
[dev-dependencies]
common-test-util = { path = "../common/test-util" }
chrono.workspace = true
log-store = { path = "../log-store" }
mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
storage = { path = "../storage" }
tempdir = "0.3"
tokio.workspace = true

View File

@@ -13,7 +13,7 @@
// limitations under the License.
use std::any::Any;
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::pin::Pin;
use std::sync::Arc;
@@ -22,8 +22,10 @@ use async_stream::stream;
use async_trait::async_trait;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_telemetry::{debug, error, info};
use dashmap::DashMap;
use futures::Stream;
use futures_util::StreamExt;
use parking_lot::RwLock;
use snafu::{OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
@@ -39,6 +41,7 @@ use crate::error::{
use crate::helper::{
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
CATALOG_KEY_PREFIX,
};
use crate::remote::{Kv, KvBackendRef};
use crate::{
@@ -51,10 +54,9 @@ use crate::{
pub struct RemoteCatalogManager {
node_id: u64,
backend: KvBackendRef,
catalogs: Arc<ArcSwap<HashMap<String, CatalogProviderRef>>>,
catalogs: Arc<RwLock<DashMap<String, CatalogProviderRef>>>,
engine: TableEngineRef,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
mutex: Arc<Mutex<()>>,
}
impl RemoteCatalogManager {
@@ -65,7 +67,6 @@ impl RemoteCatalogManager {
backend,
catalogs: Default::default(),
system_table_requests: Default::default(),
mutex: Default::default(),
}
}
@@ -386,7 +387,14 @@ impl CatalogManager for RemoteCatalogManager {
"Initialized catalogs: {:?}",
catalogs.keys().cloned().collect::<Vec<_>>()
);
self.catalogs.store(Arc::new(catalogs));
{
let self_catalogs = self.catalogs.read();
catalogs.into_iter().for_each(|(k, v)| {
self_catalogs.insert(k, v);
});
}
info!("Max table id allocated: {}", max_table_id);
let mut system_table_requests = self.system_table_requests.lock().await;
@@ -504,12 +512,10 @@ impl CatalogList for RemoteCatalogManager {
) -> Result<Option<CatalogProviderRef>> {
let key = self.build_catalog_key(&name).to_string();
let backend = self.backend.clone();
let mutex = self.mutex.clone();
let catalogs = self.catalogs.clone();
std::thread::spawn(|| {
common_runtime::block_on_write(async move {
let _guard = mutex.lock().await;
backend
.set(
key.as_bytes(),
@@ -518,11 +524,10 @@ impl CatalogList for RemoteCatalogManager {
.context(InvalidCatalogValueSnafu)?,
)
.await?;
let prev_catalogs = catalogs.load();
let mut new_catalogs = HashMap::with_capacity(prev_catalogs.len() + 1);
new_catalogs.clone_from(&prev_catalogs);
let prev = new_catalogs.insert(name, catalog);
catalogs.store(Arc::new(new_catalogs));
let catalogs = catalogs.read();
let prev = catalogs.insert(name, catalog.clone());
Ok(prev)
})
})
@@ -532,12 +537,65 @@ impl CatalogList for RemoteCatalogManager {
/// List all catalogs from metasrv
fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.load().keys().cloned().collect::<Vec<_>>())
let catalogs = self.catalogs.read();
Ok(catalogs.iter().map(|k| k.key().to_string()).collect())
}
/// Read catalog info of given name from metasrv.
fn catalog(&self, name: &str) -> Result<Option<CatalogProviderRef>> {
Ok(self.catalogs.load().get(name).cloned())
{
let catalogs = self.catalogs.read();
let catalog = catalogs.get(name);
if let Some(catalog) = catalog {
return Ok(Some(catalog.clone()));
}
}
let catalogs = self.catalogs.write();
let catalog = catalogs.get(name);
if let Some(catalog) = catalog {
return Ok(Some(catalog.clone()));
}
// It's for lack of incremental catalog syncing between datanode and meta. Here we fetch catalog
// from meta on demand. This can be removed when incremental catalog syncing is done in datanode.
let backend = self.backend.clone();
let catalogs_from_meta: HashSet<String> = std::thread::spawn(|| {
common_runtime::block_on_read(async move {
let mut stream = backend.range(CATALOG_KEY_PREFIX.as_bytes());
let mut catalogs = HashSet::new();
while let Some(catalog) = stream.next().await {
if let Ok(catalog) = catalog {
let catalog_key = String::from_utf8_lossy(&catalog.0);
if let Ok(key) = CatalogKey::parse(&catalog_key) {
catalogs.insert(key.catalog_name);
}
}
}
catalogs
})
})
.join()
.unwrap();
catalogs.retain(|catalog_name, _| catalogs_from_meta.get(catalog_name).is_some());
for catalog in catalogs_from_meta {
catalogs
.entry(catalog.clone())
.or_insert(self.new_catalog_provider(&catalog));
}
let catalog = catalogs.get(name);
Ok(catalog.as_deref().cloned())
}
}

View File

@@ -395,6 +395,7 @@ pub struct TableEntryValue {
#[cfg(test)]
mod tests {
use common_recordbatch::RecordBatches;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datatypes::value::Value;
use log_store::NoopLogStore;
use mito::config::EngineConfig;
@@ -405,7 +406,6 @@ mod tests {
use storage::EngineImpl;
use table::metadata::TableType;
use table::metadata::TableType::Base;
use tempdir::TempDir;
use super::*;
@@ -480,7 +480,7 @@ mod tests {
}
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
let dir = TempDir::new("system-table-test").unwrap();
let dir = create_temp_dir("system-table-test");
let store_dir = dir.path().to_string_lossy();
let accessor = object_store::services::Fs::default()
.root(&store_dir)

View File

@@ -9,6 +9,9 @@ default-run = "greptime"
name = "greptime"
path = "src/bin/greptime.rs"
[features]
mem-prof = ["tikv-jemallocator", "tikv-jemalloc-ctl"]
[dependencies]
anymap = "1.0.0-beta.2"
catalog = { path = "../catalog" }
@@ -18,7 +21,6 @@ common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
substrait = { path = "../common/substrait" }
common-telemetry = { path = "../common/telemetry", features = [
"deadlock_detection",
] }
@@ -36,14 +38,17 @@ serde.workspace = true
servers = { path = "../servers" }
session = { path = "../session" }
snafu.workspace = true
substrait = { path = "../common/substrait" }
tikv-jemalloc-ctl = { version = "0.5", optional = true }
tikv-jemallocator = { version = "0.5", optional = true }
tokio.workspace = true
toml = "0.5"
[dev-dependencies]
common-test-util = { path = "../common/test-util" }
rexpect = "0.5"
serde.workspace = true
tempdir = "0.3"
tempfile.workspace = true
[build-dependencies]
build-data = "0.1.3"

View File

@@ -87,6 +87,10 @@ fn print_version() -> &'static str {
)
}
#[cfg(feature = "mem-prof")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[tokio::main]
async fn main() -> Result<()> {
let cmd = Command::parse();

View File

@@ -153,15 +153,15 @@ mod tests {
use std::io::Write;
use std::time::Duration;
use common_test_util::temp_dir::create_named_temp_file;
use datanode::datanode::{CompactionConfig, ObjectStoreConfig};
use servers::Mode;
use tempfile::NamedTempFile;
use super::*;
#[test]
fn test_read_from_config_file() {
let mut file = NamedTempFile::new().unwrap();
let mut file = create_named_temp_file();
let toml_str = r#"
mode = "distributed"
enable_memory_catalog = false

View File

@@ -23,6 +23,7 @@ use frontend::instance::Instance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prom::PromOptions;
use meta_client::MetaClientOptions;
use servers::auth::UserProviderRef;
use servers::http::HttpOptions;
@@ -67,6 +68,8 @@ pub struct StartCommand {
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
prom_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
#[clap(long)]
opentsdb_addr: Option<String>,
@@ -141,6 +144,9 @@ impl TryFrom<StartCommand> for FrontendOptions {
..Default::default()
});
}
if let Some(addr) = cmd.prom_addr {
opts.prom_options = Some(PromOptions { addr });
}
if let Some(addr) = cmd.postgres_addr {
opts.postgres_options = Some(PostgresOptions {
addr,
@@ -176,8 +182,8 @@ mod tests {
use std::io::Write;
use std::time::Duration;
use common_test_util::temp_dir::create_named_temp_file;
use servers::auth::{Identity, Password, UserProviderRef};
use tempfile::NamedTempFile;
use super::*;
@@ -186,6 +192,7 @@ mod tests {
let command = StartCommand {
http_addr: Some("127.0.0.1:1234".to_string()),
grpc_addr: None,
prom_addr: Some("127.0.0.1:4444".to_string()),
mysql_addr: Some("127.0.0.1:5678".to_string()),
postgres_addr: Some("127.0.0.1:5432".to_string()),
opentsdb_addr: Some("127.0.0.1:4321".to_string()),
@@ -209,6 +216,7 @@ mod tests {
opts.opentsdb_options.as_ref().unwrap().addr,
"127.0.0.1:4321"
);
assert_eq!(opts.prom_options.as_ref().unwrap().addr, "127.0.0.1:4444");
let default_opts = FrontendOptions::default();
assert_eq!(
@@ -233,7 +241,7 @@ mod tests {
#[test]
fn test_read_from_config_file() {
let mut file = NamedTempFile::new().unwrap();
let mut file = create_named_temp_file();
let toml_str = r#"
mode = "distributed"
@@ -247,6 +255,7 @@ mod tests {
http_addr: None,
grpc_addr: None,
mysql_addr: None,
prom_addr: None,
postgres_addr: None,
opentsdb_addr: None,
influxdb_enable: None,
@@ -276,6 +285,7 @@ mod tests {
http_addr: None,
grpc_addr: None,
mysql_addr: None,
prom_addr: None,
postgres_addr: None,
opentsdb_addr: None,
influxdb_enable: None,

View File

@@ -115,8 +115,8 @@ impl TryFrom<StartCommand> for MetaSrvOptions {
mod tests {
use std::io::Write;
use common_test_util::temp_dir::create_named_temp_file;
use meta_srv::selector::SelectorType;
use tempfile::NamedTempFile;
use super::*;
@@ -139,7 +139,7 @@ mod tests {
#[test]
fn test_read_from_config_file() {
let mut file = NamedTempFile::new().unwrap();
let mut file = create_named_temp_file();
let toml_str = r#"
bind_addr = "127.0.0.1:3002"
server_addr = "127.0.0.1:3002"

View File

@@ -28,8 +28,8 @@ use frontend::instance::Instance as FeInstance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prom::PromOptions;
use frontend::prometheus::PrometheusOptions;
use frontend::promql::PromqlOptions;
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
@@ -77,7 +77,7 @@ pub struct StandaloneOptions {
pub opentsdb_options: Option<OpentsdbOptions>,
pub influxdb_options: Option<InfluxdbOptions>,
pub prometheus_options: Option<PrometheusOptions>,
pub promql_options: Option<PromqlOptions>,
pub prom_options: Option<PromOptions>,
pub wal: WalConfig,
pub storage: ObjectStoreConfig,
pub compaction: CompactionConfig,
@@ -96,7 +96,7 @@ impl Default for StandaloneOptions {
opentsdb_options: Some(OpentsdbOptions::default()),
influxdb_options: Some(InfluxdbOptions::default()),
prometheus_options: Some(PrometheusOptions::default()),
promql_options: Some(PromqlOptions::default()),
prom_options: Some(PromOptions::default()),
wal: WalConfig::default(),
storage: ObjectStoreConfig::default(),
compaction: CompactionConfig::default(),
@@ -116,7 +116,7 @@ impl StandaloneOptions {
opentsdb_options: self.opentsdb_options,
influxdb_options: self.influxdb_options,
prometheus_options: self.prometheus_options,
promql_options: self.promql_options,
prom_options: self.prom_options,
meta_client_options: None,
}
}
@@ -142,6 +142,8 @@ struct StartCommand {
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
prom_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
#[clap(long)]
opentsdb_addr: Option<String>,
@@ -254,6 +256,11 @@ impl TryFrom<StartCommand> for FrontendOptions {
..Default::default()
})
}
if let Some(addr) = cmd.prom_addr {
opts.prom_options = Some(PromOptions { addr })
}
if let Some(addr) = cmd.postgres_addr {
opts.postgres_options = Some(PostgresOptions {
addr,
@@ -302,6 +309,7 @@ mod tests {
http_addr: None,
rpc_addr: None,
mysql_addr: None,
prom_addr: None,
postgres_addr: None,
opentsdb_addr: None,
config_file: Some(format!(
@@ -347,6 +355,7 @@ mod tests {
let command = StartCommand {
http_addr: None,
rpc_addr: None,
prom_addr: None,
mysql_addr: None,
postgres_addr: None,
opentsdb_addr: None,

View File

@@ -29,9 +29,9 @@ mod tests {
use std::fs::File;
use std::io::Write;
use common_test_util::temp_dir::create_temp_dir;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use tempdir::TempDir;
use super::*;
use crate::error::Result;
@@ -62,7 +62,7 @@ mod tests {
host: "greptime.test".to_string(),
};
let dir = TempDir::new("test_from_file").unwrap();
let dir = create_temp_dir("test_from_file");
let test_file = format!("{}/test.toml", dir.path().to_str().unwrap());
let s = toml::to_string(&config).unwrap();

View File

@@ -18,8 +18,8 @@ mod tests {
use std::process::{Command, Stdio};
use std::time::Duration;
use common_test_util::temp_dir::create_temp_dir;
use rexpect::session::PtyReplSession;
use tempdir::TempDir;
struct Repl {
repl: PtyReplSession,
@@ -48,8 +48,8 @@ mod tests {
#[test]
fn test_repl() {
let data_dir = TempDir::new_in("/tmp", "data").unwrap();
let wal_dir = TempDir::new_in("/tmp", "wal").unwrap();
let data_dir = create_temp_dir("data");
let wal_dir = create_temp_dir("wal");
let mut bin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
bin_path.push("../../target/debug");

View File

@@ -17,5 +17,4 @@ snafu = { version = "0.7", features = ["backtraces"] }
[dev-dependencies]
chrono.workspace = true
tempdir = "0.3"
tokio.workspace = true

View File

@@ -10,6 +10,7 @@ proc-macro = true
[dependencies]
quote = "1.0"
syn = "1.0"
proc-macro2 = "1.0"
[dev-dependencies]
arc-swap = "1.0"

View File

@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod range_fn;
use proc_macro::TokenStream;
use quote::{quote, quote_spanned};
use range_fn::process_range_fn;
use syn::parse::Parser;
use syn::spanned::Spanned;
use syn::{parse_macro_input, DeriveInput, ItemStruct};
@@ -83,3 +86,31 @@ pub fn as_aggr_func_creator(_args: TokenStream, input: TokenStream) -> TokenStre
}
.into()
}
/// Attribute macro to convert an arithmetic function into a range function. The annotated function
/// should accept several arrays as input and return a single value as output. This procedural
/// macro works on any number of input parameters. The return type can be either a primitive type
/// or one wrapped in `Option`.
///
/// # Example
/// Take `count_over_time()` in PromQL as an example:
/// ```rust, ignore
/// /// The count of all values in the specified interval.
/// #[range_fn(
/// name = "CountOverTime",
/// ret = "Float64Array",
/// display_name = "prom_count_over_time"
/// )]
/// pub fn count_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> f64 {
/// values.len() as f64
/// }
/// ```
///
/// # Arguments
/// - `name`: The name of the generated [ScalarUDF] struct.
/// - `ret`: The return type of the generated UDF function.
/// - `display_name`: The display name of the generated UDF function.
#[proc_macro_attribute]
pub fn range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
process_range_fn(args, input)
}
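
To make the doc comment above more concrete, here is an editor's illustrative sketch (not taken from this diff) of how another over-time aggregation could be declared with the same attribute. The function name, body, and attribute values are hypothetical and simply mirror the documented `count_over_time` example; the import path is assumed.

```rust
// Editor's sketch only: a hypothetical sum_over_time declared via #[range_fn],
// following the shape of the count_over_time example documented above.
use arrow::array::{Float64Array, TimestampMillisecondArray}; // assumed import path

/// The sum of all values in the specified interval.
#[range_fn(
    name = "SumOverTime",
    ret = "Float64Array",
    display_name = "prom_sum_over_time"
)]
pub fn sum_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> f64 {
    // Null slots are ignored here for brevity.
    values.values().iter().sum()
}
```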

View File

@@ -0,0 +1,230 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::punctuated::Punctuated;
use syn::spanned::Spanned;
use syn::token::Comma;
use syn::{
parse_macro_input, Attribute, AttributeArgs, FnArg, Ident, ItemFn, Meta, MetaNameValue,
NestedMeta, Signature, Type, TypeReference, Visibility,
};
/// Internal util macro to early return on error.
macro_rules! ok {
($item:expr) => {
match $item {
Ok(item) => item,
Err(e) => return e.into_compile_error().into(),
}
};
}
pub(crate) fn process_range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
// extract arg map
let arg_pairs = parse_macro_input!(args as AttributeArgs);
let arg_span = arg_pairs[0].span();
let arg_map = ok!(extract_arg_map(arg_pairs));
// decompose the fn block
let compute_fn = parse_macro_input!(input as ItemFn);
let ItemFn {
attrs,
vis,
sig,
block,
} = compute_fn;
// extract fn arg list
let Signature {
inputs,
ident: fn_name,
..
} = &sig;
let arg_types = ok!(extract_input_types(inputs));
// build the struct and its impl block
let struct_code = build_struct(
attrs,
vis,
ok!(get_ident(&arg_map, "name", arg_span)),
ok!(get_ident(&arg_map, "display_name", arg_span)),
);
let calc_fn_code = build_calc_fn(
ok!(get_ident(&arg_map, "name", arg_span)),
arg_types,
fn_name.clone(),
ok!(get_ident(&arg_map, "ret", arg_span)),
);
// preserve this fn, but remove its `pub` modifier
let input_fn_code: TokenStream = quote! {
#sig { #block }
}
.into();
let mut result = TokenStream::new();
result.extend(struct_code);
result.extend(calc_fn_code);
result.extend(input_fn_code);
result
}
/// Extract a String <-> Ident map from the attribute args.
fn extract_arg_map(args: Vec<NestedMeta>) -> Result<HashMap<String, Ident>, syn::Error> {
args.into_iter()
.map(|meta| {
if let NestedMeta::Meta(Meta::NameValue(MetaNameValue { path, lit, .. })) = meta {
let name = path.get_ident().unwrap().to_string();
let ident = match lit {
syn::Lit::Str(lit_str) => lit_str.parse::<Ident>(),
_ => Err(syn::Error::new(
lit.span(),
"Unexpected attribute format. Expected `name = \"value\"`",
)),
}?;
Ok((name, ident))
} else {
Err(syn::Error::new(
meta.span(),
"Unexpected attribute format. Expected `name = \"value\"`",
))
}
})
.collect::<Result<HashMap<String, Ident>, syn::Error>>()
}
/// Helper function to get an Ident from the previous arg map.
fn get_ident(map: &HashMap<String, Ident>, key: &str, span: Span) -> Result<Ident, syn::Error> {
map.get(key)
.cloned()
.ok_or_else(|| syn::Error::new(span, format!("Expect attribute {key} but not found")))
}
/// Extract the argument list from the annotated function.
fn extract_input_types(inputs: &Punctuated<FnArg, Comma>) -> Result<Vec<Type>, syn::Error> {
inputs
.iter()
.map(|arg| match arg {
FnArg::Receiver(receiver) => Err(syn::Error::new(receiver.span(), "unexpected `self` receiver")),
FnArg::Typed(pat_type) => Ok(*pat_type.ty.clone()),
})
.collect()
}
fn build_struct(
attrs: Vec<Attribute>,
vis: Visibility,
name: Ident,
display_name_ident: Ident,
) -> TokenStream {
let display_name = display_name_ident.to_string();
quote! {
#(#attrs)*
#[derive(Debug)]
#vis struct #name {}
impl #name {
pub const fn name() -> &'static str {
#display_name
}
pub fn scalar_udf() -> ScalarUDF {
ScalarUDF {
name: Self::name().to_string(),
signature: Signature::new(
TypeSignature::Exact(Self::input_type()),
Volatility::Immutable,
),
return_type: Arc::new(|_| Ok(Arc::new(Self::return_type()))),
fun: Arc::new(Self::calc),
}
}
// TODO(ruihang): this should be parameterized
// time index column and value column
fn input_type() -> Vec<DataType> {
vec![
RangeArray::convert_data_type(DataType::Timestamp(TimeUnit::Millisecond, None)),
RangeArray::convert_data_type(DataType::Float64),
]
}
// TODO(ruihang): this should be parameterized
fn return_type() -> DataType {
DataType::Float64
}
}
}
.into()
}
fn build_calc_fn(
name: Ident,
param_types: Vec<Type>,
fn_name: Ident,
ret_type: Ident,
) -> TokenStream {
let param_names = param_types
.iter()
.enumerate()
.map(|(i, ty)| Ident::new(&format!("param_{}", i), ty.span()))
.collect::<Vec<_>>();
let unref_param_types = param_types
.iter()
.map(|ty| {
if let Type::Reference(TypeReference { elem, .. }) = ty {
elem.as_ref().clone()
} else {
ty.clone()
}
})
.collect::<Vec<_>>();
let num_params = param_types.len();
let param_numbers = (0..num_params).collect::<Vec<_>>();
let range_array_names = param_names
.iter()
.map(|name| Ident::new(&format!("{}_range_array", name), name.span()))
.collect::<Vec<_>>();
let first_range_array_name = range_array_names.first().unwrap().clone();
quote! {
impl #name {
fn calc(input: &[ColumnarValue]) -> Result<ColumnarValue, DataFusionError> {
assert_eq!(input.len(), #num_params);
#( let #range_array_names = RangeArray::try_new(extract_array(&input[#param_numbers])?.data().clone().into())?; )*
// TODO(ruihang): add ensure!()
let mut result_array = Vec::new();
for index in 0..#first_range_array_name.len(){
#( let #param_names = #range_array_names.get(index).unwrap().as_any().downcast_ref::<#unref_param_types>().unwrap().clone(); )*
// TODO(ruihang): add ensure!() to check length
let result = #fn_name(#( &#param_names, )*);
result_array.push(result);
}
let result = ColumnarValue::Array(Arc::new(#ret_type::from_iter(result_array)));
Ok(result)
}
}
}
.into()
}

View File

@@ -14,9 +14,9 @@
use std::sync::Arc;
use common_query::error::{ExecuteFunctionSnafu, FromScalarValueSnafu};
use common_query::error::FromScalarValueSnafu;
use common_query::prelude::{
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf,
};
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::*;
@@ -54,16 +54,8 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
.collect();
let result = func_cloned.eval(func_ctx, &args.context(FromScalarValueSnafu)?);
let udf = if len.is_some() {
result.map(ColumnarValue::Vector)?
} else {
ScalarValue::try_from_array(&result?.to_arrow_array(), 0)
.map(ColumnarValue::Scalar)
.context(ExecuteFunctionSnafu)?
};
Ok(udf)
let udf_result = result.map(ColumnarValue::Vector)?;
Ok(udf_result)
});
ScalarUdf::new(func.name(), &func.signature(), &return_type, &fun)

View File

@@ -0,0 +1,20 @@
[package]
name = "common-mem-prof"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
common-error = { path = "../error" }
snafu.workspace = true
tempfile = "3.4"
tikv-jemalloc-ctl = { version = "0.5", features = ["use_std"] }
tikv-jemallocator = "0.5"
tokio.workspace = true
[dependencies.tikv-jemalloc-sys]
version = "0.5"
features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"]
[profile.release]
debug = true

View File

@@ -0,0 +1,50 @@
# Profile memory usage of GreptimeDB
This crate provides an easy way to dump memory profiling info.
## Prerequisites
### jemalloc
```bash
# for macOS
brew install jemalloc
# for Ubuntu
sudo apt install libjemalloc-dev
```
### [flamegraph](https://github.com/brendangregg/FlameGraph)
```bash
curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl
```
### Build GreptimeDB with `mem-prof` feature.
```bash
cargo build --features=mem-prof
```
## Profiling
Start GreptimeDB instance with environment variables:
```bash
MALLOC_CONF=prof:true,lg_prof_interval:28 ./target/debug/greptime standalone start
```
Dump memory profiling data through HTTP API:
```bash
curl localhost:4000/v1/prof/mem > greptime.hprof
```
You can periodically dump profiling data and compare them to find the delta memory usage.
## Analyze profiling data with flamegraph
To create a flamegraph from the dumped profiling data:
```bash
jeprof --svg <path_to_greptimedb_binary> --base=<baseline_prof> <profile_data> > output.svg
```
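
Beyond the README above, here is an editor's illustrative sketch (not part of this diff) of how an HTTP endpoint could expose `dump_profile()`. The route path matches the curl example, but the handler, function names, and axum wiring are assumptions rather than the actual `servers` crate code.

```rust
// Editor's sketch: hypothetical axum wiring for a /v1/prof/mem endpoint.
// The real HTTP server lives in the `servers` crate and may differ.
use axum::{http::StatusCode, response::IntoResponse, routing::get, Router};

async fn mem_prof_handler() -> impl IntoResponse {
    match common_mem_prof::dump_profile().await {
        // Return the raw jemalloc heap profile bytes on success.
        Ok(data) => (StatusCode::OK, data).into_response(),
        Err(e) => (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()).into_response(),
    }
}

fn prof_routes() -> Router {
    Router::new().route("/v1/prof/mem", get(mem_prof_handler))
}
```

With wiring like this, the curl command shown in the README would stream the profile bytes straight to a file.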

View File

@@ -0,0 +1,66 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::path::PathBuf;
use common_error::prelude::{ErrorExt, StatusCode};
use snafu::{Backtrace, Snafu};
pub type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum Error {
#[snafu(display("Failed to read OPT_PROF"))]
ReadOptProf { source: tikv_jemalloc_ctl::Error },
#[snafu(display("Memory profiling is not enabled"))]
ProfilingNotEnabled,
#[snafu(display("Failed to build temp file from given path: {:?}", path))]
BuildTempPath { path: PathBuf, backtrace: Backtrace },
#[snafu(display("Failed to open temp file: {}", path))]
OpenTempFile {
path: String,
source: std::io::Error,
},
#[snafu(display("Failed to dump profiling data to temp file: {:?}", path))]
DumpProfileData {
path: PathBuf,
source: tikv_jemalloc_ctl::Error,
},
}
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::ReadOptProf { .. } => StatusCode::Internal,
Error::ProfilingNotEnabled => StatusCode::InvalidArguments,
Error::BuildTempPath { .. } => StatusCode::Internal,
Error::OpenTempFile { .. } => StatusCode::StorageUnavailable,
Error::DumpProfileData { .. } => StatusCode::StorageUnavailable,
}
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
snafu::ErrorCompat::backtrace(self)
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -0,0 +1,74 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod error;
use std::ffi::{c_char, CString};
use std::path::PathBuf;
use snafu::{ensure, ResultExt};
use tokio::io::AsyncReadExt;
use crate::error::{
BuildTempPathSnafu, DumpProfileDataSnafu, OpenTempFileSnafu, ProfilingNotEnabledSnafu,
ReadOptProfSnafu,
};
const PROF_DUMP: &[u8] = b"prof.dump\0";
const OPT_PROF: &[u8] = b"opt.prof\0";
pub async fn dump_profile() -> error::Result<Vec<u8>> {
ensure!(is_prof_enabled()?, ProfilingNotEnabledSnafu);
let tmp_path = tempfile::tempdir().map_err(|_| {
BuildTempPathSnafu {
path: std::env::temp_dir(),
}
.build()
})?;
let mut path_buf = PathBuf::from(tmp_path.path());
path_buf.push("greptimedb.hprof");
let path = path_buf
.to_str()
.ok_or_else(|| BuildTempPathSnafu { path: &path_buf }.build())?
.to_string();
let mut bytes = CString::new(path.as_str())
.map_err(|_| BuildTempPathSnafu { path: &path_buf }.build())?
.into_bytes_with_nul();
{
// #safety: we always expect a valid temp file path to write profiling data to.
let ptr = bytes.as_mut_ptr() as *mut c_char;
unsafe {
tikv_jemalloc_ctl::raw::write(PROF_DUMP, ptr)
.context(DumpProfileDataSnafu { path: path_buf })?
}
}
let mut f = tokio::fs::File::open(path.as_str())
.await
.context(OpenTempFileSnafu { path: &path })?;
let mut buf = vec![];
f.read_to_end(&mut buf)
.await
.context(OpenTempFileSnafu { path })?;
Ok(buf)
}
fn is_prof_enabled() -> error::Result<bool> {
// safety: OPT_PROF variable, if present, is always a boolean value.
Ok(unsafe { tikv_jemalloc_ctl::raw::read::<bool>(OPT_PROF).context(ReadOptProfSnafu)? })
}
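
A minimal caller-side sketch, assuming this module is importable as `mem_prof` (the real crate name may differ) and that the binary runs with jemalloc profiling active (e.g. `MALLOC_CONF=prof:true`); otherwise `dump_profile` returns `ProfilingNotEnabled`.

async fn save_heap_profile() -> mem_prof::error::Result<()> {
    // `mem_prof` is an assumed import name for the module shown above.
    // dump_profile() asks jemalloc to write a heap profile and hands back the bytes.
    let profile = mem_prof::dump_profile().await?;
    // Persist the dump somewhere it can later be inspected with `jeprof`.
    tokio::fs::write("/tmp/greptimedb.hprof", profile)
        .await
        .expect("failed to persist heap profile");
    Ok(())
}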


@@ -19,5 +19,5 @@ tokio.workspace = true
uuid.workspace = true
[dev-dependencies]
common-test-util = { path = "../test-util" }
futures-util.workspace = true
tempdir = "0.3"


@@ -409,9 +409,9 @@ impl ProcedureManager for LocalManager {
/// Create a new [ProcedureMeta] for test purpose.
#[cfg(test)]
mod test_util {
use common_test_util::temp_dir::TempDir;
use object_store::services::Fs as Builder;
use object_store::ObjectStoreBuilder;
use tempdir::TempDir;
use super::*;
@@ -430,7 +430,7 @@ mod test_util {
mod tests {
use common_error::mock::MockError;
use common_error::prelude::StatusCode;
use tempdir::TempDir;
use common_test_util::temp_dir::create_temp_dir;
use super::*;
use crate::error::Error;
@@ -540,7 +540,7 @@ mod tests {
#[test]
fn test_register_loader() {
let dir = TempDir::new("register").unwrap();
let dir = create_temp_dir("register");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
};
@@ -558,7 +558,7 @@ mod tests {
#[tokio::test]
async fn test_recover() {
let dir = TempDir::new("recover").unwrap();
let dir = create_temp_dir("recover");
let object_store = test_util::new_object_store(&dir);
let config = ManagerConfig {
object_store: object_store.clone(),
@@ -603,7 +603,7 @@ mod tests {
#[tokio::test]
async fn test_submit_procedure() {
let dir = TempDir::new("submit").unwrap();
let dir = create_temp_dir("submit");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
};
@@ -649,7 +649,7 @@ mod tests {
#[tokio::test]
async fn test_state_changed_on_err() {
let dir = TempDir::new("on_err").unwrap();
let dir = create_temp_dir("on_err");
let config = ManagerConfig {
object_store: test_util::new_object_store(&dir),
};


@@ -395,10 +395,10 @@ mod tests {
use common_error::ext::PlainError;
use common_error::mock::MockError;
use common_error::prelude::StatusCode;
use common_test_util::temp_dir::create_temp_dir;
use futures_util::future::BoxFuture;
use futures_util::{FutureExt, TryStreamExt};
use object_store::ObjectStore;
use tempdir::TempDir;
use super::*;
use crate::local::test_util;
@@ -511,7 +511,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("normal").unwrap();
let dir = create_temp_dir("normal");
let meta = normal.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -559,7 +559,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("suspend").unwrap();
let dir = create_temp_dir("suspend");
let meta = suspend.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -658,7 +658,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("parent").unwrap();
let dir = create_temp_dir("parent");
let meta = parent.new_meta(ROOT_ID);
let procedure_id = meta.id;
@@ -700,7 +700,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("fail").unwrap();
let dir = create_temp_dir("fail");
let meta = fail.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -735,7 +735,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("retry_later").unwrap();
let dir = create_temp_dir("retry_later");
let meta = retry_later.new_meta(ROOT_ID);
let ctx = context_without_provider(meta.id);
let object_store = test_util::new_object_store(&dir);
@@ -806,7 +806,7 @@ mod tests {
exec_fn,
};
let dir = TempDir::new("child_err").unwrap();
let dir = create_temp_dir("child_err");
let meta = parent.new_meta(ROOT_ID);
let object_store = test_util::new_object_store(&dir);


@@ -246,9 +246,9 @@ impl ParsedKey {
#[cfg(test)]
mod tests {
use async_trait::async_trait;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use object_store::services::Fs as Builder;
use object_store::ObjectStoreBuilder;
use tempdir::TempDir;
use super::*;
use crate::{Context, LockKey, Procedure, Status};
@@ -373,7 +373,7 @@ mod tests {
#[tokio::test]
async fn test_store_procedure() {
let dir = TempDir::new("store_procedure").unwrap();
let dir = create_temp_dir("store_procedure");
let store = procedure_store_for_test(&dir);
let procedure_id = ProcedureId::random();
@@ -398,7 +398,7 @@ mod tests {
#[tokio::test]
async fn test_commit_procedure() {
let dir = TempDir::new("commit_procedure").unwrap();
let dir = create_temp_dir("commit_procedure");
let store = procedure_store_for_test(&dir);
let procedure_id = ProcedureId::random();
@@ -416,7 +416,7 @@ mod tests {
#[tokio::test]
async fn test_rollback_procedure() {
let dir = TempDir::new("rollback_procedure").unwrap();
let dir = create_temp_dir("rollback_procedure");
let store = procedure_store_for_test(&dir);
let procedure_id = ProcedureId::random();
@@ -434,7 +434,7 @@ mod tests {
#[tokio::test]
async fn test_load_messages() {
let dir = TempDir::new("load_messages").unwrap();
let dir = create_temp_dir("load_messages");
let store = procedure_store_for_test(&dir);
// store 3 steps


@@ -115,15 +115,15 @@ impl StateStore for ObjectStateStore {
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_temp_dir;
use object_store::services::Fs as Builder;
use object_store::ObjectStoreBuilder;
use tempdir::TempDir;
use super::*;
#[tokio::test]
async fn test_object_state_store() {
let dir = TempDir::new("state_store").unwrap();
let dir = create_temp_dir("state_store");
let store_dir = dir.path().to_str().unwrap();
let accessor = Builder::default().root(store_dir).build().unwrap();
let object_store = ObjectStore::new(accessor).finish();


@@ -0,0 +1,8 @@
[package]
name = "common-test-util"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
tempfile.workspace = true


@@ -0,0 +1,15 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod temp_dir;


@@ -0,0 +1,23 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub use tempfile::{NamedTempFile, TempDir};
pub fn create_temp_dir(prefix: &str) -> TempDir {
tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
}
pub fn create_named_temp_file() -> NamedTempFile {
NamedTempFile::new().unwrap()
}
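
A sketch of the intended test-side usage, assuming the crate is imported as `common_test_util`: the returned `TempDir` keeps the directory alive and removes it on drop, following `tempfile` semantics.

#[test]
fn writes_into_scoped_temp_dir() {
    // The prefix only makes the directory easier to spot; cleanup happens
    // automatically when `dir` goes out of scope.
    let dir = common_test_util::temp_dir::create_temp_dir("demo-");
    let file_path = dir.path().join("data.txt");
    std::fs::write(&file_path, b"hello").unwrap();
    assert!(file_path.exists());
}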


@@ -9,6 +9,7 @@ default = ["python"]
python = ["dep:script"]
[dependencies]
async-compat = "0.2"
async-stream.workspace = true
async-trait.workspace = true
api = { path = "../api" }
@@ -32,6 +33,7 @@ datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util.workspace = true
hyper = { version = "0.14", features = ["full"] }
humantime-serde = "1.1"
log-store = { path = "../log-store" }
@@ -43,6 +45,7 @@ object-store = { path = "../object-store" }
pin-project = "1.0"
prost.workspace = true
query = { path = "../query" }
regex = "1.6"
script = { path = "../script", features = ["python"], optional = true }
serde = "1.0"
serde_json = "1.0"
@@ -60,11 +63,12 @@ tokio-stream = { version = "0.1", features = ["net"] }
tonic.workspace = true
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.3", features = ["full"] }
url = "2.3.1"
[dev-dependencies]
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
client = { path = "../client" }
common-test-util = { path = "../common/test-util" }
common-query = { path = "../common/query" }
datafusion-common.workspace = true
tempdir = "0.3"
toml = "0.5"


@@ -21,6 +21,7 @@ use datafusion::parquet;
use datatypes::prelude::ConcreteDataType;
use storage::error::Error as StorageError;
use table::error::Error as TableError;
use url::ParseError;
use crate::datanode::ObjectStoreConfig;
@@ -205,6 +206,30 @@ pub enum Error {
#[snafu(display("Invalid SQL, error: {}", msg))]
InvalidSql { msg: String },
#[snafu(display("Invalid url: {}, error :{}", url, source))]
InvalidUrl { url: String, source: ParseError },
#[snafu(display("Invalid filepath: {}", path))]
InvalidPath { path: String },
#[snafu(display("Invalid connection: {}", msg))]
InvalidConnection { msg: String },
#[snafu(display("Unsupported backend protocol: {}", protocol))]
UnsupportedBackendProtocol { protocol: String },
#[snafu(display("Failed to regex, source: {}", source))]
BuildRegex {
backtrace: Backtrace,
source: regex::Error,
},
#[snafu(display("Failed to parse the data, source: {}", source))]
ParseDataTypes {
#[snafu(backtrace)]
source: common_recordbatch::error::Error,
},
#[snafu(display("Not support SQL, error: {}", msg))]
NotSupportSql { msg: String },
@@ -377,6 +402,22 @@ pub enum Error {
source: common_query::error::Error,
},
#[snafu(display(
"File Schema mismatch, expected table schema: {} but found :{}",
table_schema,
file_schema
))]
InvalidSchema {
table_schema: String,
file_schema: String,
},
#[snafu(display("Failed to read parquet file, source: {}", source))]
ReadParquet {
source: parquet::errors::ParquetError,
backtrace: Backtrace,
},
#[snafu(display("Failed to write parquet file, source: {}", source))]
WriteParquet {
source: parquet::errors::ParquetError,
@@ -389,6 +430,19 @@ pub enum Error {
backtrace: Backtrace,
},
#[snafu(display("Failed to build parquet record batch stream, source: {}", source))]
BuildParquetRecordBatchStream {
backtrace: Backtrace,
source: parquet::errors::ParquetError,
},
#[snafu(display("Failed to read object in path: {}, source: {}", path, source))]
ReadObject {
path: String,
backtrace: Backtrace,
source: object_store::Error,
},
#[snafu(display("Failed to write object into path: {}, source: {}", path, source))]
WriteObject {
path: String,
@@ -396,6 +450,13 @@ pub enum Error {
source: object_store::Error,
},
#[snafu(display("Failed to lists object in path: {}, source: {}", path, source))]
ListObjects {
path: String,
backtrace: Backtrace,
source: object_store::Error,
},
#[snafu(display("Unrecognized table option: {}", source))]
UnrecognizedTableOption {
#[snafu(backtrace)]
@@ -456,6 +517,11 @@ impl ErrorExt for Error {
ColumnValuesNumberMismatch { .. }
| ColumnTypeMismatch { .. }
| InvalidSql { .. }
| InvalidUrl { .. }
| InvalidPath { .. }
| InvalidConnection { .. }
| UnsupportedBackendProtocol { .. }
| BuildRegex { .. }
| NotSupportSql { .. }
| KeyColumnNotFound { .. }
| IllegalPrimaryKeysDef { .. }
@@ -481,13 +547,19 @@ impl ErrorExt for Error {
| RenameTable { .. }
| Catalog { .. }
| MissingRequiredField { .. }
| BuildParquetRecordBatchStream { .. }
| InvalidSchema { .. }
| ParseDataTypes { .. }
| IncorrectInternalState { .. } => StatusCode::Internal,
BuildBackend { .. }
| InitBackend { .. }
| ReadParquet { .. }
| WriteParquet { .. }
| PollStream { .. }
| WriteObject { .. } => StatusCode::StorageUnavailable,
| ReadObject { .. }
| WriteObject { .. }
| ListObjects { .. } => StatusCode::StorageUnavailable,
OpenLogStore { source } => source.status_code(),
StartScriptManager { source } => source.status_code(),
OpenStorageEngine { source } => source.status_code(),


@@ -98,6 +98,7 @@ impl Instance {
DdlExpr::Alter(expr) => self.handle_alter(expr).await,
DdlExpr::CreateDatabase(expr) => self.handle_create_database(expr, query_ctx).await,
DdlExpr::DropTable(expr) => self.handle_drop_table(expr).await,
DdlExpr::FlushTable(_) => todo!(),
}
}
}


@@ -24,7 +24,7 @@ use datatypes::schema::Schema;
use futures::StreamExt;
use query::parser::{PromQuery, QueryLanguageParser, QueryStatement};
use servers::error as server_error;
use servers::promql::PromqlHandler;
use servers::prom::PromHandler;
use servers::query_handler::sql::SqlQueryHandler;
use session::context::{QueryContext, QueryContextRef};
use snafu::prelude::*;
@@ -33,7 +33,9 @@ use sql::statements::copy::CopyTable;
use sql::statements::statement::Statement;
use sql::statements::tql::Tql;
use table::engine::TableReference;
use table::requests::{CopyTableRequest, CreateDatabaseRequest, DropTableRequest};
use table::requests::{
CopyTableFromRequest, CopyTableRequest, CreateDatabaseRequest, DropTableRequest,
};
use crate::error::{self, BumpTableIdSnafu, ExecuteSqlSnafu, Result, TableIdProviderNotFoundSnafu};
use crate::instance::Instance;
@@ -202,7 +204,21 @@ impl Instance {
.execute(SqlRequest::CopyTable(req), query_ctx)
.await
}
CopyTable::From(_) => todo!(),
CopyTable::From(copy_table) => {
let (catalog_name, schema_name, table_name) =
table_idents_to_full_name(&copy_table.table_name, query_ctx.clone())?;
let req = CopyTableFromRequest {
catalog_name,
schema_name,
table_name,
connection: copy_table.connection,
pattern: copy_table.pattern,
from: copy_table.from,
};
self.sql_handler
.execute(SqlRequest::CopyTableFrom(req), query_ctx)
.await
}
},
QueryStatement::Sql(Statement::Tql(tql)) => self.execute_tql(tql, query_ctx).await,
}
@@ -366,7 +382,7 @@ impl SqlQueryHandler for Instance {
}
#[async_trait]
impl PromqlHandler for Instance {
impl PromHandler for Instance {
async fn do_query(&self, query: &PromQuery) -> server_error::Result<Output> {
let _timer = timer!(metric::METRIC_HANDLE_PROMQL_ELAPSED);


@@ -33,6 +33,7 @@ use crate::instance::sql::table_idents_to_full_name;
mod alter;
mod copy_table;
mod copy_table_from;
mod create;
mod delete;
mod drop_table;
@@ -51,6 +52,7 @@ pub enum SqlRequest {
Explain(Box<Explain>),
Delete(Delete),
CopyTable(CopyTableRequest),
CopyTableFrom(CopyTableFromRequest),
}
// Handler that executes SQL statements other than queries.
@@ -92,6 +94,7 @@ impl SqlHandler {
SqlRequest::DropTable(req) => self.drop_table(req).await,
SqlRequest::Delete(req) => self.delete(query_ctx.clone(), req).await,
SqlRequest::CopyTable(req) => self.copy_table(req).await,
SqlRequest::CopyTableFrom(req) => self.copy_table_from(req).await,
SqlRequest::ShowDatabases(req) => {
show_databases(req, self.catalog_manager.clone()).context(ExecuteSqlSnafu)
}
@@ -147,6 +150,7 @@ mod tests {
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_test_util::temp_dir::create_temp_dir;
use common_time::timestamp::Timestamp;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
@@ -167,7 +171,6 @@ mod tests {
use table::error::Result as TableResult;
use table::metadata::TableInfoRef;
use table::Table;
use tempdir::TempDir;
use super::*;
use crate::error::Error;
@@ -218,7 +221,7 @@ mod tests {
#[tokio::test]
async fn test_statement_to_request() {
let dir = TempDir::new("setup_test_engine_and_table").unwrap();
let dir = create_temp_dir("setup_test_engine_and_table");
let store_dir = dir.path().to_string_lossy();
let accessor = Builder::default().root(&store_dir).build().unwrap();
let object_store = ObjectStore::new(accessor).finish();


@@ -0,0 +1,445 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use async_compat::CompatExt;
use common_query::Output;
use common_recordbatch::error::DataTypesSnafu;
use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder;
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::vectors::{Helper, VectorRef};
use futures::future;
use futures_util::TryStreamExt;
use object_store::services::{Fs, S3};
use object_store::{Object, ObjectStore, ObjectStoreBuilder};
use regex::Regex;
use snafu::{ensure, ResultExt};
use table::engine::TableReference;
use table::requests::{CopyTableFromRequest, InsertRequest};
use tokio::io::BufReader;
use url::{ParseError, Url};
use crate::error::{self, Result};
use crate::sql::SqlHandler;
const S3_SCHEMA: &str = "S3";
const ENDPOINT_URL: &str = "ENDPOINT_URL";
const ACCESS_KEY_ID: &str = "ACCESS_KEY_ID";
const SECRET_ACCESS_KEY: &str = "SECRET_ACCESS_KEY";
const SESSION_TOKEN: &str = "SESSION_TOKEN";
const REGION: &str = "REGION";
const ENABLE_VIRTUAL_HOST_STYLE: &str = "ENABLE_VIRTUAL_HOST_STYLE";
impl SqlHandler {
pub(crate) async fn copy_table_from(&self, req: CopyTableFromRequest) -> Result<Output> {
let table_ref = TableReference {
catalog: &req.catalog_name,
schema: &req.schema_name,
table: &req.table_name,
};
let table = self.get_table(&table_ref)?;
let datasource = DataSource::new(&req.from, req.pattern, req.connection)?;
let objects = datasource.list().await?;
let mut buf: Vec<RecordBatch> = Vec::new();
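// Read every matched object as parquet, verifying its schema against the target table
// before collecting all record batches into memory.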
for obj in objects.iter() {
let reader = obj.reader().await.context(error::ReadObjectSnafu {
path: &obj.path().to_string(),
})?;
let buf_reader = BufReader::new(reader.compat());
let builder = ParquetRecordBatchStreamBuilder::new(buf_reader)
.await
.context(error::ReadParquetSnafu)?;
ensure!(
builder.schema() == table.schema().arrow_schema(),
error::InvalidSchemaSnafu {
table_schema: table.schema().arrow_schema().to_string(),
file_schema: (*(builder.schema())).to_string()
}
);
let stream = builder
.build()
.context(error::BuildParquetRecordBatchStreamSnafu)?;
let chunk = stream
.try_collect::<Vec<_>>()
.await
.context(error::ReadParquetSnafu)?;
buf.extend(chunk.into_iter());
}
let fields = table
.schema()
.arrow_schema()
.fields()
.iter()
.map(|f| f.name().to_string())
.collect::<Vec<_>>();
// Each element holds the column vectors decoded from one record batch.
let column_chunks = buf
.into_iter()
.map(|c| Helper::try_into_vectors(c.columns()).context(DataTypesSnafu))
.collect::<Vec<_>>();
let mut futs = Vec::with_capacity(column_chunks.len());
for column_chunk in column_chunks.into_iter() {
let column_chunk = column_chunk.context(error::ParseDataTypesSnafu)?;
let columns_values = fields
.iter()
.cloned()
.zip(column_chunk.into_iter())
.collect::<HashMap<String, VectorRef>>();
futs.push(table.insert(InsertRequest {
catalog_name: req.catalog_name.to_string(),
schema_name: req.schema_name.to_string(),
table_name: req.table_name.to_string(),
columns_values,
// TODO: support multiple regions
region_number: 0,
}))
}
let result = futures::future::try_join_all(futs)
.await
.context(error::InsertSnafu {
table_name: req.table_name.to_string(),
})?;
Ok(Output::AffectedRows(result.iter().sum()))
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
enum Source {
Filename(String),
Dir,
}
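/// A resolved `COPY ... FROM` source: an object store rooted at `path`, reading either a
/// single file (`Source::Filename`) or a directory listing, optionally filtered by `regex`.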
struct DataSource {
object_store: ObjectStore,
source: Source,
path: String,
regex: Option<Regex>,
}
impl DataSource {
fn from_path(url: &str, regex: Option<Regex>) -> Result<DataSource> {
let result = if url.ends_with('/') {
Url::from_directory_path(url)
} else {
Url::from_file_path(url)
};
match result {
Ok(url) => {
let path = url.path();
let (path, filename) = DataSource::find_dir_and_filename(path);
let source = if let Some(filename) = filename {
Source::Filename(filename)
} else {
Source::Dir
};
let accessor = Fs::default()
.root(&path)
.build()
.context(error::BuildBackendSnafu)?;
Ok(DataSource {
object_store: ObjectStore::new(accessor).finish(),
source,
path,
regex,
})
}
Err(()) => error::InvalidPathSnafu {
path: url.to_string(),
}
.fail(),
}
}
fn build_s3_backend(
host: Option<&str>,
path: &str,
connection: HashMap<String, String>,
) -> Result<ObjectStore> {
let mut builder = S3::default();
builder.root(path);
if let Some(bucket) = host {
builder.bucket(bucket);
}
if let Some(endpoint) = connection.get(ENDPOINT_URL) {
builder.endpoint(endpoint);
}
if let Some(region) = connection.get(REGION) {
builder.region(region);
}
if let Some(key_id) = connection.get(ACCESS_KEY_ID) {
builder.access_key_id(key_id);
}
if let Some(key) = connection.get(SECRET_ACCESS_KEY) {
builder.secret_access_key(key);
}
if let Some(session_token) = connection.get(SESSION_TOKEN) {
builder.security_token(session_token);
}
if let Some(enable_str) = connection.get(ENABLE_VIRTUAL_HOST_STYLE) {
let enable = enable_str.as_str().parse::<bool>().map_err(|e| {
error::InvalidConnectionSnafu {
msg: format!(
"failed to parse the option {}={}, {}",
ENABLE_VIRTUAL_HOST_STYLE, enable_str, e
),
}
.build()
})?;
if enable {
builder.enable_virtual_host_style();
}
}
let accessor = builder.build().context(error::BuildBackendSnafu)?;
Ok(ObjectStore::new(accessor).finish())
}
fn from_url(
url: Url,
regex: Option<Regex>,
connection: HashMap<String, String>,
) -> Result<DataSource> {
let host = url.host_str();
let path = url.path();
let schema = url.scheme();
let (dir, filename) = DataSource::find_dir_and_filename(path);
let source = if let Some(filename) = filename {
Source::Filename(filename)
} else {
Source::Dir
};
let object_store = match schema.to_uppercase().as_str() {
S3_SCHEMA => DataSource::build_s3_backend(host, &dir, connection)?,
_ => {
return error::UnsupportedBackendProtocolSnafu {
protocol: schema.to_string(),
}
.fail()
}
};
Ok(DataSource {
object_store,
source,
path: dir,
regex,
})
}
pub fn new(
url: &str,
pattern: Option<String>,
connection: HashMap<String, String>,
) -> Result<DataSource> {
let regex = if let Some(pattern) = pattern {
let regex = Regex::new(&pattern).context(error::BuildRegexSnafu)?;
Some(regex)
} else {
None
};
let result = Url::parse(url);
match result {
Ok(url) => DataSource::from_url(url, regex, connection),
Err(err) => {
if ParseError::RelativeUrlWithoutBase == err {
DataSource::from_path(url, regex)
} else {
Err(error::Error::InvalidUrl {
url: url.to_string(),
source: err,
})
}
}
}
}
pub async fn list(&self) -> Result<Vec<Object>> {
match &self.source {
Source::Dir => {
let streamer = self
.object_store
.object("/")
.list()
.await
.context(error::ListObjectsSnafu { path: &self.path })?;
streamer
.try_filter(|f| {
let res = if let Some(regex) = &self.regex {
regex.is_match(f.name())
} else {
true
};
future::ready(res)
})
.try_collect::<Vec<_>>()
.await
.context(error::ListObjectsSnafu { path: &self.path })
}
Source::Filename(filename) => {
let obj = self.object_store.object(filename);
Ok(vec![obj])
}
}
}
fn find_dir_and_filename(path: &str) -> (String, Option<String>) {
if path.is_empty() {
("/".to_string(), None)
} else if path.ends_with('/') {
(path.to_string(), None)
} else if let Some(idx) = path.rfind('/') {
(
path[..idx + 1].to_string(),
Some(path[idx + 1..].to_string()),
)
} else {
("/".to_string(), Some(path.to_string()))
}
}
}
#[cfg(test)]
mod tests {
use url::Url;
use super::*;
#[test]
fn test_parse_uri() {
struct Test<'a> {
uri: &'a str,
expected_path: &'a str,
expected_schema: &'a str,
}
let tests = [
Test {
uri: "s3://bucket/to/path/",
expected_path: "/to/path/",
expected_schema: "s3",
},
Test {
uri: "fs:///to/path/",
expected_path: "/to/path/",
expected_schema: "fs",
},
Test {
uri: "fs:///to/path/file",
expected_path: "/to/path/file",
expected_schema: "fs",
},
];
for test in tests {
let parsed_uri = Url::parse(test.uri).unwrap();
assert_eq!(parsed_uri.path(), test.expected_path);
assert_eq!(parsed_uri.scheme(), test.expected_schema);
}
}
#[test]
fn test_parse_path_and_dir() {
let parsed = Url::from_file_path("/to/path/file").unwrap();
assert_eq!(parsed.path(), "/to/path/file");
let parsed = Url::from_directory_path("/to/path/").unwrap();
assert_eq!(parsed.path(), "/to/path/");
}
#[test]
fn test_find_dir_and_filename() {
struct Test<'a> {
path: &'a str,
expected_dir: &'a str,
expected_filename: Option<String>,
}
let tests = [
Test {
path: "to/path/",
expected_dir: "to/path/",
expected_filename: None,
},
Test {
path: "to/path/filename",
expected_dir: "to/path/",
expected_filename: Some("filename".into()),
},
Test {
path: "/to/path/filename",
expected_dir: "/to/path/",
expected_filename: Some("filename".into()),
},
Test {
path: "/",
expected_dir: "/",
expected_filename: None,
},
Test {
path: "filename",
expected_dir: "/",
expected_filename: Some("filename".into()),
},
Test {
path: "",
expected_dir: "/",
expected_filename: None,
},
];
for test in tests {
let (path, filename) = DataSource::find_dir_and_filename(test.path);
assert_eq!(test.expected_dir, path);
assert_eq!(test.expected_filename, filename)
}
}
}
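
A sketch of how the pieces above fit together for an S3 source; the bucket, credentials and pattern are placeholder values, not taken from the source.

fn example_s3_source() -> Result<DataSource> {
    use std::collections::HashMap;

    // Connection options come from the COPY FROM statement; the keys match the
    // constants defined above (ENDPOINT_URL, ACCESS_KEY_ID, ...).
    let mut connection = HashMap::new();
    connection.insert(ACCESS_KEY_ID.to_string(), "my-key-id".to_string());
    connection.insert(SECRET_ACCESS_KEY.to_string(), "my-secret".to_string());
    connection.insert(REGION.to_string(), "us-east-1".to_string());

    // A directory-style URL plus a PATTERN regex: list() then returns every object
    // under export/ whose name matches `demo.*`.
    DataSource::new(
        "s3://my-bucket/export/",
        Some("demo.*".to_string()),
        connection,
    )
}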


@@ -795,6 +795,91 @@ async fn test_execute_copy_to() {
assert!(matches!(output, Output::AffectedRows(2)));
}
#[tokio::test(flavor = "multi_thread")]
async fn test_execute_copy_from() {
let instance = setup_test_instance("test_execute_copy_from").await;
// setups
execute_sql(
&instance,
"create table demo(host string, cpu double, memory double, ts timestamp time index);",
)
.await;
let output = execute_sql(
&instance,
r#"insert into demo(host, cpu, memory, ts) values
('host1', 66.6, 1024, 1655276557000),
('host2', 88.8, 333.3, 1655276558000)
"#,
)
.await;
assert!(matches!(output, Output::AffectedRows(2)));
// export
let data_dir = instance.data_tmp_dir().path();
let copy_to_stmt = format!("Copy demo TO '{}/export/demo.parquet'", data_dir.display());
let output = execute_sql(&instance, &copy_to_stmt).await;
assert!(matches!(output, Output::AffectedRows(2)));
struct Test<'a> {
sql: &'a str,
table_name: &'a str,
}
let tests = [
Test {
sql: &format!(
"Copy with_filename FROM '{}/export/demo.parquet_1_2'",
data_dir.display()
),
table_name: "with_filename",
},
Test {
sql: &format!("Copy with_path FROM '{}/export/'", data_dir.display()),
table_name: "with_path",
},
Test {
sql: &format!(
"Copy with_pattern FROM '{}/export/' WITH (PATTERN = 'demo.*')",
data_dir.display()
),
table_name: "with_pattern",
},
];
for test in tests {
// import
execute_sql(
&instance,
&format!(
"create table {}(host string, cpu double, memory double, ts timestamp time index);",
test.table_name
),
)
.await;
let output = execute_sql(&instance, test.sql).await;
assert!(matches!(output, Output::AffectedRows(2)));
let output = execute_sql(
&instance,
&format!("select * from {} order by ts", test.table_name),
)
.await;
let expected = "\
+-------+------+--------+---------------------+
| host | cpu | memory | ts |
+-------+------+--------+---------------------+
| host1 | 66.6 | 1024.0 | 2022-06-15T07:02:37 |
| host2 | 88.8 | 333.3 | 2022-06-15T07:02:38 |
+-------+------+--------+---------------------+"
.to_string();
check_output_stream(output, expected).await;
}
}
#[tokio::test(flavor = "multi_thread")]
async fn test_create_by_procedure() {
common_telemetry::init_default_ut_logging();


@@ -17,6 +17,7 @@ use std::sync::Arc;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_query::Output;
use common_recordbatch::util;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use mito::config::EngineConfig;
@@ -26,7 +27,6 @@ use servers::Mode;
use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
use table::requests::{CreateTableRequest, TableOptions};
use tempdir::TempDir;
use crate::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, ProcedureConfig, WalConfig};
use crate::error::{CreateTableSnafu, Result};
@@ -55,7 +55,7 @@ impl MockInstance {
pub(crate) async fn with_procedure_enabled(name: &str) -> Self {
let (mut opts, _guard) = create_tmp_dir_and_datanode_opts(name);
let procedure_dir = TempDir::new(&format!("gt_procedure_{name}")).unwrap();
let procedure_dir = create_temp_dir(&format!("gt_procedure_{name}"));
opts.procedure = Some(ProcedureConfig {
store: ObjectStoreConfig::File(FileConfig {
data_dir: procedure_dir.path().to_str().unwrap().to_string(),
@@ -87,8 +87,8 @@ struct TestGuard {
}
fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard) {
let wal_tmp_dir = TempDir::new(&format!("gt_wal_{name}")).unwrap();
let data_tmp_dir = TempDir::new(&format!("gt_data_{name}")).unwrap();
let wal_tmp_dir = create_temp_dir(&format!("gt_wal_{name}"));
let data_tmp_dir = create_temp_dir(&format!("gt_data_{name}"));
let opts = DatanodeOptions {
wal: WalConfig {
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),


@@ -16,14 +16,14 @@ use std::any::Any;
use std::fmt;
use std::sync::Arc;
use arrow::array::{Array, ArrayRef};
use snafu::ResultExt;
use arrow::array::{Array, ArrayRef, UInt32Array};
use snafu::{ensure, ResultExt};
use crate::data_type::ConcreteDataType;
use crate::error::{Result, SerializeSnafu};
use crate::error::{self, Result, SerializeSnafu};
use crate::serialize::Serializable;
use crate::value::{Value, ValueRef};
use crate::vectors::{BooleanVector, Helper, Validity, Vector, VectorRef};
use crate::vectors::{BooleanVector, Helper, UInt32Vector, Validity, Vector, VectorRef};
#[derive(Clone)]
pub struct ConstantVector {
@@ -83,6 +83,35 @@ impl ConstantVector {
self.length,
)))
}
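/// Takes `indices` from a constant vector: once the indices are checked to be non-null and
/// in bounds, the result is simply another constant vector of length `indices.len()`.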
pub(crate) fn take_vector(&self, indices: &UInt32Vector) -> Result<VectorRef> {
if indices.is_empty() {
return Ok(self.slice(0, 0));
}
ensure!(
indices.null_count() == 0,
error::UnsupportedOperationSnafu {
op: "taking a null index",
vector_type: self.vector_type_name(),
}
);
let len = self.len();
let arr = indices.to_arrow_array();
let indices_arr = arr.as_any().downcast_ref::<UInt32Array>().unwrap();
if !arrow::compute::min_boolean(
&arrow::compute::lt_scalar(indices_arr, len as u32).unwrap(),
)
.unwrap()
{
panic!("Array index out of bounds, cannot take index out of the length of the array: {len}");
}
Ok(Arc::new(ConstantVector::new(
self.inner().clone(),
indices.len(),
)))
}
}
impl Vector for ConstantVector {


@@ -16,6 +16,7 @@ mod cast;
mod filter;
mod find_unique;
mod replicate;
mod take;
use common_base::BitVec;
@@ -24,7 +25,7 @@ use crate::types::LogicalPrimitiveType;
use crate::vectors::constant::ConstantVector;
use crate::vectors::{
BinaryVector, BooleanVector, ConcreteDataType, ListVector, NullVector, PrimitiveVector,
StringVector, Vector, VectorRef,
StringVector, UInt32Vector, Vector, VectorRef,
};
/// Vector compute operations.
@@ -63,6 +64,12 @@ pub trait VectorOp {
///
/// TODO(dennis): describe behaviors in detail.
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef>;
/// Take elements from the vector by the given indices.
///
/// # Panics
/// Panics if an index is out of bounds.
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef>;
}
macro_rules! impl_scalar_vector_op {
@@ -84,6 +91,10 @@ macro_rules! impl_scalar_vector_op {
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
cast::cast_non_constant!(self, to_type)
}
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
take::take_indices!(self, $VectorType, indices)
}
}
)+};
}
@@ -108,6 +119,10 @@ impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
cast::cast_non_constant!(self, to_type)
}
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
take::take_indices!(self, PrimitiveVector<T>, indices)
}
}
impl VectorOp for NullVector {
@@ -131,6 +146,10 @@ impl VectorOp for NullVector {
}
.fail()
}
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
take::take_indices!(self, NullVector, indices)
}
}
impl VectorOp for ConstantVector {
@@ -150,4 +169,8 @@ impl VectorOp for ConstantVector {
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
self.cast_vector(to_type)
}
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
self.take_vector(indices)
}
}
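
A short usage sketch for the new `take` operation, assuming it sits inside this module so the trait and the `UInt32Vector`/`VectorRef` imports above are in scope; the function itself is illustrative only.

fn reorder_rows() -> VectorRef {
    use crate::vectors::Int32Vector;

    let v = Int32Vector::from_slice([10, 20, 30, 40]);
    // Gather rows 3, 0 and 2; an out-of-bounds index panics, while a null index
    // produces a null value in the output.
    let indices = UInt32Vector::from_slice([3, 0, 2]);
    let taken = v.take(&indices).unwrap();
    assert_eq!(3, taken.len());
    taken
}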


@@ -0,0 +1,203 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
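/// Applies arrow's `take` kernel to `$vector` using `$indices`, then converts the resulting
/// array back into a `$VectorType`.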
macro_rules! take_indices {
($vector: expr, $VectorType: ty, $indices: ident) => {{
use std::sync::Arc;
use arrow::compute;
use snafu::ResultExt;
let arrow_array = $vector.as_arrow();
let taken = compute::take(arrow_array, $indices.as_arrow(), None)
.context(crate::error::ArrowComputeSnafu)?;
Ok(Arc::new(<$VectorType>::try_from_arrow_array(taken)?))
}};
}
pub(crate) use take_indices;
#[cfg(test)]
mod tests {
use std::sync::Arc;
use arrow::array::{PrimitiveArray, UInt32Array};
use common_time::{Date, DateTime};
use crate::prelude::VectorRef;
use crate::scalars::ScalarVector;
use crate::timestamp::{
TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
};
use crate::types::{LogicalPrimitiveType, WrapperType};
use crate::vectors::operations::VectorOp;
use crate::vectors::{
BooleanVector, ConstantVector, Int32Vector, NullVector, PrimitiveVector, StringVector,
UInt32Vector,
};
fn check_take_primitive<T>(
input: Vec<Option<T::Native>>,
indices: Vec<Option<u32>>,
expect: Vec<Option<T::Native>>,
) where
T: LogicalPrimitiveType,
PrimitiveArray<T::ArrowPrimitive>: From<Vec<Option<T::Native>>>,
{
let v = PrimitiveVector::<T>::new(PrimitiveArray::<T::ArrowPrimitive>::from(input));
let indices = UInt32Vector::new(UInt32Array::from(indices));
let output = v.take(&indices).unwrap();
let expected: VectorRef = Arc::new(PrimitiveVector::<T>::new(PrimitiveArray::<
T::ArrowPrimitive,
>::from(expect)));
assert_eq!(expected, output);
}
macro_rules! take_time_like_test {
($VectorType: ident, $ValueType: ident, $method: ident) => {{
use $crate::vectors::{$VectorType, VectorRef};
let v = $VectorType::from_iterator((0..5).map($ValueType::$method));
let indices = UInt32Vector::from_slice(&[3, 0, 1, 4]);
let out = v.take(&indices).unwrap();
let expect: VectorRef = Arc::new($VectorType::from_iterator(
[3, 0, 1, 4].into_iter().map($ValueType::$method),
));
assert_eq!(expect, out);
}};
}
#[test]
fn test_take_primitive() {
// nullable int32
check_take_primitive::<crate::types::Int32Type>(
vec![Some(1), None, Some(3), Some(4), Some(-5)],
vec![Some(3), None, Some(0), Some(1), Some(4)],
vec![Some(4), None, Some(1), None, Some(-5)],
);
// nullable float32
check_take_primitive::<crate::types::Float32Type>(
vec![Some(3.24), None, Some(1.34), Some(4.13), Some(5.13)],
vec![Some(3), None, Some(0), Some(1), Some(4)],
vec![Some(4.13), None, Some(3.24), None, Some(5.13)],
);
// nullable uint32
check_take_primitive::<crate::types::UInt32Type>(
vec![Some(0), None, Some(2), Some(3), Some(4)],
vec![Some(4), None, Some(2), Some(1), Some(3)],
vec![Some(4), None, Some(2), None, Some(3)],
);
// test date like type
take_time_like_test!(DateVector, Date, new);
take_time_like_test!(DateTimeVector, DateTime, new);
take_time_like_test!(TimestampSecondVector, TimestampSecond, from_native);
take_time_like_test!(
TimestampMillisecondVector,
TimestampMillisecond,
from_native
);
take_time_like_test!(
TimestampMicrosecondVector,
TimestampMicrosecond,
from_native
);
take_time_like_test!(TimestampNanosecondVector, TimestampNanosecond, from_native);
}
fn check_take_constant(expect_length: usize, input_length: usize, indices: &[u32]) {
let v = ConstantVector::new(Arc::new(Int32Vector::from_slice([111])), input_length);
let indices = UInt32Vector::from_slice(indices);
let out = v.take(&indices).unwrap();
assert!(out.is_const());
assert_eq!(expect_length, out.len());
}
#[test]
fn test_take_constant() {
check_take_constant(2, 5, &[3, 4]);
check_take_constant(3, 10, &[1, 2, 3]);
check_take_constant(4, 10, &[1, 5, 3, 6]);
check_take_constant(5, 10, &[1, 9, 8, 7, 3]);
}
#[test]
#[should_panic]
fn test_take_constant_out_of_index() {
check_take_constant(2, 5, &[3, 5]);
}
#[test]
#[should_panic]
fn test_take_out_of_index() {
let v = Int32Vector::from_slice([1, 2, 3, 4, 5]);
let indices = UInt32Vector::from_slice([1, 5, 6]);
v.take(&indices).unwrap();
}
#[test]
fn test_take_null() {
let v = NullVector::new(5);
let indices = UInt32Vector::from_slice([1, 3, 2]);
let out = v.take(&indices).unwrap();
let expect: VectorRef = Arc::new(NullVector::new(3));
assert_eq!(expect, out);
}
#[test]
fn test_take_scalar() {
let v = StringVector::from_slice(&["0", "1", "2", "3"]);
let indices = UInt32Vector::from_slice([1, 3, 2]);
let out = v.take(&indices).unwrap();
let expect: VectorRef = Arc::new(StringVector::from_slice(&["1", "3", "2"]));
assert_eq!(expect, out);
}
#[test]
fn test_take_bool() {
let v = BooleanVector::from_slice(&[false, true, false, true, false, false, true]);
let indices = UInt32Vector::from_slice([1, 3, 5, 6]);
let out = v.take(&indices).unwrap();
let expected: VectorRef = Arc::new(BooleanVector::from_slice(&[true, true, false, true]));
assert_eq!(out, expected);
let v = BooleanVector::from(vec![
Some(true),
None,
Some(false),
Some(true),
Some(false),
Some(false),
Some(true),
None,
]);
let indices = UInt32Vector::from(vec![Some(1), None, Some(3), Some(5), Some(6)]);
let out = v.take(&indices).unwrap();
let expected: VectorRef = Arc::new(BooleanVector::from(vec![
None,
None,
Some(true),
Some(false),
Some(true),
]));
assert_eq!(out, expected);
}
}


@@ -48,10 +48,10 @@ tokio.workspace = true
tonic.workspace = true
[dev-dependencies]
common-test-util = { path = "../common/test-util" }
datanode = { path = "../datanode" }
futures = "0.3"
meta-srv = { path = "../meta-srv", features = ["mock"] }
strfmt = "0.2"
tempdir = "0.3"
toml = "0.5"
tower = "0.4"


@@ -28,8 +28,8 @@ use crate::instance::FrontendInstance;
use crate::mysql::MysqlOptions;
use crate::opentsdb::OpentsdbOptions;
use crate::postgres::PostgresOptions;
use crate::prom::PromOptions;
use crate::prometheus::PrometheusOptions;
use crate::promql::PromqlOptions;
use crate::server::Services;
#[derive(Clone, Debug, Serialize, Deserialize)]
@@ -43,7 +43,7 @@ pub struct FrontendOptions {
pub opentsdb_options: Option<OpentsdbOptions>,
pub influxdb_options: Option<InfluxdbOptions>,
pub prometheus_options: Option<PrometheusOptions>,
pub promql_options: Option<PromqlOptions>,
pub prom_options: Option<PromOptions>,
pub meta_client_options: Option<MetaClientOptions>,
}
@@ -58,7 +58,7 @@ impl Default for FrontendOptions {
opentsdb_options: Some(OpentsdbOptions::default()),
influxdb_options: Some(InfluxdbOptions::default()),
prometheus_options: Some(PrometheusOptions::default()),
promql_options: Some(PromqlOptions::default()),
prom_options: Some(PromOptions::default()),
meta_client_options: None,
}
}


@@ -49,7 +49,7 @@ use query::parser::PromQuery;
use query::query_engine::options::{validate_catalog_and_schema, QueryOptions};
use servers::error as server_error;
use servers::interceptor::{SqlQueryInterceptor, SqlQueryInterceptorRef};
use servers::promql::{PromqlHandler, PromqlHandlerRef};
use servers::prom::{PromHandler, PromHandlerRef};
use servers::query_handler::grpc::{GrpcQueryHandler, GrpcQueryHandlerRef};
use servers::query_handler::sql::{SqlQueryHandler, SqlQueryHandlerRef};
use servers::query_handler::{
@@ -81,7 +81,7 @@ pub trait FrontendInstance:
+ InfluxdbLineProtocolHandler
+ PrometheusProtocolHandler
+ ScriptHandler
+ PromqlHandler
+ PromHandler
+ Send
+ Sync
+ 'static
@@ -99,7 +99,7 @@ pub struct Instance {
script_handler: Option<ScriptHandlerRef>,
sql_handler: SqlQueryHandlerRef<Error>,
grpc_query_handler: GrpcQueryHandlerRef<Error>,
promql_handler: Option<PromqlHandlerRef>,
promql_handler: Option<PromHandlerRef>,
create_expr_factory: CreateExprFactoryRef,
@@ -539,7 +539,7 @@ impl ScriptHandler for Instance {
}
#[async_trait]
impl PromqlHandler for Instance {
impl PromHandler for Instance {
async fn do_query(&self, query: &PromQuery) -> server_error::Result<Output> {
if let Some(promql_handler) = &self.promql_handler {
promql_handler.do_query(query).await


@@ -57,6 +57,7 @@ impl GrpcQueryHandler for DistInstance {
TableName::new(&expr.catalog_name, &expr.schema_name, &expr.table_name);
self.drop_table(table_name).await
}
DdlExpr::FlushTable(_) => todo!(),
}
}
}


@@ -25,8 +25,8 @@ pub mod instance;
pub mod mysql;
pub mod opentsdb;
pub mod postgres;
pub mod prom;
pub mod prometheus;
pub mod promql;
mod server;
mod sql;
mod table;


@@ -15,11 +15,11 @@
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PromqlOptions {
pub struct PromOptions {
pub addr: String,
}
impl Default for PromqlOptions {
impl Default for PromOptions {
fn default() -> Self {
Self {
addr: "127.0.0.1:4004".to_string(),
@@ -29,11 +29,11 @@ impl Default for PromqlOptions {
#[cfg(test)]
mod tests {
use super::PromqlOptions;
use super::PromOptions;
#[test]
fn test_prometheus_options() {
let default = PromqlOptions::default();
let default = PromOptions::default();
assert_eq!(default.addr, "127.0.0.1:4004".to_string());
}
}


@@ -25,7 +25,7 @@ use servers::http::HttpServer;
use servers::mysql::server::{MysqlServer, MysqlSpawnConfig, MysqlSpawnRef};
use servers::opentsdb::OpentsdbServer;
use servers::postgres::PostgresServer;
use servers::promql::PromqlServer;
use servers::prom::PromServer;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdaptor;
use servers::query_handler::sql::ServerSqlQueryHandlerAdaptor;
use servers::server::Server;
@@ -183,15 +183,15 @@ impl Services {
None
};
let promql_server_and_addr = if let Some(promql_options) = &opts.promql_options {
let promql_addr = parse_addr(&promql_options.addr)?;
let prom_server_and_addr = if let Some(prom_options) = &opts.prom_options {
let prom_addr = parse_addr(&prom_options.addr)?;
let mut promql_server = PromqlServer::create_server(instance.clone());
let mut prom_server = PromServer::create_server(instance.clone());
if let Some(user_provider) = user_provider {
promql_server.set_user_provider(user_provider);
prom_server.set_user_provider(user_provider);
}
Some((promql_server as _, promql_addr))
Some((prom_server as _, prom_addr))
} else {
None
};
@@ -202,7 +202,7 @@ impl Services {
start_server(mysql_server_and_addr),
start_server(postgres_server_and_addr),
start_server(opentsdb_server_and_addr),
start_server(promql_server_and_addr),
start_server(prom_server_and_addr),
)
.context(error::StartServerSnafu)?;
Ok(())


@@ -20,6 +20,7 @@ use catalog::remote::MetaKvBackend;
use client::Client;
use common_grpc::channel_manager::ChannelManager;
use common_runtime::Builder as RuntimeBuilder;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datanode::datanode::{DatanodeOptions, FileConfig, ObjectStoreConfig, WalConfig};
use datanode::instance::Instance as DatanodeInstance;
use meta_client::client::MetaClientBuilder;
@@ -33,7 +34,6 @@ use partition::route::TableRoutes;
use servers::grpc::GrpcServer;
use servers::query_handler::grpc::ServerGrpcQueryHandlerAdaptor;
use servers::Mode;
use tempdir::TempDir;
use tonic::transport::Server;
use tower::service_fn;
@@ -75,8 +75,8 @@ pub(crate) async fn create_standalone_instance(test_name: &str) -> MockStandalon
}
fn create_tmp_dir_and_datanode_opts(name: &str) -> (DatanodeOptions, TestGuard) {
let wal_tmp_dir = TempDir::new(&format!("gt_wal_{name}")).unwrap();
let data_tmp_dir = TempDir::new(&format!("gt_data_{name}")).unwrap();
let wal_tmp_dir = create_temp_dir(&format!("gt_wal_{name}"));
let data_tmp_dir = create_temp_dir(&format!("gt_data_{name}"));
let opts = DatanodeOptions {
wal: WalConfig {
dir: wal_tmp_dir.path().to_str().unwrap().to_string(),
@@ -161,8 +161,8 @@ async fn create_distributed_datanode(
datanode_id: u64,
meta_srv: MockInfo,
) -> (Arc<DatanodeInstance>, TestGuard) {
let wal_tmp_dir = TempDir::new(&format!("gt_wal_{test_name}_dist_dn_{datanode_id}")).unwrap();
let data_tmp_dir = TempDir::new(&format!("gt_data_{test_name}_dist_dn_{datanode_id}")).unwrap();
let wal_tmp_dir = create_temp_dir(&format!("gt_wal_{test_name}_dist_dn_{datanode_id}"));
let data_tmp_dir = create_temp_dir(&format!("gt_data_{test_name}_dist_dn_{datanode_id}"));
let opts = DatanodeOptions {
node_id: Some(datanode_id),
wal: WalConfig {


@@ -28,9 +28,9 @@ protobuf = { version = "2", features = ["bytes"] }
raft-engine = "0.3"
snafu = { version = "0.7", features = ["backtraces"] }
store-api = { path = "../store-api" }
tempdir = "0.3"
tokio.workspace = true
tokio-util.workspace = true
[dev-dependencies]
common-test-util = { path = "../common/test-util" }
rand = "0.8"


@@ -338,12 +338,12 @@ mod tests {
use std::time::Duration;
use common_telemetry::debug;
use common_test_util::temp_dir::create_temp_dir;
use futures_util::StreamExt;
use raft_engine::ReadableSize;
use store_api::logstore::entry_stream::SendableEntryStream;
use store_api::logstore::namespace::Namespace as NamespaceTrait;
use store_api::logstore::LogStore;
use tempdir::TempDir;
use crate::config::LogConfig;
use crate::error::Error;
@@ -352,7 +352,7 @@ mod tests {
#[tokio::test]
async fn test_open_logstore() {
let dir = TempDir::new("raft-engine-logstore-test").unwrap();
let dir = create_temp_dir("raft-engine-logstore-test");
let logstore = RaftEngineLogStore::try_new(LogConfig {
log_file_dir: dir.path().to_str().unwrap().to_string(),
..Default::default()
@@ -366,7 +366,7 @@ mod tests {
#[tokio::test]
async fn test_manage_namespace() {
let dir = TempDir::new("raft-engine-logstore-test").unwrap();
let dir = create_temp_dir("raft-engine-logstore-test");
let mut logstore = RaftEngineLogStore::try_new(LogConfig {
log_file_dir: dir.path().to_str().unwrap().to_string(),
..Default::default()
@@ -393,7 +393,7 @@ mod tests {
#[tokio::test]
async fn test_append_and_read() {
let dir = TempDir::new("raft-engine-logstore-test").unwrap();
let dir = create_temp_dir("raft-engine-logstore-test");
let logstore = RaftEngineLogStore::try_new(LogConfig {
log_file_dir: dir.path().to_str().unwrap().to_string(),
..Default::default()
@@ -434,7 +434,7 @@ mod tests {
#[tokio::test]
async fn test_reopen() {
let dir = TempDir::new("raft-engine-logstore-reopen-test").unwrap();
let dir = create_temp_dir("raft-engine-logstore-reopen-test");
{
let logstore = RaftEngineLogStore::try_new(LogConfig {
log_file_dir: dir.path().to_str().unwrap().to_string(),
@@ -491,7 +491,7 @@ mod tests {
#[tokio::test]
async fn test_compaction() {
common_telemetry::init_default_ut_logging();
let dir = TempDir::new("raft-engine-logstore-test").unwrap();
let dir = create_temp_dir("raft-engine-logstore-test");
let config = LogConfig {
log_file_dir: dir.path().to_str().unwrap().to_string(),
@@ -524,7 +524,7 @@ mod tests {
#[tokio::test]
async fn test_obsolete() {
common_telemetry::init_default_ut_logging();
let dir = TempDir::new("raft-engine-logstore-test").unwrap();
let dir = create_temp_dir("raft-engine-logstore-test");
let config = LogConfig {
log_file_dir: dir.path().to_str().unwrap().to_string(),


@@ -12,21 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use tempdir::TempDir;
use crate::raft_engine::log_store::RaftEngineLogStore;
use crate::LogConfig;
/// Create a tmp directory for write log, used for test.
// TODO: Add a test feature
pub async fn create_tmp_local_file_log_store(dir: &str) -> (RaftEngineLogStore, TempDir) {
let dir = TempDir::new(dir).unwrap();
/// Create a log store at the provided path, used for tests.
pub async fn create_tmp_local_file_log_store(path: &str) -> RaftEngineLogStore {
let cfg = LogConfig {
file_size: 128 * 1024,
log_file_dir: dir.path().to_str().unwrap().to_string(),
log_file_dir: path.to_string(),
..Default::default()
};
let logstore = RaftEngineLogStore::try_new(cfg).await.unwrap();
(logstore, dir)
RaftEngineLogStore::try_new(cfg).await.unwrap()
}
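
A sketch of a test using the new signature together with `common-test-util`; the test name is illustrative. The caller now owns the temp-directory guard, so it must outlive the returned log store.

#[tokio::test]
async fn open_log_store_in_temp_dir() {
    use common_test_util::temp_dir::create_temp_dir;

    // Dropping `dir` deletes the directory, so keep the guard alive for as long
    // as the log store is in use.
    let dir = create_temp_dir("gt_logstore_test");
    let logstore = create_tmp_local_file_log_store(dir.path().to_str().unwrap()).await;
    drop(logstore);
    drop(dir);
}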


@@ -32,9 +32,10 @@ use crate::error::Result;
use crate::rpc::lock::{LockRequest, LockResponse, UnlockRequest};
use crate::rpc::router::DeleteRequest;
use crate::rpc::{
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, CreateRequest,
DeleteRangeRequest, DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest,
PutResponse, RangeRequest, RangeResponse, RouteRequest, RouteResponse,
BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchPutResponse, CompareAndPutRequest,
CompareAndPutResponse, CreateRequest, DeleteRangeRequest, DeleteRangeResponse,
MoveValueRequest, MoveValueResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
RouteRequest, RouteResponse,
};
pub type Id = (u64, u64);
@@ -245,6 +246,11 @@ impl MetaClient {
self.store_client()?.put(req.into()).await?.try_into()
}
/// BatchGet atomically gets the values of the given keys from the key-value store.
pub async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
self.store_client()?.batch_get(req.into()).await?.try_into()
}
/// BatchPut atomically puts the given keys into the key-value store.
pub async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
self.store_client()?.batch_put(req.into()).await?.try_into()
@@ -713,6 +719,26 @@ mod tests {
assert_eq!(2, kvs.len());
}
#[tokio::test]
async fn test_batch_get() {
let tc = new_client("test_batch_get").await;
tc.gen_data().await;
let req = BatchGetRequest::default()
.add_key(tc.key("key-1"))
.add_key(tc.key("key-2"));
let mut res = tc.client.batch_get(req).await.unwrap();
assert_eq!(2, res.take_kvs().len());
let req = BatchGetRequest::default()
.add_key(tc.key("key-1"))
.add_key(tc.key("key-222"));
let mut res = tc.client.batch_get(req).await.unwrap();
assert_eq!(1, res.take_kvs().len());
}
#[tokio::test]
async fn test_batch_put_with_prev_kv() {
let tc = new_client("test_batch_put_with_prev_kv").await;


@@ -17,9 +17,9 @@ use std::sync::Arc;
use api::v1::meta::store_client::StoreClient;
use api::v1::meta::{
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
DeleteRangeRequest, DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest,
PutResponse, RangeRequest, RangeResponse,
BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchPutResponse, CompareAndPutRequest,
CompareAndPutResponse, DeleteRangeRequest, DeleteRangeResponse, MoveValueRequest,
MoveValueResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use common_grpc::channel_manager::ChannelManager;
use snafu::{ensure, OptionExt, ResultExt};
@@ -70,6 +70,11 @@ impl Client {
inner.put(req).await
}
pub async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
let inner = self.inner.read().await;
inner.batch_get(req).await
}
pub async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
let inner = self.inner.read().await;
inner.batch_put(req).await
@@ -141,6 +146,18 @@ impl Inner {
Ok(res.into_inner())
}
async fn batch_get(&self, mut req: BatchGetRequest) -> Result<BatchGetResponse> {
let mut client = self.random_client()?;
req.set_header(self.id);
let res = client
.batch_get(req)
.await
.context(error::TonicStatusSnafu)?;
Ok(res.into_inner())
}
async fn batch_put(&self, mut req: BatchPutRequest) -> Result<BatchPutResponse> {
let mut client = self.random_client()?;
req.set_header(self.id);


@@ -109,135 +109,3 @@ impl ErrorExt for Error {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
type StdResult<E> = std::result::Result<(), E>;
fn throw_none_option() -> Option<String> {
None
}
#[test]
fn test_connect_failed_error() {
fn throw_tonic_error() -> StdResult<tonic::transport::Error> {
tonic::transport::Endpoint::new("http//http").map(|_| ())
}
let e = throw_tonic_error()
.context(ConnectFailedSnafu { url: "" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_illegal_grpc_client_state_error() {
let e = throw_none_option()
.context(IllegalGrpcClientStateSnafu { err_msg: "" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_tonic_status_error() {
fn throw_tonic_status_error() -> StdResult<tonic::Status> {
Err(tonic::Status::new(tonic::Code::Aborted, ""))
}
let e = throw_tonic_status_error()
.context(TonicStatusSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_ask_leader_error() {
let e = throw_none_option().context(AskLeaderSnafu).err().unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_no_leader_error() {
let e = throw_none_option().context(NoLeaderSnafu).err().unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_create_channel_error() {
fn throw_common_grpc_error() -> StdResult<common_grpc::Error> {
tonic::transport::Endpoint::new("http//http")
.map(|_| ())
.context(common_grpc::error::CreateChannelSnafu)
}
let e = throw_common_grpc_error()
.context(CreateChannelSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_not_started_error() {
let e = throw_none_option()
.context(NotStartedSnafu { name: "" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_send_heartbeat_error() {
let e = throw_none_option()
.context(SendHeartbeatSnafu { err_msg: "" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_create_heartbeat_stream_error() {
let e = throw_none_option()
.context(CreateHeartbeatStreamSnafu)
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
#[test]
fn test_route_info_corrupted_error() {
let e = throw_none_option()
.context(RouteInfoCorruptedSnafu { err_msg: "" })
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Unexpected);
}
#[test]
fn test_illegal_server_state_error() {
let e = throw_none_option()
.context(IllegalServerStateSnafu {
code: 1,
err_msg: "",
})
.err()
.unwrap();
assert!(e.backtrace_opt().is_some());
assert_eq!(e.status_code(), StatusCode::Internal);
}
}


@@ -28,9 +28,9 @@ pub use router::{
};
use serde::{Deserialize, Serialize};
pub use store::{
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
DeleteRangeRequest, DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest,
PutResponse, RangeRequest, RangeResponse,
BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchPutResponse, CompareAndPutRequest,
CompareAndPutResponse, DeleteRangeRequest, DeleteRangeResponse, MoveValueRequest,
MoveValueResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
#[derive(Debug, Clone)]


@@ -13,6 +13,7 @@
// limitations under the License.
use api::v1::meta::{
BatchGetRequest as PbBatchGetRequest, BatchGetResponse as PbBatchGetResponse,
BatchPutRequest as PbBatchPutRequest, BatchPutResponse as PbBatchPutResponse,
CompareAndPutRequest as PbCompareAndPutRequest,
CompareAndPutResponse as PbCompareAndPutResponse, DeleteRangeRequest as PbDeleteRangeRequest,
@@ -239,6 +240,68 @@ impl PutResponse {
}
}
pub struct BatchGetRequest {
pub keys: Vec<Vec<u8>>,
}
impl From<BatchGetRequest> for PbBatchGetRequest {
fn from(req: BatchGetRequest) -> Self {
Self {
header: None,
keys: req.keys,
}
}
}
impl Default for BatchGetRequest {
fn default() -> Self {
Self::new()
}
}
impl BatchGetRequest {
#[inline]
pub fn new() -> Self {
Self { keys: vec![] }
}
#[inline]
pub fn add_key(mut self, key: impl Into<Vec<u8>>) -> Self {
self.keys.push(key.into());
self
}
}
#[derive(Debug, Clone)]
pub struct BatchGetResponse(PbBatchGetResponse);
impl TryFrom<PbBatchGetResponse> for BatchGetResponse {
type Error = error::Error;
fn try_from(pb: PbBatchGetResponse) -> Result<Self> {
util::check_response_header(pb.header.as_ref())?;
Ok(Self(pb))
}
}
impl BatchGetResponse {
#[inline]
pub fn new(res: PbBatchGetResponse) -> Self {
Self(res)
}
#[inline]
pub fn take_header(&mut self) -> Option<ResponseHeader> {
self.0.header.take().map(ResponseHeader::new)
}
#[inline]
pub fn take_kvs(&mut self) -> Vec<KeyValue> {
self.0.kvs.drain(..).map(KeyValue::new).collect()
}
}
#[derive(Debug, Clone, Default)]
pub struct BatchPutRequest {
pub kvs: Vec<PbKeyValue>,
@@ -699,6 +762,40 @@ mod tests {
assert_eq!(b"v1".to_vec(), kv.take_value());
}
#[test]
fn test_batch_get_request_trans() {
let req = BatchGetRequest::default()
.add_key(b"test_key1".to_vec())
.add_key(b"test_key2".to_vec())
.add_key(b"test_key3".to_vec());
let into_req: PbBatchGetRequest = req.into();
assert!(into_req.header.is_none());
assert_eq!(b"test_key1".as_slice(), into_req.keys.get(0).unwrap());
assert_eq!(b"test_key2".as_slice(), into_req.keys.get(1).unwrap());
assert_eq!(b"test_key3".as_slice(), into_req.keys.get(2).unwrap());
}
#[test]
fn test_batch_get_response_trans() {
let pb_res = PbBatchGetResponse {
header: None,
kvs: vec![PbKeyValue {
key: b"test_key1".to_vec(),
value: b"test_value1".to_vec(),
}],
};
let mut res = BatchGetResponse::new(pb_res);
assert!(res.take_header().is_none());
let kvs = res.take_kvs();
assert_eq!(b"test_key1".as_slice(), kvs[0].key());
assert_eq!(b"test_value1".as_slice(), kvs[0].value());
}
#[test]
fn test_batch_put_request_trans() {
let req = BatchPutRequest::new()

View File

@@ -27,7 +27,6 @@ use snafu::{ensure, OptionExt, ResultExt};
use crate::error::{match_for_io_error, Result};
use crate::keys::{StatKey, StatValue, DN_STAT_PREFIX};
use crate::metasrv::ElectionRef;
use crate::service::store::ext::KvStoreExt;
use crate::service::store::kv::ResettableKvStoreRef;
use crate::{error, util};
@@ -130,7 +129,12 @@ impl MetaPeerClient {
// Get kv information from the leader's in_mem kv store
pub async fn batch_get(&self, keys: Vec<Vec<u8>>) -> Result<Vec<KeyValue>> {
if self.is_leader() {
return self.in_memory.batch_get(keys).await;
let request = BatchGetRequest {
keys,
..Default::default()
};
return self.in_memory.batch_get(request).await.map(|resp| resp.kvs);
}
let max_retry_count = self.max_retry_count;

View File

@@ -150,6 +150,8 @@ impl Inner {
mod tests {
use std::sync::Arc;
use api::v1::meta::{BatchGetRequest, BatchGetResponse};
use super::*;
use crate::service::store::kv::KvStore;
use crate::service::store::memory::MemStore;
@@ -192,6 +194,10 @@ mod tests {
unreachable!()
}
async fn batch_get(&self, _: BatchGetRequest) -> Result<BatchGetResponse> {
unreachable!()
}
async fn compare_and_put(
&self,
_: CompareAndPutRequest,

View File

@@ -20,7 +20,6 @@ use common_telemetry::warn;
use tonic::{Request, Response};
use crate::metasrv::MetaSrv;
use crate::service::store::ext::KvStoreExt;
use crate::service::GrpcResult;
#[async_trait::async_trait]
@@ -37,8 +36,8 @@ impl cluster_server::Cluster for MetaSrv {
return Ok(Response::new(resp));
}
let req = req.into_inner();
let kvs = self.in_memory().batch_get(req.keys).await?;
let kvs = self.in_memory().batch_get(req.into_inner()).await?.kvs;
let success = ResponseHeader::success(0);
let get_resp = BatchGetResponse {

View File

@@ -22,7 +22,7 @@ use api::v1::meta::{
CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest, DeleteRangeResponse,
MoveValueRequest, MoveValueResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use tonic::{Request, Response, Status};
use tonic::{Request, Response};
use crate::metasrv::MetaSrv;
use crate::service::GrpcResult;
@@ -43,12 +43,11 @@ impl store_server::Store for MetaSrv {
Ok(Response::new(res))
}
async fn batch_get(
&self,
_request: Request<BatchGetRequest>,
) -> Result<Response<BatchGetResponse>, Status> {
// TODO(fys): please fix this
unimplemented!()
async fn batch_get(&self, req: Request<BatchGetRequest>) -> GrpcResult<BatchGetResponse> {
let req = req.into_inner();
let res = self.kv_store().batch_get(req).await?;
Ok(Response::new(res))
}
async fn batch_put(&self, req: Request<BatchPutRequest>) -> GrpcResult<BatchPutResponse> {
@@ -121,6 +120,18 @@ mod tests {
assert!(res.is_ok());
}
#[tokio::test]
async fn test_batch_get() {
let kv_store = Arc::new(MemStore::new());
let meta_srv = MetaSrvBuilder::new().kv_store(kv_store).build().await;
let req = BatchGetRequest::default();
let res = meta_srv.batch_get(req.into_request()).await;
assert!(res.is_ok());
}
#[tokio::test]
async fn test_batch_put() {
let kv_store = Arc::new(MemStore::new());

View File

@@ -15,9 +15,9 @@
use std::sync::Arc;
use api::v1::meta::{
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
DeleteRangeRequest, DeleteRangeResponse, KeyValue, MoveValueRequest, MoveValueResponse,
PutRequest, PutResponse, RangeRequest, RangeResponse, ResponseHeader,
BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchPutResponse, CompareAndPutRequest,
CompareAndPutResponse, DeleteRangeRequest, DeleteRangeResponse, KeyValue, MoveValueRequest,
MoveValueResponse, PutRequest, PutResponse, RangeRequest, RangeResponse, ResponseHeader,
};
use common_error::prelude::*;
use common_telemetry::warn;
@@ -98,6 +98,40 @@ impl KvStore for EtcdStore {
Ok(PutResponse { header, prev_kv })
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
let BatchGet {
cluster_id,
keys,
options,
} = req.try_into()?;
let get_ops: Vec<_> = keys
.into_iter()
.map(|k| TxnOp::get(k, options.clone()))
.collect();
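// Bundle all the gets into a single etcd transaction so the whole batch is fetched in one round trip.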
let txn = Txn::new().and_then(get_ops);
let txn_res = self
.client
.kv_client()
.txn(txn)
.await
.context(error::EtcdFailedSnafu)?;
let mut kvs = vec![];
for op_res in txn_res.op_responses() {
let get_res = match op_res {
TxnOpResponse::Get(get_res) => get_res,
_ => unreachable!(),
};
kvs.extend(get_res.kvs().iter().map(KvPair::to_kv));
}
let header = Some(ResponseHeader::success(cluster_id));
Ok(BatchGetResponse { header, kvs })
}
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
let BatchPut {
cluster_id,
@@ -360,6 +394,28 @@ impl TryFrom<PutRequest> for Put {
}
}
struct BatchGet {
cluster_id: u64,
keys: Vec<Vec<u8>>,
options: Option<GetOptions>,
}
impl TryFrom<BatchGetRequest> for BatchGet {
type Error = error::Error;
fn try_from(req: BatchGetRequest) -> Result<Self> {
let BatchGetRequest { header, keys } = req;
let options = GetOptions::default().with_keys_only();
Ok(BatchGet {
cluster_id: header.map_or(0, |h| h.cluster_id),
keys,
options: Some(options),
})
}
}
struct BatchPut {
cluster_id: u64,
kvs: Vec<KeyValue>,
@@ -539,6 +595,21 @@ mod tests {
assert!(put.options.is_some());
}
#[test]
fn test_parse_batch_get() {
let req = BatchGetRequest {
keys: vec![b"k1".to_vec(), b"k2".to_vec(), b"k3".to_vec()],
..Default::default()
};
let batch_get: BatchGet = req.try_into().unwrap();
let keys = batch_get.keys;
assert_eq!(b"k1".to_vec(), keys.get(0).unwrap().to_vec());
assert_eq!(b"k2".to_vec(), keys.get(1).unwrap().to_vec());
assert_eq!(b"k3".to_vec(), keys.get(2).unwrap().to_vec());
}
#[test]
fn test_parse_batch_put() {
let req = BatchPutRequest {

View File

@@ -21,8 +21,6 @@ use crate::service::store::kv::KvStore;
#[async_trait::async_trait]
pub trait KvStoreExt {
async fn get(&self, key: Vec<u8>) -> Result<Option<KeyValue>>;
async fn batch_get(&self, key: Vec<Vec<u8>>) -> Result<Vec<KeyValue>>;
}
#[async_trait::async_trait]
@@ -53,18 +51,6 @@ where
// Safety: the length check has been performed before using unwrap()
Ok(Some(kvs.pop().unwrap()))
}
async fn batch_get(&self, keys: Vec<Vec<u8>>) -> Result<Vec<KeyValue>> {
let mut kvs = Vec::with_capacity(keys.len());
for key in keys {
if let Some(kv) = self.get(key).await? {
kvs.push(kv);
}
}
Ok(kvs)
}
}
#[cfg(test)]
@@ -106,31 +92,6 @@ mod tests {
assert!(may_kv.is_none());
}
#[tokio::test]
async fn test_batch_get() {
let mut in_mem = Arc::new(MemStore::new()) as KvStoreRef;
put_stats_to_store(&mut in_mem).await;
let keys = vec![
"test_key1".as_bytes().to_vec(),
"test_key1".as_bytes().to_vec(),
"test_key2".as_bytes().to_vec(),
];
let kvs = in_mem.batch_get(keys).await.unwrap();
assert_eq!(3, kvs.len());
assert_eq!("test_key1".as_bytes(), kvs[0].key);
assert_eq!("test_key1".as_bytes(), kvs[1].key);
assert_eq!("test_key2".as_bytes(), kvs[2].key);
assert_eq!("test_val1".as_bytes(), kvs[0].value);
assert_eq!("test_val1".as_bytes(), kvs[1].value);
assert_eq!("test_val2".as_bytes(), kvs[2].value);
}
async fn put_stats_to_store(store: &mut KvStoreRef) {
store
.put(PutRequest {

View File

@@ -15,9 +15,9 @@
use std::sync::Arc;
use api::v1::meta::{
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
DeleteRangeRequest, DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest,
PutResponse, RangeRequest, RangeResponse,
BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchPutResponse, CompareAndPutRequest,
CompareAndPutResponse, DeleteRangeRequest, DeleteRangeResponse, MoveValueRequest,
MoveValueResponse, PutRequest, PutResponse, RangeRequest, RangeResponse,
};
use crate::error::Result;
@@ -31,6 +31,8 @@ pub trait KvStore: Send + Sync {
async fn put(&self, req: PutRequest) -> Result<PutResponse>;
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse>;
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse>;
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse>;

View File

@@ -17,12 +17,13 @@ use std::collections::BTreeMap;
use std::ops::Range;
use api::v1::meta::{
BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse,
DeleteRangeRequest, DeleteRangeResponse, KeyValue, MoveValueRequest, MoveValueResponse,
PutRequest, PutResponse, RangeRequest, RangeResponse, ResponseHeader,
BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchPutResponse, CompareAndPutRequest,
CompareAndPutResponse, DeleteRangeRequest, DeleteRangeResponse, KeyValue, MoveValueRequest,
MoveValueResponse, PutRequest, PutResponse, RangeRequest, RangeResponse, ResponseHeader,
};
use parking_lot::RwLock;
use super::ext::KvStoreExt;
use crate::error::Result;
use crate::service::store::kv::{KvStore, ResettableKvStore};
@@ -117,6 +118,22 @@ impl KvStore for MemStore {
Ok(PutResponse { header, prev_kv })
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
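// The in-memory store simply looks up each key with `get`; keys without a value are skipped rather than returned as empty entries.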
let keys = req.keys;
let mut kvs = Vec::with_capacity(keys.len());
for key in keys {
if let Some(kv) = self.get(key).await? {
kvs.push(kv);
}
}
Ok(BatchGetResponse {
kvs,
..Default::default()
})
}
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> {
let BatchPutRequest {
header,
@@ -247,3 +264,310 @@ impl KvStore for MemStore {
Ok(MoveValueResponse { header, kv })
}
}
#[cfg(test)]
mod tests {
use std::sync::atomic::{AtomicU8, Ordering};
use std::sync::Arc;
use api::v1::meta::{
BatchGetRequest, BatchPutRequest, CompareAndPutRequest, DeleteRangeRequest, KeyValue,
MoveValueRequest, PutRequest, RangeRequest,
};
use super::MemStore;
use crate::service::store::ext::KvStoreExt;
use crate::service::store::kv::KvStore;
use crate::util;
async fn mock_mem_store_with_data() -> MemStore {
let kv_store = MemStore::new();
let kvs = mock_kvs();
kv_store
.batch_put(BatchPutRequest {
kvs,
..Default::default()
})
.await
.unwrap();
kv_store
.put(PutRequest {
key: b"key11".to_vec(),
value: b"val11".to_vec(),
..Default::default()
})
.await
.unwrap();
kv_store
}
fn mock_kvs() -> Vec<KeyValue> {
vec![
KeyValue {
key: b"key1".to_vec(),
value: b"val1".to_vec(),
},
KeyValue {
key: b"key2".to_vec(),
value: b"val2".to_vec(),
},
KeyValue {
key: b"key3".to_vec(),
value: b"val3".to_vec(),
},
]
}
#[tokio::test]
async fn test_put() {
let kv_store = mock_mem_store_with_data().await;
let resp = kv_store
.put(PutRequest {
key: b"key11".to_vec(),
value: b"val12".to_vec(),
..Default::default()
})
.await
.unwrap();
assert!(resp.prev_kv.is_none());
let resp = kv_store
.put(PutRequest {
key: b"key11".to_vec(),
value: b"val13".to_vec(),
prev_kv: true,
..Default::default()
})
.await
.unwrap();
assert_eq!(b"key11".as_slice(), resp.prev_kv.as_ref().unwrap().key);
assert_eq!(b"val12".as_slice(), resp.prev_kv.as_ref().unwrap().value);
}
#[tokio::test]
async fn test_range() {
let kv_store = mock_mem_store_with_data().await;
let key = b"key1".to_vec();
let range_end = util::get_prefix_end_key(b"key1");
let resp = kv_store
.range(RangeRequest {
key: key.clone(),
range_end: range_end.clone(),
limit: 0,
keys_only: false,
..Default::default()
})
.await
.unwrap();
assert_eq!(2, resp.kvs.len());
assert_eq!(b"key1".as_slice(), resp.kvs[0].key);
assert_eq!(b"val1".as_slice(), resp.kvs[0].value);
assert_eq!(b"key11".as_slice(), resp.kvs[1].key);
assert_eq!(b"val11".as_slice(), resp.kvs[1].value);
let resp = kv_store
.range(RangeRequest {
key: key.clone(),
range_end: range_end.clone(),
limit: 0,
keys_only: true,
..Default::default()
})
.await
.unwrap();
assert_eq!(2, resp.kvs.len());
assert_eq!(b"key1".as_slice(), resp.kvs[0].key);
assert_eq!(b"".as_slice(), resp.kvs[0].value);
assert_eq!(b"key11".as_slice(), resp.kvs[1].key);
assert_eq!(b"".as_slice(), resp.kvs[1].value);
let resp = kv_store
.range(RangeRequest {
key: key.clone(),
limit: 0,
keys_only: false,
..Default::default()
})
.await
.unwrap();
assert_eq!(1, resp.kvs.len());
assert_eq!(b"key1".as_slice(), resp.kvs[0].key);
assert_eq!(b"val1".as_slice(), resp.kvs[0].value);
let resp = kv_store
.range(RangeRequest {
key,
range_end,
limit: 1,
keys_only: false,
..Default::default()
})
.await
.unwrap();
assert_eq!(1, resp.kvs.len());
assert_eq!(b"key1".as_slice(), resp.kvs[0].key);
assert_eq!(b"val1".as_slice(), resp.kvs[0].value);
}
#[tokio::test]
async fn test_batch_get() {
let kv_store = mock_mem_store_with_data().await;
let keys = vec![];
let batch_resp = kv_store
.batch_get(BatchGetRequest {
keys,
..Default::default()
})
.await
.unwrap();
assert!(batch_resp.kvs.is_empty());
let keys = vec![b"key10".to_vec()];
let batch_resp = kv_store
.batch_get(BatchGetRequest {
keys,
..Default::default()
})
.await
.unwrap();
assert!(batch_resp.kvs.is_empty());
let keys = vec![b"key1".to_vec(), b"key3".to_vec(), b"key4".to_vec()];
let batch_resp = kv_store
.batch_get(BatchGetRequest {
keys,
..Default::default()
})
.await
.unwrap();
assert_eq!(2, batch_resp.kvs.len());
assert_eq!(b"key1".as_slice(), batch_resp.kvs[0].key);
assert_eq!(b"val1".as_slice(), batch_resp.kvs[0].value);
assert_eq!(b"key3".as_slice(), batch_resp.kvs[1].key);
assert_eq!(b"val3".as_slice(), batch_resp.kvs[1].value);
}
#[tokio::test(flavor = "multi_thread")]
async fn test_compare_and_put() {
let kv_store = Arc::new(MemStore::new());
let success = Arc::new(AtomicU8::new(0));
let mut joins = vec![];
for _ in 0..20 {
let kv_store_clone = kv_store.clone();
let success_clone = success.clone();
let join = tokio::spawn(async move {
let req = CompareAndPutRequest {
key: b"key".to_vec(),
expect: vec![],
value: b"val_new".to_vec(),
..Default::default()
};
let resp = kv_store_clone.compare_and_put(req).await.unwrap();
if resp.success {
success_clone.fetch_add(1, Ordering::SeqCst);
}
});
joins.push(join);
}
for join in joins {
join.await.unwrap();
}
assert_eq!(1, success.load(Ordering::SeqCst));
}
#[tokio::test]
async fn test_delete_range() {
let kv_store = mock_mem_store_with_data().await;
let req = DeleteRangeRequest {
key: b"key3".to_vec(),
range_end: vec![],
prev_kv: true,
..Default::default()
};
let resp = kv_store.delete_range(req).await.unwrap();
assert_eq!(1, resp.prev_kvs.len());
assert_eq!(b"key3".as_slice(), resp.prev_kvs[0].key);
assert_eq!(b"val3".as_slice(), resp.prev_kvs[0].value);
let get_resp = kv_store.get(b"key3".to_vec()).await.unwrap();
assert!(get_resp.is_none());
let req = DeleteRangeRequest {
key: b"key2".to_vec(),
range_end: vec![],
prev_kv: false,
..Default::default()
};
let resp = kv_store.delete_range(req).await.unwrap();
assert!(resp.prev_kvs.is_empty());
let get_resp = kv_store.get(b"key2".to_vec()).await.unwrap();
assert!(get_resp.is_none());
let key = b"key1".to_vec();
let range_end = util::get_prefix_end_key(b"key1");
let req = DeleteRangeRequest {
key: key.clone(),
range_end: range_end.clone(),
prev_kv: true,
..Default::default()
};
let resp = kv_store.delete_range(req).await.unwrap();
assert_eq!(2, resp.prev_kvs.len());
let req = RangeRequest {
key,
range_end,
..Default::default()
};
let resp = kv_store.range(req).await.unwrap();
assert!(resp.kvs.is_empty());
}
#[tokio::test]
async fn test_move_value() {
let kv_store = mock_mem_store_with_data().await;
let req = MoveValueRequest {
from_key: b"key1".to_vec(),
to_key: b"key111".to_vec(),
..Default::default()
};
let resp = kv_store.move_value(req).await.unwrap();
assert_eq!(b"key1".as_slice(), resp.kv.as_ref().unwrap().key);
assert_eq!(b"val1".as_slice(), resp.kv.as_ref().unwrap().value);
let kv_store = mock_mem_store_with_data().await;
let req = MoveValueRequest {
from_key: b"notexistkey".to_vec(),
to_key: b"key222".to_vec(),
..Default::default()
};
let resp = kv_store.move_value(req).await.unwrap();
assert!(resp.kv.is_none());
}
}

View File

@@ -6,7 +6,7 @@ license.workspace = true
[features]
default = []
test = ["tempdir"]
test = ["common-test-util"]
[dependencies]
anymap = "1.0.0-beta.2"
@@ -33,8 +33,8 @@ snafu.workspace = true
storage = { path = "../storage" }
store-api = { path = "../store-api" }
table = { path = "../table" }
tempdir = { version = "0.3", optional = true }
common-test-util = { path = "../common/test-util", optional = true }
tokio.workspace = true
[dev-dependencies]
tempdir = { version = "0.3" }
common-test-util = { path = "../common/test-util" }

View File

@@ -40,11 +40,11 @@ mod procedure_test_util {
use common_procedure::{
BoxedProcedure, Context, ContextProvider, ProcedureId, ProcedureState, Result, Status,
};
use common_test_util::temp_dir::TempDir;
use log_store::NoopLogStore;
use storage::compaction::noop::NoopCompactionScheduler;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::EngineImpl;
use tempdir::TempDir;
use super::*;
use crate::engine::{EngineConfig, MitoEngine};

View File

@@ -17,6 +17,7 @@
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_query::physical_plan::SessionContext;
use common_recordbatch::util;
use common_test_util::temp_dir::TempDir;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, RawSchema};
use datatypes::value::Value;
@@ -31,7 +32,6 @@ use storage::EngineImpl;
use store_api::manifest::Manifest;
use store_api::storage::ReadContext;
use table::requests::{AddColumnRequest, AlterKind, DeleteRequest, TableOptions};
use tempdir::TempDir;
use super::*;
use crate::table::test_util;

View File

@@ -176,4 +176,20 @@ mod tests {
assert_eq!(decode_list, action_list);
assert_eq!(p.unwrap(), protocol);
}
// These tests are used to ensure backward compatibility of manifest files.
// DO NOT modify the serialized string when they fail; instead, check whether your
// modification to manifest-related structs is compatible with older manifests.
#[test]
fn test_table_manifest_compatibility() {
let table_change = r#"{"table_info":{"ident":{"table_id":0,"version":0},"name":"demo","desc":null,"catalog_name":"greptime","schema_name":"public","meta":{"schema":{"column_schemas":[{"name":"host","data_type":{"String":null},"is_nullable":false,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"cpu","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"memory","data_type":{"Float64":{}},"is_nullable":false,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"ts","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":true,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}}],"timestamp_index":3,"version":0},"primary_key_indices":[0],"value_indices":[1,2,3],"engine":"mito","next_column_id":1,"region_numbers":[],"engine_options":{},"options":{"write_buffer_size":null,"ttl":null,"extra_options":{}},"created_on":"2023-03-06T08:50:34.662020Z"},"table_type":"Base"}}"#;
serde_json::from_str::<TableChange>(table_change).unwrap();
let table_remove =
r#"{"table_ident":{"table_id":42,"version":0},"table_name":"test_table"}"#;
serde_json::from_str::<TableRemove>(table_remove).unwrap();
let protocol_action = r#"{"min_reader_version":0,"min_writer_version":1}"#;
serde_json::from_str::<ProtocolAction>(protocol_action).unwrap();
}
}

View File

@@ -17,6 +17,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema, Schema, SchemaBuilder, SchemaRef};
use datatypes::vectors::VectorRef;
@@ -30,7 +31,6 @@ use table::engine::{EngineContext, TableEngine};
use table::metadata::{TableInfo, TableInfoBuilder, TableMetaBuilder, TableType};
use table::requests::{CreateTableRequest, InsertRequest, TableOptions};
use table::TableRef;
use tempdir::TempDir;
use crate::config::EngineConfig;
use crate::engine::{MitoEngine, MITO_ENGINE};
@@ -96,7 +96,7 @@ pub fn build_test_table_info() -> TableInfo {
}
pub async fn new_test_object_store(prefix: &str) -> (TempDir, ObjectStore) {
let dir = TempDir::new(prefix).unwrap();
let dir = create_temp_dir(prefix);
let store_dir = dir.path().to_string_lossy();
let accessor = Builder::default().root(&store_dir).build().unwrap();
(dir, ObjectStore::new(accessor).finish())

View File

@@ -196,6 +196,10 @@ impl Region for MockRegion {
async fn close(&self) -> Result<()> {
Ok(())
}
fn disk_usage_bytes(&self) -> u64 {
0
}
}
impl MockRegionInner {

View File

@@ -14,5 +14,5 @@ tokio.workspace = true
[dev-dependencies]
anyhow = "1.0"
common-telemetry = { path = "../common/telemetry" }
tempdir = "0.3"
common-test-util = { path = "../common/test-util" }
uuid.workspace = true

View File

@@ -17,13 +17,13 @@ use std::sync::Arc;
use anyhow::Result;
use common_telemetry::logging;
use common_test_util::temp_dir::create_temp_dir;
use object_store::cache_policy::LruCacheLayer;
use object_store::services::{Fs, S3};
use object_store::test_util::TempFolder;
use object_store::{util, Object, ObjectLister, ObjectMode, ObjectStore, ObjectStoreBuilder};
use opendal::services::Oss;
use opendal::Operator;
use tempdir::TempDir;
async fn test_object_crud(store: &ObjectStore) -> Result<()> {
// Create object handler.
@@ -93,8 +93,8 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> {
#[tokio::test]
async fn test_fs_backend() -> Result<()> {
let data_dir = TempDir::new("test_fs_backend")?;
let tmp_dir = TempDir::new("test_fs_backend")?;
let data_dir = create_temp_dir("test_fs_backend");
let tmp_dir = create_temp_dir("test_fs_backend");
let store = ObjectStore::new(
Fs::default()
.root(&data_dir.path().to_string_lossy())
@@ -195,7 +195,7 @@ async fn assert_cache_files(
#[tokio::test]
async fn test_object_store_cache_policy() -> Result<()> {
// create file storage
let root_dir = TempDir::new("test_fs_backend")?;
let root_dir = create_temp_dir("test_fs_backend");
let store = ObjectStore::new(
Fs::default()
.root(&root_dir.path().to_string_lossy())
@@ -204,7 +204,7 @@ async fn test_object_store_cache_policy() -> Result<()> {
);
// create file cache layer
let cache_dir = TempDir::new("test_fs_cache")?;
let cache_dir = create_temp_dir("test_fs_cache");
let cache_acc = Fs::default()
.root(&cache_dir.path().to_string_lossy())
.atomic_write_dir(&cache_dir.path().to_string_lossy())

View File

@@ -11,6 +11,7 @@ bytemuck = "1.12"
catalog = { path = "../catalog" }
common-error = { path = "../common/error" }
common-catalog = { path = "../common/catalog" }
common-function-macro = { path = "../common/function-macro" }
datafusion.workspace = true
datatypes = { path = "../datatypes" }
futures = "0.3"

View File

@@ -12,9 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod aggr_over_time;
mod idelta;
mod increase;
#[cfg(test)]
mod test_util;
pub use aggr_over_time::{
AbsentOverTime, AvgOverTime, CountOverTime, LastOverTime, MaxOverTime, MinOverTime,
PresentOverTime, SumOverTime,
};
use datafusion::arrow::array::ArrayRef;
use datafusion::error::DataFusionError;
use datafusion::physical_plan::ColumnarValue;

View File

@@ -0,0 +1,335 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_function_macro::range_fn;
use datafusion::arrow::array::{Float64Array, TimestampMillisecondArray};
use datafusion::arrow::datatypes::TimeUnit;
use datafusion::common::DataFusionError;
use datafusion::logical_expr::{ScalarUDF, Signature, TypeSignature, Volatility};
use datafusion::physical_plan::ColumnarValue;
use datatypes::arrow::array::Array;
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::DataType;
use crate::functions::extract_array;
use crate::range_array::RangeArray;
/// The average value of all points in the specified interval.
#[range_fn(
name = "AvgOverTime",
ret = "Float64Array",
display_name = "prom_avg_over_time"
)]
pub fn avg_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
compute::sum(values).map(|result| result / values.len() as f64)
}
/// The minimum value of all points in the specified interval.
#[range_fn(
name = "MinOverTime",
ret = "Float64Array",
display_name = "prom_min_over_time"
)]
pub fn min_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
compute::min(values)
}
/// The maximum value of all points in the specified interval.
#[range_fn(
name = "MaxOverTime",
ret = "Float64Array",
display_name = "prom_max_over_time"
)]
pub fn max_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
compute::max(values)
}
/// The sum of all values in the specified interval.
#[range_fn(
name = "SumOverTime",
ret = "Float64Array",
display_name = "prom_sum_over_time"
)]
pub fn sum_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
compute::sum(values)
}
/// The count of all values in the specified interval.
#[range_fn(
name = "CountOverTime",
ret = "Float64Array",
display_name = "prom_count_over_time"
)]
pub fn count_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> f64 {
values.len() as f64
}
/// The most recent point value in the specified interval.
#[range_fn(
name = "LastOverTime",
ret = "Float64Array",
display_name = "prom_last_over_time"
)]
pub fn last_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
values.values().last().copied()
}
/// absent_over_time returns an empty vector if the range vector passed to it has any
/// elements (floats or native histograms) and a 1-element vector with the value 1 if
/// the range vector passed to it has no elements.
#[range_fn(
name = "AbsentOverTime",
ret = "Float64Array",
display_name = "prom_absent_over_time"
)]
pub fn absent_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
if values.is_empty() {
Some(1.0)
} else {
None
}
}
/// Returns the value 1 for any series in the specified interval.
#[range_fn(
name = "PresentOverTime",
ret = "Float64Array",
display_name = "prom_present_over_time"
)]
pub fn present_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
if values.is_empty() {
None
} else {
Some(1.0)
}
}
// TODO(ruihang): support quantile_over_time, stddev_over_time, and stdvar_over_time
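// Illustrative sketch only (not part of this change set): stddev_over_time could plausibly follow
// the same `range_fn` pattern as the functions above, assuming the macro accepts identical attributes.
#[range_fn(
    name = "StddevOverTime",
    ret = "Float64Array",
    display_name = "prom_stddev_over_time"
)]
pub fn stddev_over_time(_: &TimestampMillisecondArray, values: &Float64Array) -> Option<f64> {
    if values.is_empty() {
        return None;
    }
    let len = values.len() as f64;
    let mean = compute::sum(values)? / len;
    // population variance, matching PromQL's stddev_over_time
    let variance = values
        .values()
        .iter()
        .map(|v| (v - mean).powi(2))
        .sum::<f64>()
        / len;
    Some(variance.sqrt())
}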
#[cfg(test)]
mod test {
use super::*;
use crate::functions::test_util::simple_range_udf_runner;
// build timestamp range and value range arrays for test
fn build_test_range_arrays() -> (RangeArray, RangeArray) {
let ts_array = Arc::new(TimestampMillisecondArray::from_iter(
[
1000i64, 3000, 5000, 7000, 9000, 11000, 13000, 15000, 17000, 200000, 500000,
]
.into_iter()
.map(Some),
));
let ranges = [
(0, 2),
(0, 5),
(1, 1), // only 1 element
(2, 0), // empty range
(2, 0), // empty range
(3, 3),
(4, 3),
(5, 3),
(8, 1), // only 1 element
(9, 0), // empty range
];
let values_array = Arc::new(Float64Array::from_iter([
12.345678, 87.654321, 31.415927, 27.182818, 70.710678, 41.421356, 57.735027, 69.314718,
98.019802, 1.98019802, 61.803399,
]));
let ts_range_array = RangeArray::from_ranges(ts_array, ranges).unwrap();
let value_range_array = RangeArray::from_ranges(values_array, ranges).unwrap();
(ts_range_array, value_range_array)
}
#[test]
fn calculate_avg_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
AvgOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(49.9999995),
Some(45.8618844),
Some(87.654321),
None,
None,
Some(46.438284),
Some(56.62235366666667),
Some(56.15703366666667),
Some(98.019802),
None,
],
);
}
#[test]
fn calculate_min_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
MinOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(12.345678),
Some(12.345678),
Some(87.654321),
None,
None,
Some(27.182818),
Some(41.421356),
Some(41.421356),
Some(98.019802),
None,
],
);
}
#[test]
fn calculate_max_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
MaxOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(87.654321),
Some(87.654321),
Some(87.654321),
None,
None,
Some(70.710678),
Some(70.710678),
Some(69.314718),
Some(98.019802),
None,
],
);
}
#[test]
fn calculate_sum_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
SumOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(99.999999),
Some(229.309422),
Some(87.654321),
None,
None,
Some(139.314852),
Some(169.867061),
Some(168.471101),
Some(98.019802),
None,
],
);
}
#[test]
fn calculate_count_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
CountOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(2.0),
Some(5.0),
Some(1.0),
Some(0.0),
Some(0.0),
Some(3.0),
Some(3.0),
Some(3.0),
Some(1.0),
Some(0.0),
],
);
}
#[test]
fn calculate_last_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
LastOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(87.654321),
Some(70.710678),
Some(87.654321),
None,
None,
Some(41.421356),
Some(57.735027),
Some(69.314718),
Some(98.019802),
None,
],
);
}
#[test]
fn calculate_absent_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
AbsentOverTime::scalar_udf(),
ts_array,
value_array,
vec![
None,
None,
None,
Some(1.0),
Some(1.0),
None,
None,
None,
None,
Some(1.0),
],
);
}
#[test]
fn calculate_present_over_time() {
let (ts_array, value_array) = build_test_range_arrays();
simple_range_udf_runner(
PresentOverTime::scalar_udf(),
ts_array,
value_array,
vec![
Some(1.0),
Some(1.0),
Some(1.0),
None,
None,
Some(1.0),
Some(1.0),
Some(1.0),
Some(1.0),
None,
],
);
}
}

View File

@@ -169,36 +169,7 @@ impl<const IS_RATE: bool> Display for IDelta<IS_RATE> {
mod test {
use super::*;
fn idelta_runner(input_ts: RangeArray, input_value: RangeArray, expected: Vec<f64>) {
let input = vec![
ColumnarValue::Array(Arc::new(input_ts.into_dict())),
ColumnarValue::Array(Arc::new(input_value.into_dict())),
];
let output = extract_array(&IDelta::<false>::calc(&input).unwrap())
.unwrap()
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.values()
.to_vec();
assert_eq!(output, expected);
}
fn irate_runner(input_ts: RangeArray, input_value: RangeArray, expected: Vec<f64>) {
let input = vec![
ColumnarValue::Array(Arc::new(input_ts.into_dict())),
ColumnarValue::Array(Arc::new(input_value.into_dict())),
];
let output = extract_array(&IDelta::<true>::calc(&input).unwrap())
.unwrap()
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.values()
.to_vec();
assert_eq!(output, expected);
}
use crate::functions::test_util::simple_range_udf_runner;
#[test]
fn basic_idelta_and_irate() {
@@ -214,21 +185,26 @@ mod test {
]));
let values_ranges = [(0, 2), (0, 5), (1, 1), (3, 3), (8, 1), (9, 0)];
// test idelta
let ts_range_array = RangeArray::from_ranges(ts_array.clone(), ts_ranges).unwrap();
let value_range_array =
RangeArray::from_ranges(values_array.clone(), values_ranges).unwrap();
idelta_runner(
simple_range_udf_runner(
IDelta::<false>::scalar_udf(),
ts_range_array,
value_range_array,
vec![1.0, -5.0, 0.0, 6.0, 0.0, 0.0],
vec![Some(1.0), Some(-5.0), None, Some(6.0), None, None],
);
// test irate
let ts_range_array = RangeArray::from_ranges(ts_array, ts_ranges).unwrap();
let value_range_array = RangeArray::from_ranges(values_array, values_ranges).unwrap();
irate_runner(
simple_range_udf_runner(
IDelta::<true>::scalar_udf(),
ts_range_array,
value_range_array,
vec![0.5, 0.0, 0.0, 3.0, 0.0, 0.0],
// the second point represents a counter reset
vec![Some(0.5), Some(0.0), None, Some(3.0), None, None],
);
}
}

View File

@@ -0,0 +1,43 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use datafusion::arrow::array::Float64Array;
use datafusion::logical_expr::ScalarUDF;
use datafusion::physical_plan::ColumnarValue;
use crate::functions::extract_array;
use crate::range_array::RangeArray;
/// Runner for range UDFs that only require a timestamp range and a value range.
pub fn simple_range_udf_runner(
range_fn: ScalarUDF,
input_ts: RangeArray,
input_value: RangeArray,
expected: Vec<Option<f64>>,
) {
let input = vec![
ColumnarValue::Array(Arc::new(input_ts.into_dict())),
ColumnarValue::Array(Arc::new(input_value.into_dict())),
];
let eval_result: Vec<Option<f64>> = extract_array(&(range_fn.fun)(&input).unwrap())
.unwrap()
.as_any()
.downcast_ref::<Float64Array>()
.unwrap()
.iter()
.collect();
assert_eq!(eval_result, expected)
}

View File

@@ -48,7 +48,10 @@ use crate::error::{
use crate::extension_plan::{
EmptyMetric, InstantManipulate, Millisecond, RangeManipulate, SeriesDivide, SeriesNormalize,
};
use crate::functions::{IDelta, Increase};
use crate::functions::{
AbsentOverTime, AvgOverTime, CountOverTime, IDelta, Increase, LastOverTime, MaxOverTime,
MinOverTime, PresentOverTime, SumOverTime,
};
const LEFT_PLAN_JOIN_ALIAS: &str = "lhs";
@@ -366,6 +369,7 @@ impl PromPlanner {
let mut func_exprs = self.create_function_expr(func, args.literals)?;
func_exprs.insert(0, self.create_time_index_column_expr()?);
func_exprs.extend_from_slice(&self.create_tag_column_exprs()?);
LogicalPlanBuilder::from(input)
.project(func_exprs)
.context(DataFusionPlanningSnafu)?
@@ -667,6 +671,14 @@ impl PromPlanner {
"increase" => ScalarFunc::Udf(Increase::scalar_udf()),
"idelta" => ScalarFunc::Udf(IDelta::<false>::scalar_udf()),
"irate" => ScalarFunc::Udf(IDelta::<true>::scalar_udf()),
"avg_over_time" => ScalarFunc::Udf(AvgOverTime::scalar_udf()),
"min_over_time" => ScalarFunc::Udf(MinOverTime::scalar_udf()),
"max_over_time" => ScalarFunc::Udf(MaxOverTime::scalar_udf()),
"sum_over_time" => ScalarFunc::Udf(SumOverTime::scalar_udf()),
"count_over_time" => ScalarFunc::Udf(CountOverTime::scalar_udf()),
"last_over_time" => ScalarFunc::Udf(LastOverTime::scalar_udf()),
"absent_over_time" => ScalarFunc::Udf(AbsentOverTime::scalar_udf()),
"present_over_time" => ScalarFunc::Udf(PresentOverTime::scalar_udf()),
_ => ScalarFunc::DataFusionBuiltin(
BuiltinScalarFunction::from_str(func.name).map_err(|_| {
UnsupportedExprSnafu {
@@ -1580,7 +1592,7 @@ mod test {
}
#[tokio::test]
#[should_panic]
#[ignore = "wait for https://github.com/apache/arrow-datafusion/issues/5513"]
async fn increase_aggr() {
let query = "increase(some_metric[5m])";
let expected = String::from(
@@ -1592,7 +1604,6 @@ mod test {
\n Sort: some_metric.tag_0 DESC NULLS LAST, some_metric.timestamp DESC NULLS LAST [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n Filter: some_metric.timestamp >= TimestampMillisecond(-1000, None) AND some_metric.timestamp <= TimestampMillisecond(100000000, None) [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n TableScan: some_metric, unsupported_filters=[timestamp >= TimestampMillisecond(-1000, None), timestamp <= TimestampMillisecond(100000000, None)] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"
);
indie_query_plan_compare(query, expected).await;
@@ -1609,7 +1620,24 @@ mod test {
\n Sort: some_metric.tag_0 DESC NULLS LAST, some_metric.timestamp DESC NULLS LAST [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n Filter: some_metric.timestamp >= TimestampMillisecond(-1000, None) AND some_metric.timestamp <= TimestampMillisecond(100000000, None) [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n TableScan: some_metric, unsupported_filters=[timestamp >= TimestampMillisecond(-1000, None), timestamp <= TimestampMillisecond(100000000, None)] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"
);
indie_query_plan_compare(query, expected).await;
}
#[tokio::test]
#[ignore = "wait for https://github.com/apache/arrow-datafusion/issues/5513"]
async fn count_over_time() {
let query = "count_over_time(some_metric[5m])";
let expected = String::from(
"Filter: prom_count_over_time(timestamp_range,field_0) IS NOT NULL [timestamp:Timestamp(Millisecond, None), prom_count_over_time(timestamp_range,field_0):Float64;N, tag_0:Utf8]\
\n Projection: some_metric.timestamp, prom_count_over_time(timestamp_range, field_0) AS prom_count_over_time(timestamp_range,field_0), some_metric.tag_0 [timestamp:Timestamp(Millisecond, None), prom_count_over_time(timestamp_range,field_0):Float64;N, tag_0:Utf8]\
\n PromRangeManipulate: req range=[0..100000000], interval=[5000], eval range=[300000], time index=[timestamp], values=[\"field_0\"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Dictionary(Int64, Float64);N, timestamp_range:Dictionary(Int64, Timestamp(Millisecond, None))]\
\n PromSeriesNormalize: offset=[0], time index=[timestamp] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n PromSeriesDivide: tags=[\"tag_0\"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n Sort: some_metric.tag_0 DESC NULLS LAST, some_metric.timestamp DESC NULLS LAST [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n Filter: some_metric.timestamp >= TimestampMillisecond(-1000, None) AND some_metric.timestamp <= TimestampMillisecond(100000000, None) [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]\
\n TableScan: some_metric, unsupported_filters=[timestamp >= TimestampMillisecond(-1000, None), timestamp <= TimestampMillisecond(100000000, None)] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"
);
indie_query_plan_compare(query, expected).await;

View File

@@ -24,6 +24,7 @@ python = [
]
[dependencies]
arrow.workspace = true
async-trait.workspace = true
catalog = { path = "../catalog" }
common-catalog = { path = "../common/catalog" }
@@ -67,11 +68,11 @@ table = { path = "../table" }
tokio.workspace = true
[dev-dependencies]
common-test-util = { path = "../common/test-util" }
log-store = { path = "../log-store" }
mito = { path = "../mito", features = ["test"] }
ron = "0.7"
serde = { version = "1.0", features = ["derive"] }
storage = { path = "../storage" }
store-api = { path = "../store-api" }
tempdir = "0.3"
tokio-test = "0.4"

View File

@@ -120,17 +120,18 @@ mod tests {
use super::*;
type DefaultEngine = MitoEngine<EngineImpl<RaftEngineLogStore>>;
use common_test_util::temp_dir::create_temp_dir;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use log_store::LogConfig;
use mito::engine::MitoEngine;
use storage::compaction::noop::NoopCompactionScheduler;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::EngineImpl;
use tempdir::TempDir;
#[tokio::test]
async fn test_insert_find_compile_script() {
let wal_dir = TempDir::new("test_insert_find_compile_script_wal").unwrap();
let wal_dir = create_temp_dir("test_insert_find_compile_script_wal");
let wal_dir_str = wal_dir.path().to_string_lossy();
common_telemetry::init_default_ut_logging();

View File

@@ -30,7 +30,7 @@ use common_recordbatch::{
RecordBatch, RecordBatchStream, RecordBatches, SendableRecordBatchStream,
};
use datafusion_expr::Volatility;
use datatypes::schema::{ColumnSchema, SchemaRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::VectorRef;
use futures::Stream;
use query::parser::{QueryLanguageParser, QueryStatement};
@@ -40,9 +40,8 @@ use snafu::{ensure, ResultExt};
use sql::statements::statement::Statement;
use crate::engine::{CompileContext, EvalContext, Script, ScriptEngine};
use crate::python::error::{self, Result};
use crate::python::error::{self, PyRuntimeSnafu, Result};
use crate::python::ffi_types::copr::{exec_parsed, parse, AnnotationInfo, CoprocessorRef};
const PY_ENGINE: &str = "python";
#[derive(Debug)]
@@ -81,17 +80,21 @@ impl PyUDF {
/// Fake a schema, should only be used with dynamically eval a Python Udf
fn fake_schema(&self, columns: &[VectorRef]) -> SchemaRef {
let empty_args = vec![];
let arg_names = self
.copr
.deco_args
.arg_names
.as_ref()
.unwrap_or(&empty_args);
// Try to give the schema the right argument names so the script can run as a UDF without modification,
// because when running as a PyUDF the incoming columns should have names that match the
// coprocessor's arguments.
let args = self.copr.deco_args.arg_names.clone();
let try_get_name = |i: usize| {
if let Some(arg_name) = args.as_ref().and_then(|args| args.get(i)) {
arg_name.clone()
} else {
format!("name_{i}")
}
};
let col_sch: Vec<_> = columns
.iter()
.enumerate()
.map(|(i, col)| ColumnSchema::new(arg_names[i].clone(), col.data_type(), true))
.map(|(i, col)| ColumnSchema::new(try_get_name(i), col.data_type(), true))
.collect();
let schema = datatypes::schema::Schema::new(col_sch);
Arc::new(schema)
@@ -172,7 +175,7 @@ impl Function for PyUDF {
pub struct PyScript {
query_engine: QueryEngineRef,
copr: CoprocessorRef,
pub(crate) copr: CoprocessorRef,
}
impl PyScript {
@@ -188,12 +191,48 @@ impl PyScript {
pub struct CoprStream {
stream: SendableRecordBatchStream,
copr: CoprocessorRef,
ret_schema: SchemaRef,
params: HashMap<String, String>,
}
impl CoprStream {
fn try_new(
stream: SendableRecordBatchStream,
copr: CoprocessorRef,
params: HashMap<String, String>,
) -> Result<Self> {
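// Build the stream's output schema from the coprocessor's annotated return types (name, type,
// nullability); error out if a return type is not annotated.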
let mut schema = vec![];
for (ty, name) in copr.return_types.iter().zip(&copr.deco_args.ret_names) {
let ty = ty.clone().ok_or(
PyRuntimeSnafu {
msg: "return type not annotated, can't generate schema",
}
.build(),
)?;
let is_nullable = ty.is_nullable;
let ty = ty.datatype.ok_or(
PyRuntimeSnafu {
msg: "return type not annotated, can't generate schema",
}
.build(),
)?;
let col_schema = ColumnSchema::new(name, ty, is_nullable);
schema.push(col_schema);
}
let ret_schema = Arc::new(Schema::new(schema));
Ok(Self {
stream,
copr,
ret_schema,
params,
})
}
}
impl RecordBatchStream for CoprStream {
fn schema(&self) -> SchemaRef {
self.stream.schema()
// FIXME(discord9): use copr returns for schema
self.ret_schema.clone()
}
}
@@ -207,7 +246,6 @@ impl Stream for CoprStream {
let batch = exec_parsed(&self.copr, &Some(recordbatch), &self.params)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
Poll::Ready(Some(Ok(batch)))
}
Poll::Ready(other) => Poll::Ready(other),
@@ -246,11 +284,9 @@ impl Script for PyScript {
let res = self.query_engine.execute(&plan).await?;
let copr = self.copr.clone();
match res {
Output::Stream(stream) => Ok(Output::Stream(Box::pin(CoprStream {
params,
copr,
stream,
}))),
Output::Stream(stream) => Ok(Output::Stream(Box::pin(CoprStream::try_new(
stream, copr, params,
)?))),
_ => unreachable!(),
}
} else {
@@ -296,7 +332,8 @@ impl ScriptEngine for PyEngine {
})
}
}
#[cfg(test)]
pub(crate) use tests::sample_script_engine;
#[cfg(test)]
mod tests {
use catalog::local::{MemoryCatalogProvider, MemorySchemaProvider};
@@ -311,7 +348,7 @@ mod tests {
use super::*;
fn sample_script_engine() -> PyEngine {
pub(crate) fn sample_script_engine() -> PyEngine {
let catalog_list = catalog::local::new_memory_catalog_list().unwrap();
let default_schema = Arc::new(MemorySchemaProvider::new());
@@ -340,7 +377,7 @@ mod tests {
import greptime as gt
@copr(args=["number"], returns = ["number"], sql = "select * from numbers")
def test(number)->vector[u32]:
def test(number) -> vector[u32]:
return query.sql("select * from numbers")[0][0]
"#;
let script = script_engine
@@ -367,7 +404,7 @@ def test(number)->vector[u32]:
let script = r#"
@copr(returns = ["number"])
def test(**params)->vector[i64]:
def test(**params) -> vector[i64]:
return int(params['a']) + int(params['b'])
"#;
let script = script_engine
@@ -396,11 +433,10 @@ def test(**params)->vector[i64]:
let script_engine = sample_script_engine();
let script = r#"
import greptime as gt
from data_frame import col
from greptime import col
@copr(args=["number"], returns = ["number"], sql = "select * from numbers")
def test(number)->vector[u32]:
def test(number) -> vector[u32]:
return dataframe.filter(col("number")==col("number")).collect()[0][0]
"#;
let script = script_engine
@@ -432,7 +468,7 @@ def add(a, b):
return a + b;
@copr(args=["a", "b", "c"], returns = ["r"], sql="select number as a,number as b,number as c from numbers limit 100")
def test(a, b, c):
def test(a, b, c) -> vector[f64]:
return add(a, b) / g.sqrt(c + 1)
"#;
let script = script_engine
@@ -470,7 +506,7 @@ def test(a, b, c):
import greptime as gt
@copr(args=["number"], returns = ["r"], sql="select number from numbers limit 100")
def test(a):
def test(a) -> vector[i64]:
return gt.vector([x for x in a if x % 2 == 0])
"#;
let script = script_engine

View File

@@ -219,7 +219,7 @@ pub(crate) fn select_from_rb(rb: &RecordBatch, fetch_names: &[String]) -> Result
.iter()
.map(|name| {
let vector = rb.column_by_name(name).with_context(|| OtherSnafu {
reason: format!("Can't find field name {name}"),
reason: format!("Can't find field name {name} in all columns in {rb:?}"),
})?;
Ok(PyVector::from(vector.clone()))
})
@@ -229,15 +229,29 @@ pub(crate) fn select_from_rb(rb: &RecordBatch, fetch_names: &[String]) -> Result
/// Match the arguments' real types against the annotated types.
/// If the type annotation is `vector[_]`, use the real type (from the RecordBatch's schema).
pub(crate) fn check_args_anno_real_type(
arg_names: &[String],
args: &[PyVector],
copr: &Coprocessor,
rb: &RecordBatch,
) -> Result<()> {
ensure!(
arg_names.len() == args.len(),
OtherSnafu {
reason: format!("arg_names:{arg_names:?} and args{args:?}'s length is different")
}
);
for (idx, arg) in args.iter().enumerate() {
let anno_ty = copr.arg_types[idx].clone();
let real_ty = arg.to_arrow_array().data_type().clone();
let real_ty = ConcreteDataType::from_arrow_type(&real_ty);
let is_nullable: bool = rb.schema.column_schemas()[idx].is_nullable();
let arg_name = arg_names[idx].clone();
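// Look up nullability by the argument's column name; the argument order may not match the
// RecordBatch's column order.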
let col_idx = rb.schema.column_index_by_name(&arg_name).ok_or(
OtherSnafu {
reason: format!("Can't find column by name {arg_name}"),
}
.build(),
)?;
let is_nullable: bool = rb.schema.column_schemas()[col_idx].is_nullable();
ensure!(
anno_ty
.clone()
@@ -424,11 +438,13 @@ pub fn exec_parsed(
pyo3_exec_parsed(copr, rb, params)
}
#[cfg(not(feature = "pyo3_backend"))]
OtherSnafu {
reason: "`pyo3` feature is disabled, therefore can't run scripts in cpython"
.to_string(),
{
OtherSnafu {
reason: "`pyo3` feature is disabled, therefore can't run scripts in cpython"
.to_string(),
}
.fail()
}
.fail()
}
}
}

View File

@@ -15,29 +15,100 @@
mod sample_testcases;
use std::collections::HashMap;
use std::sync::Arc;
use common_query::Output;
use common_recordbatch::RecordBatch;
use datafusion::arrow::array::Float64Array;
use datafusion::arrow::compute;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::VectorRef;
#[cfg(feature = "pyo3_backend")]
use pyo3::{types::PyDict, Python};
use rustpython_compiler::Mode;
use crate::python::ffi_types::pair_tests::sample_testcases::sample_test_case;
use crate::engine::{CompileContext, EvalContext, Script, ScriptEngine};
use crate::python::engine::sample_script_engine;
use crate::python::ffi_types::pair_tests::sample_testcases::{
generate_copr_integrate_tests, sample_test_case,
};
use crate::python::ffi_types::PyVector;
#[cfg(feature = "pyo3_backend")]
use crate::python::pyo3::{init_cpython_interpreter, vector_impl::into_pyo3_cell};
use crate::python::rspython::init_interpreter;
// TODO(discord9): paired test for slicing Vector
// & slice tests & lit() function for dataframe & test with full coprocessor&query engine ability
/// generate testcases that should be tested in pairs in both RustPython and CPython
#[derive(Debug, Clone)]
struct TestCase {
struct CodeBlockTestCase {
input: HashMap<String, VectorRef>,
script: String,
expect: VectorRef,
}
/// TODO(discord9): take a simple RecordBatch as input, set up a query engine, and so on,
/// so that it will work for a full Coprocessor
#[derive(Debug, Clone, Default)]
struct CoprTestCase {
// will be built into a RecordBatch and fed to the coprocessor
script: String,
expect: Option<HashMap<String, VectorRef>>,
}
#[allow(unused)]
fn into_recordbatch(input: HashMap<String, VectorRef>) -> RecordBatch {
let mut schema = Vec::new();
let mut columns = Vec::new();
for (name, v) in input {
schema.push(ColumnSchema::new(name, v.data_type(), false));
columns.push(v);
}
let schema = Arc::new(Schema::new(schema));
RecordBatch::new(schema, columns).unwrap()
}
#[tokio::test]
#[allow(clippy::print_stdout)]
async fn integrated_py_copr_test() {
let testcases = generate_copr_integrate_tests();
let script_engine = sample_script_engine();
for (idx, case) in testcases.into_iter().enumerate() {
println!("Testcase {idx}:\n script: {}", case.script);
let script = case.script;
let script = script_engine
.compile(&script, CompileContext::default())
.await
.unwrap();
let output = script
.execute(HashMap::default(), EvalContext::default())
.await
.unwrap();
let res = match output {
Output::Stream(s) => common_recordbatch::util::collect_batches(s).await.unwrap(),
Output::RecordBatches(rbs) => rbs,
_ => unreachable!(),
};
let rb = res.iter().next().expect("One and only one recordbatch");
if let Some(expect_result) = case.expect {
let mut actual_result = HashMap::new();
for col_sch in rb.schema.column_schemas() {
let col = rb.column_by_name(&col_sch.name).unwrap();
actual_result.insert(col_sch.name.clone(), col.clone());
}
for (name, col) in expect_result {
let actual_col = actual_result.get(&name).expect("Column with this name");
if !check_equal(col.clone(), actual_col.clone()) {
panic!("Column {name} doesn't match, expect {col:?}, found {actual_col:?}")
}
}
}
println!(".. Ok");
}
}
#[test]
fn pyo3_rspy_test_in_pairs() {
let testcases = sample_test_case();
@@ -76,7 +147,7 @@ fn check_equal(v0: VectorRef, v1: VectorRef) -> bool {
}
/// will panic if something is wrong, used in tests only
fn eval_rspy(case: TestCase) {
fn eval_rspy(case: CodeBlockTestCase) {
let interpreter = init_interpreter();
interpreter.enter(|vm| {
let scope = vm.new_scope_with_builtins();
@@ -112,7 +183,7 @@ fn eval_rspy(case: TestCase) {
}
#[cfg(feature = "pyo3_backend")]
fn eval_pyo3(case: TestCase) {
fn eval_pyo3(case: CodeBlockTestCase) {
init_cpython_interpreter();
Python::with_gil(|py| {
let locals = {

View File

@@ -17,10 +17,10 @@ use std::f64::consts;
use std::sync::Arc;
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BooleanVector, Float64Vector, Int64Vector, VectorRef};
use crate::python::ffi_types::pair_tests::TestCase;
use datatypes::vectors::{BooleanVector, Float64Vector, Int32Vector, Int64Vector, VectorRef};
use super::CoprTestCase;
use crate::python::ffi_types::pair_tests::CodeBlockTestCase;
macro_rules! vector {
($ty: ident, $slice: expr) => {
Arc::new($ty::from_slice($slice)) as VectorRef
@@ -34,12 +34,110 @@ macro_rules! ronish {
])
};
}
pub(super) fn generate_copr_integrate_tests() -> Vec<CoprTestCase> {
vec![
CoprTestCase {
script: r#"
@copr(args=["number", "number"],
returns=["value"],
sql="select number from numbers limit 5", backend="rspy")
def add_vecs(n1, n2) -> vector[i32]:
return n1 + n2
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int32Vector, [0, 2, 4, 6, 8]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@copr(args=["number", "number"],
returns=["value"],
sql="select number from numbers limit 5", backend="pyo3")
def add_vecs(n1, n2) -> vector[i32]:
return n1 + n2
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int32Vector, [0, 2, 4, 6, 8]))),
},
CoprTestCase {
script: r#"
@copr(returns=["value"])
def answer() -> vector[i64]:
from greptime import vector
return vector([42, 43, 44])
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42, 43, 44]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@copr(returns=["value"], backend="pyo3")
def answer() -> vector[i64]:
from greptime import vector
return vector([42, 43, 44])
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42, 43, 44]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@copr(returns=["value"], backend="pyo3")
def answer() -> vector[i64]:
from greptime import vector
return vector.from_pyarrow(vector([42, 43, 44]).to_pyarrow())
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42, 43, 44]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@copr(returns=["value"], backend="pyo3")
def answer() -> vector[i64]:
from greptime import vector
import pyarrow as pa
return vector.from_pyarrow(pa.array([42, 43, 44]))
"#
.to_string(),
expect: Some(ronish!("value": vector!(Int64Vector, [42, 43, 44]))),
},
CoprTestCase {
script: r#"
@copr(args=[], returns = ["number"], sql = "select * from numbers", backend="rspy")
def answer() -> vector[i64]:
from greptime import vector, col, lit
expr_0 = (col("number")<lit(3)) & (col("number")>0)
ret = dataframe.select([col("number")]).filter(expr_0).collect()[0][0]
return ret
"#
.to_string(),
expect: Some(ronish!("number": vector!(Int64Vector, [1, 2]))),
},
#[cfg(feature = "pyo3_backend")]
CoprTestCase {
script: r#"
@copr(args=[], returns = ["number"], sql = "select * from numbers", backend="pyo3")
def answer() -> vector[i64]:
from greptime import vector, col, lit
# bitwise operators take precedence over comparison operators, hence the parentheses
expr_0 = (col("number")<lit(3)) & (col("number")>0)
ret = dataframe.select([col("number")]).filter(expr_0).collect()[0][0]
return ret
"#
.to_string(),
expect: Some(ronish!("number": vector!(Int64Vector, [1, 2]))),
},
]
}
/// Generate tests for basic vector operations and basic builtin functions
/// Use a function to generate testcases instead of a `.ron` config file because it's more flexible, and we are in #[cfg(test)] so binary bloat is not a concern
#[allow(clippy::approx_constant)]
pub(super) fn sample_test_case() -> Vec<TestCase> {
pub(super) fn sample_test_case() -> Vec<CodeBlockTestCase> {
vec![
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0f64, 2.0, 3.0])
},
@@ -53,7 +151,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [1.0f64, 2.0, 3.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0f64, 2.0, 3.0]),
"b": vector!(Float64Vector, [3.0f64, 2.0, 1.0])
@@ -65,7 +163,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [4.0f64, 4.0, 4.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0f64, 2.0, 3.0]),
"b": vector!(Float64Vector, [3.0f64, 2.0, 1.0])
@@ -77,7 +175,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [-2.0f64, 0.0, 2.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0f64, 2.0, 3.0]),
"b": vector!(Float64Vector, [3.0f64, 2.0, 1.0])
@@ -89,7 +187,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [3.0f64, 4.0, 3.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0f64, 2.0, 3.0]),
"b": vector!(Float64Vector, [3.0f64, 2.0, 1.0])
@@ -101,7 +199,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [1. / 3., 1.0, 3.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0f64, 2.0, 3.0])
},
@@ -115,7 +213,7 @@ ret"#
[1.0f64, std::f64::consts::SQRT_2, 1.7320508075688772,]
),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -129,7 +227,7 @@ ret"#
[0.8414709848078965, 0.9092974268256817, 0.1411200080598672,]
),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -143,7 +241,7 @@ ret"#
[0.5403023058681398, -0.4161468365471424, -0.9899924966004454,]
),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -157,7 +255,7 @@ ret"#
[1.5574077246549023, -2.185039863261519, -0.1425465430742778,]
),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0.3, 0.5, 1.0])
},
@@ -171,7 +269,7 @@ ret"#
[0.3046926540153975, 0.5235987755982989, 1.5707963267948966,]
),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0.3, 0.5, 1.0])
},
@@ -185,7 +283,7 @@ ret"#
[1.2661036727794992, 1.0471975511965979, 0.0,]
),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0.3, 0.5, 1.1])
},
@@ -199,7 +297,7 @@ ret"#
[0.2914567944778671, 0.4636476090008061, 0.8329812666744317,]
),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0.3, 0.5, 1.1])
},
@@ -210,7 +308,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [0.0, 0.0, 1.0,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0.3, 0.5, 1.1])
},
@@ -221,7 +319,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [1.0, 1.0, 2.0,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0.3, 0.5, 1.1])
},
@@ -232,7 +330,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [0.0, 1.0, 1.0,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0.3, 0.5, 1.1])
},
@@ -243,7 +341,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [0.0, 0.0, 1.0,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [-0.3, 0.5, -1.1])
},
@@ -254,7 +352,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [0.3, 0.5, 1.1,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [-0.3, 0.5, -1.1])
},
@@ -265,7 +363,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [-1.0, 1.0, -1.0,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [0., 1.0, 2.0])
},
@@ -276,7 +374,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [1.0, consts::E, 7.38905609893065,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -287,7 +385,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [0.0, consts::LN_2, 1.0986122886681098,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -298,7 +396,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [0.0, 1.0, 1.584962500721156,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -309,7 +407,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [0.0, consts::LOG10_2, 0.47712125471966244,]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {},
script: r#"
from greptime import *
@@ -318,7 +416,7 @@ ret"#
.to_string(),
expect: vector!(BooleanVector, &[true, true, true]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Int64Vector, [1, 2, 2, 3])
},
@@ -329,7 +427,7 @@ ret"#
.to_string(),
expect: vector!(Int64Vector, [3]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Int64Vector, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
},
@@ -340,7 +438,7 @@ ret"#
.to_string(),
expect: vector!(Int64Vector, [6]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -351,7 +449,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [1.0, 2.0, 3.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1.0, 2.0, 3.0])
},
@@ -362,7 +460,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [2.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0, 2.0, 3.0]),
"b": vector!(Float64Vector, [1.0, 0.0, -1.0])
@@ -374,7 +472,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [-1.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Int64Vector, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
},
@@ -385,7 +483,7 @@ ret"#
.to_string(),
expect: vector!(Int64Vector, [10]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0, 2.0, 3.0]),
"b": vector!(Float64Vector, [1.0, 0.0, -1.0])
@@ -397,7 +495,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [-1.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0, 2.0, 3.0]),
"b": vector!(Float64Vector, [1.0, 0.0, -1.0])
@@ -409,7 +507,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [-0.6666666666666666]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0, 2.0, 3.0]),
},
@@ -420,7 +518,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [3.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"a": vector!(Float64Vector, [1.0, 2.0, 3.0]),
},
@@ -431,7 +529,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [1.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),
},
@@ -442,7 +540,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [3.0276503540974917]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),
},
@@ -453,7 +551,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [2.8722813232690143]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),
},
@@ -464,7 +562,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [55.0]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),
},
@@ -475,7 +573,7 @@ ret"#
.to_string(),
expect: vector!(Float64Vector, [9.166666666666666]),
},
TestCase {
CodeBlockTestCase {
input: ronish! {
"values": vector!(Float64Vector, [1., 2., 3., 4., 5., 6., 7., 8., 9., 10.]),
},


@@ -21,7 +21,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use datatypes::scalars::ScalarVector;
use datatypes::vectors::{BooleanVector, Float64Vector, VectorRef};
use datatypes::vectors::{BooleanVector, Float64Vector, Int64Vector, VectorRef};
#[cfg(feature = "pyo3_backend")]
use pyo3::{types::PyDict, Python};
use rustpython_compiler::Mode;
@@ -59,11 +59,13 @@ fn sample_py_vector() -> HashMap<String, VectorRef> {
let b2 = Arc::new(BooleanVector::from_slice(&[false, true, false, true])) as VectorRef;
let f1 = Arc::new(Float64Vector::from_slice([0.0f64, 2.0, 10.0, 42.0])) as VectorRef;
let f2 = Arc::new(Float64Vector::from_slice([-0.1f64, -42.0, 2., 7.0])) as VectorRef;
let f3 = Arc::new(Float64Vector::from_slice([1.0f64, -42.0, 2., 7.0])) as VectorRef;
HashMap::from([
("b1".to_owned(), b1),
("b2".to_owned(), b2),
("f1".to_owned(), f1),
("f2".to_owned(), f2),
("f3".to_owned(), f3),
])
}
@@ -105,6 +107,27 @@ fn get_test_cases() -> Vec<TestCase> {
42. / 7.,
])) as VectorRef,
},
TestCase {
eval: "f2.__rtruediv__(f1)".to_string(),
result: Arc::new(Float64Vector::from_slice([
0.0 / -0.1f64,
2. / -42.,
10. / 2.,
42. / 7.,
])) as VectorRef,
},
TestCase {
eval: "f2.__floordiv__(f3)".to_string(),
result: Arc::new(Int64Vector::from_slice([0, 1, 1, 1])) as VectorRef,
},
TestCase {
eval: "f3.__rfloordiv__(f2)".to_string(),
result: Arc::new(Int64Vector::from_slice([0, 1, 1, 1])) as VectorRef,
},
TestCase {
eval: "f3.filter(b1)".to_string(),
result: Arc::new(Float64Vector::from_slice([2.0, 7.0])) as VectorRef,
},
];
Vec::from(testcases)
}
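The eval strings above exercise the new dunder methods directly; through normal Python operator dispatch the same behaviour is reachable with `//`. A minimal sketch reusing the fixture values, where constructing the operands with `vector(...)` and its dtype inference are assumptions, while the expected result is the one asserted by the test above:

```python
from greptime import vector

f2 = vector([-0.1, -42.0, 2.0, 7.0])
f3 = vector([1.0, -42.0, 2.0, 7.0])

# __floordiv__ casts both operands to i64 before dividing, matching the
# Int64Vector expected by the test case above.
f2 // f3  # -> [0, 1, 1, 1]
```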


@@ -22,16 +22,32 @@ use datafusion_physical_expr::{math_expressions, AggregateExpr};
use datatypes::vectors::VectorRef;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::types::PyList;
use super::utils::scalar_value_to_py_any;
use crate::python::ffi_types::utils::all_to_f64;
use crate::python::ffi_types::PyVector;
use crate::python::pyo3::dataframe_impl::col;
use crate::python::pyo3::dataframe_impl::{col, lit};
use crate::python::pyo3::utils::{
columnar_value_to_py_any, try_into_columnar_value, val_to_py_any,
};
/// Try to extract a `PyVector` or convert from a `pyarrow.array` object
#[inline]
fn try_into_py_vector(py: Python, obj: PyObject) -> PyResult<PyVector> {
if let Ok(v) = obj.extract::<PyVector>(py) {
Ok(v)
} else {
PyVector::from_pyarrow(obj.as_ref(py).get_type(), py, obj.clone())
}
}
#[inline]
fn to_array_of_py_vec(py: Python, obj: &[&PyObject]) -> PyResult<Vec<PyVector>> {
obj.iter()
.map(|v| try_into_py_vector(py, v.to_object(py)))
.collect::<PyResult<_>>()
}
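These two helpers are what let the builtins below accept either a `PyVector` or a `pyarrow` array for any vector argument. A hedged sketch of what that allows in a pyo3-backend coprocessor; `pow` is one of the names registered in this module (and imported from `greptime` in the copr test later in this diff), but treating it as element-wise power is an assumption here:

```python
import pyarrow as pa
from greptime import vector, pow

# Mixing a greptime vector and a pyarrow array in one builtin call; both
# operands are converted on the Rust side via try_into_py_vector.
pow(vector([1.0, 2.0, 3.0]), pa.array([2.0, 2.0, 2.0]))
```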
macro_rules! batch_import {
($m: ident, [$($fn_name: ident),*]) => {
$($m.add_function(wrap_pyfunction!($fn_name, $m)?)?;)*
@@ -41,11 +57,12 @@ macro_rules! batch_import {
#[pymodule]
#[pyo3(name = "greptime")]
pub(crate) fn greptime_builtins(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
m.add_class::<PyVector>()?;
batch_import!(
m,
[
lit,
col,
vector,
pow,
clip,
diff,
@@ -95,7 +112,8 @@ pub(crate) fn greptime_builtins(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
Ok(())
}
fn eval_func(py: Python<'_>, name: &str, v: &[&PyVector]) -> PyResult<PyVector> {
fn eval_func(py: Python<'_>, name: &str, v: &[&PyObject]) -> PyResult<PyVector> {
let v = to_array_of_py_vec(py, v)?;
py.allow_threads(|| {
let v: Vec<VectorRef> = v.iter().map(|v| v.as_vector_ref()).collect();
let func: Option<FunctionRef> = FUNCTION_REGISTRY.get_function(name);
@@ -153,7 +171,7 @@ fn eval_aggr_func(py: Python<'_>, name: &str, args: &[&PyVector]) -> PyResult<Py
/// evaluate Aggregate Expr using its backing accumulator
/// TODO(discord9): cast to f64 before use/Provide cast to f64 function?
fn eval_aggr_expr<T: AggregateExpr>(
fn eval_df_aggr_expr<T: AggregateExpr>(
py: Python<'_>,
aggr: T,
values: &[ArrayRef],
@@ -191,8 +209,8 @@ macro_rules! bind_call_unary_math_function {
macro_rules! simple_vector_fn {
($name: ident, $name_str: tt, [$($arg:ident),*]) => {
#[pyfunction]
fn $name(py: Python<'_>, $($arg: &PyVector),*) -> PyResult<PyVector> {
eval_func(py, $name_str, &[$($arg),*])
fn $name(py: Python<'_>, $($arg: PyObject),*) -> PyResult<PyVector> {
eval_func(py, $name_str, &[$(&$arg),*])
}
};
($name: ident, $name_str: tt, AGG[$($arg:ident),*]) => {
@@ -203,11 +221,6 @@ macro_rules! simple_vector_fn {
};
}
#[pyfunction]
fn vector(iterable: &PyList) -> PyResult<PyVector> {
PyVector::py_new(iterable)
}
// TODO(discord9): More Aggr functions& allow threads
simple_vector_fn!(pow, "pow", [v0, v1]);
simple_vector_fn!(clip, "clip", [v0, v1, v2]);
@@ -255,7 +268,7 @@ macro_rules! bind_aggr_expr {
fn $FUNC_NAME(py: Python<'_>, $($ARG: &PyVector),*)->PyResult<PyObject>{
// just a placeholder; we only want the inner `XXXAccumulator`'s function
// so its expr is irrelevant
return eval_aggr_expr(
return eval_df_aggr_expr(
py,
expressions::$AGGR_FUNC::new(
$(
@@ -273,7 +286,7 @@ macro_rules! bind_aggr_expr {
expand into:
```
fn approx_distinct(py: Python<'_>, v0: &PyVector) -> PyResult<PyObject> {
return eval_aggr_expr(
return eval_df_aggr_expr(
py,
expressions::ApproxDistinct::new(
Arc::new(expressions::Column::new("expr0", 0)) as _,
@@ -293,7 +306,7 @@ bind_aggr_expr!(median, Median,[v0], v0, expr0=>0);
#[pyfunction]
fn approx_percentile_cont(py: Python<'_>, values: &PyVector, percent: f64) -> PyResult<PyObject> {
let percent = expressions::Literal::new(datafusion_common::ScalarValue::Float64(Some(percent)));
return eval_aggr_expr(
return eval_df_aggr_expr(
py,
expressions::ApproxPercentileCont::new(
vec![


@@ -63,17 +63,11 @@ pub(crate) fn pyo3_exec_parsed(
rb: &Option<RecordBatch>,
params: &HashMap<String, String>,
) -> Result<RecordBatch> {
let arg_names = if let Some(names) = &copr.deco_args.arg_names {
names
} else {
return OtherSnafu {
reason: "PyO3 Backend doesn't support params yet".to_string(),
}
.fail();
};
// arg_names may be empty: the script can rely on params or use `vector(..)` to construct a PyVector
let arg_names = &copr.deco_args.arg_names.clone().unwrap_or(vec![]);
let args: Vec<PyVector> = if let Some(rb) = rb {
let args = select_from_rb(rb, arg_names)?;
check_args_anno_real_type(&args, copr, rb)?;
check_args_anno_real_type(arg_names, &args, copr, rb)?;
args
} else {
Vec::new()
@@ -83,12 +77,16 @@ pub(crate) fn pyo3_exec_parsed(
Python::with_gil(|py| -> Result<_> {
let mut cols = (|| -> PyResult<_> {
let dummy_decorator = "
# Postponed evaluation of annotations (PEP 563) so annotations can be set freely
# This is needed for Python < 3.9
from __future__ import annotations
# A dummy decorator, actual implementation is in Rust code
def copr(*dummy, **kwdummy):
def inner(func):
return func
return inner
coprocessor = copr
from greptime import vector
";
let gen_call = format!("\n_return_from_coprocessor = {}(*_args_for_coprocessor, **_kwargs_for_coprocessor)", copr.name);
let script = format!("{}{}{}", dummy_decorator, copr.script, gen_call);
@@ -221,10 +219,10 @@ mod copr_test {
@copr(args=["cpu", "mem"], returns=["ref"], backend="pyo3")
def a(cpu, mem, **kwargs):
import greptime as gt
from greptime import vector, log2, sum, pow, col
from greptime import vector, log2, sum, pow, col, lit
for k, v in kwargs.items():
print("%s == %s" % (k, v))
print(dataframe.select([col("cpu")]).collect())
print(dataframe.select([col("cpu")<lit(0.3)]).collect())
return (0.5 < cpu) & ~( cpu >= 0.75)
"#;
let cpu_array = Float32Vector::from_slice([0.9f32, 0.8, 0.7, 0.3]);


@@ -23,6 +23,7 @@ use snafu::ResultExt;
use crate::python::error::DataFusionSnafu;
use crate::python::ffi_types::PyVector;
use crate::python::pyo3::utils::pyo3_obj_try_to_typed_scalar_value;
use crate::python::utils::block_on_async;
type PyExprRef = Py<PyExpr>;
#[pyclass]
@@ -223,6 +224,15 @@ impl PyDataFrame {
}
}
/// Convert a Python object into an `Expr` literal, for use in constructs like `col("number") < lit(42)`
#[pyfunction]
pub(crate) fn lit(py: Python<'_>, value: PyObject) -> PyResult<PyExpr> {
let value = pyo3_obj_try_to_typed_scalar_value(value.as_ref(py), None)?;
let expr: PyExpr = DfExpr::Literal(value).into();
Ok(expr)
}
#[derive(Clone)]
#[pyclass]
pub(crate) struct PyExpr {
inner: DfExpr,
@@ -242,7 +252,8 @@ pub(crate) fn col(name: String) -> PyExpr {
#[pymethods]
impl PyExpr {
fn __richcmp__(&self, other: &Self, op: CompareOp) -> PyResult<Self> {
fn __richcmp__(&self, py: Python<'_>, other: PyObject, op: CompareOp) -> PyResult<Self> {
let other = other.extract::<Self>(py).or_else(|_| lit(py, other))?;
let op = match op {
CompareOp::Lt => DfExpr::lt,
CompareOp::Le => DfExpr::lt_eq,
@@ -251,20 +262,18 @@ impl PyExpr {
CompareOp::Gt => DfExpr::gt,
CompareOp::Ge => DfExpr::gt_eq,
};
Ok(op(self.inner.clone(), other.inner.clone()).into())
py.allow_threads(|| Ok(op(self.inner.clone(), other.inner.clone()).into()))
}
fn alias(&self, name: String) -> PyResult<PyExpr> {
Ok(self.inner.clone().alias(name).into())
}
fn __and__(&self, py: Python<'_>, other: PyExprRef) -> PyResult<PyExpr> {
Ok(self
.inner
.clone()
.and(other.borrow(py).inner.clone())
.into())
let other = other.borrow(py).inner.clone();
py.allow_threads(|| Ok(self.inner.clone().and(other).into()))
}
fn __or__(&self, py: Python<'_>, other: PyExprRef) -> PyResult<PyExpr> {
Ok(self.inner.clone().or(other.borrow(py).inner.clone()).into())
let other = other.borrow(py).inner.clone();
py.allow_threads(|| Ok(self.inner.clone().or(other).into()))
}
fn __invert__(&self) -> PyResult<PyExpr> {
Ok(self.inner.clone().not().into())
@@ -272,4 +281,7 @@ impl PyExpr {
fn sort(&self, asc: bool, nulls_first: bool) -> PyExpr {
self.inner.clone().sort(asc, nulls_first).into()
}
fn __repr__(&self) -> String {
format!("{:#?}", &self.inner)
}
}
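With `lit` and the relaxed `__richcmp__` above, a filter expression can compare a column against either an explicit literal or a bare Python number. The two forms below should be interchangeable inside a pyo3-backend coprocessor; `dataframe` is the object injected into the coprocessor scope, as in the copr tests earlier in this diff, and the snippet is a sketch rather than verified output:

```python
from greptime import col, lit

# Explicit literal vs. a bare number coerced through lit() inside __richcmp__.
expr_a = (col("number") < lit(3)) & (col("number") > lit(0))
expr_b = (col("number") < 3) & (col("number") > 0)

ret = dataframe.select([col("number")]).filter(expr_a).collect()[0][0]
```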


@@ -14,6 +14,7 @@
use std::sync::Mutex;
use arrow::pyarrow::PyArrowException;
use common_telemetry::info;
use datafusion_common::ScalarValue;
use datafusion_expr::ColumnarValue;
@@ -32,7 +33,9 @@ use crate::python::pyo3::builtins::greptime_builtins;
/// Prevent a race condition when initializing the CPython interpreter
static START_PYO3: Lazy<Mutex<bool>> = Lazy::new(|| Mutex::new(false));
pub(crate) fn to_py_err(err: impl ToString) -> PyErr {
PyArrowException::new_err(err.to_string())
}
pub(crate) fn init_cpython_interpreter() {
let mut start = START_PYO3.lock().unwrap();
if !*start {
@@ -100,6 +103,15 @@ macro_rules! to_con_type {
};
}
/// Convert PyAny to [`ScalarValue`]
pub(crate) fn pyo3_obj_try_to_typed_scalar_value(
obj: &PyAny,
dtype: Option<ConcreteDataType>,
) -> PyResult<ScalarValue> {
let val = pyo3_obj_try_to_typed_val(obj, dtype)?;
val.try_to_scalar_value(&val.data_type())
.map_err(|e| PyValueError::new_err(e.to_string()))
}
/// Convert to int/float/boolean; if `dtype` is `None`, convert to the highest-precision type
pub(crate) fn pyo3_obj_try_to_typed_val(
obj: &PyAny,


@@ -12,17 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use arrow::array::{make_array, ArrayData};
use arrow::pyarrow::PyArrowConvert;
use datafusion::arrow::array::BooleanArray;
use datafusion::arrow::compute;
use datafusion::arrow::compute::kernels::{arithmetic, comparison};
use datatypes::arrow::array::{Array, ArrayRef};
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::prelude::{ConcreteDataType, DataType};
use pyo3::exceptions::{PyNotImplementedError, PyValueError};
use datatypes::vectors::Helper;
use pyo3::exceptions::PyValueError;
use pyo3::prelude::*;
use pyo3::pyclass::CompareOp;
use pyo3::types::{PyBool, PyFloat, PyInt, PyList, PyString};
use pyo3::types::{PyBool, PyFloat, PyInt, PyList, PyString, PyType};
use crate::python::ffi_types::vector::{wrap_bool_result, wrap_result, PyVector};
use crate::python::pyo3::utils::pyo3_obj_try_to_typed_val;
use crate::python::ffi_types::vector::{arrow_rtruediv, wrap_bool_result, wrap_result, PyVector};
use crate::python::pyo3::utils::{pyo3_obj_try_to_typed_val, to_py_err};
macro_rules! get_con_type {
($obj:ident, $($pyty:ident => $con_ty:ident),*$(,)?) => {
@@ -179,15 +184,47 @@ impl PyVector {
}
#[allow(unused)]
fn __rtruediv__(&self, py: Python<'_>, other: PyObject) -> PyResult<Self> {
Err(PyNotImplementedError::new_err(()))
if pyo3_is_obj_scalar(other.as_ref(py)) {
self.pyo3_scalar_arith_op(py, other, Some(ArrowDataType::Float64), arrow_rtruediv)
} else {
self.pyo3_vector_arith_op(
py,
other,
Some(ArrowDataType::Float64),
wrap_result(|a, b| arithmetic::divide_dyn(b, a)),
)
}
}
#[allow(unused)]
fn __floordiv__(&self, py: Python<'_>, other: PyObject) -> PyResult<Self> {
Err(PyNotImplementedError::new_err(()))
if pyo3_is_obj_scalar(other.as_ref(py)) {
self.pyo3_scalar_arith_op(
py,
other,
Some(ArrowDataType::Int64),
wrap_result(arithmetic::divide_dyn),
)
} else {
self.pyo3_vector_arith_op(
py,
other,
Some(ArrowDataType::Int64),
wrap_result(arithmetic::divide_dyn),
)
}
}
#[allow(unused)]
fn __rfloordiv__(&self, py: Python<'_>, other: PyObject) -> PyResult<Self> {
Err(PyNotImplementedError::new_err(()))
if pyo3_is_obj_scalar(other.as_ref(py)) {
self.pyo3_scalar_arith_op(py, other, Some(ArrowDataType::Int64), arrow_rtruediv)
} else {
self.pyo3_vector_arith_op(
py,
other,
Some(ArrowDataType::Int64),
wrap_result(|a, b| arithmetic::divide_dyn(b, a)),
)
}
}
fn __and__(&self, other: &Self) -> PyResult<Self> {
Self::vector_and(self, other).map_err(PyValueError::new_err)
@@ -198,6 +235,29 @@ impl PyVector {
fn __invert__(&self) -> PyResult<Self> {
Self::vector_invert(self).map_err(PyValueError::new_err)
}
/// Filter this vector with a boolean array, returning the elements where the filter value is true.
#[pyo3(name = "filter")]
fn pyo3_filter(&self, py: Python<'_>, other: &Self) -> PyResult<Self> {
py.allow_threads(|| {
let left = self.to_arrow_array();
let right = other.to_arrow_array();
if let Some(filter) = right.as_any().downcast_ref::<BooleanArray>() {
let res = compute::filter(left.as_ref(), filter);
let res =
res.map_err(|err| PyValueError::new_err(format!("Arrow Error: {err:#?}")))?;
let ret = Helper::try_into_vector(res.clone()).map_err(|e| {
PyValueError::new_err(format!(
"Can't cast result into vector, result: {res:?}, err: {e:?}",
))
})?;
Ok(ret.into())
} else {
Err(PyValueError::new_err(format!(
"Can't cast operand into a Boolean Array, which is {right:#?}"
)))
}
})
}
fn __len__(&self) -> usize {
self.len()
}
@@ -207,6 +267,17 @@ impl PyVector {
fn __repr__(&self) -> PyResult<String> {
Ok(format!("{self:#?}"))
}
/// Convert to a `pyarrow` array
pub(crate) fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
self.to_arrow_array().data().to_pyarrow(py)
}
/// Convert from `pyarrow`'s array
#[classmethod]
pub(crate) fn from_pyarrow(_cls: &PyType, py: Python, obj: PyObject) -> PyResult<PyVector> {
let array = make_array(ArrayData::from_pyarrow(obj.as_ref(py))?);
let v = Helper::try_into_vector(array).map_err(to_py_err)?;
Ok(v.into())
}
}
#[cfg(test)]
@@ -236,10 +307,10 @@ mod test {
let b: PyVector = (Arc::new(b) as VectorRef).into();
locals.insert("bv2".to_string(), b);
let f = Float64Vector::from_slice(&[0.0f64, 1.0, 42.0, 3.0]);
let f = Float64Vector::from_slice([0.0f64, 1.0, 42.0, 3.0]);
let f: PyVector = (Arc::new(f) as VectorRef).into();
locals.insert("fv1".to_string(), f);
let f = Float64Vector::from_slice(&[1919.810f64, 0.114, 51.4, 3.0]);
let f = Float64Vector::from_slice([1919.810f64, 0.114, 51.4, 3.0]);
let f: PyVector = (Arc::new(f) as VectorRef).into();
locals.insert("fv2".to_string(), f);
locals
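Taken together, `pyo3_filter`, `to_pyarrow`, and `from_pyarrow` give the pyo3 vector a small interop surface. A sketch of how a coprocessor might chain them; using a comparison result as the filter mask mirrors the `(0.5 < cpu)` style in the copr test earlier in this diff, and the commented results are assumptions rather than asserted outputs:

```python
from greptime import vector

v = vector([1.0, -42.0, 2.0, 7.0])
kept = v.filter(v > 0.0)            # boolean mask from a comparison -> [1.0, 2.0, 7.0]
arrow = kept.to_pyarrow()           # hand the data to pyarrow
back = vector.from_pyarrow(arrow)   # and convert a pyarrow array back into a vector
```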


@@ -291,7 +291,7 @@ pub(crate) mod greptime_builtin {
use common_function::scalars::{Function, FunctionRef, FUNCTION_REGISTRY};
use datafusion::arrow::datatypes::DataType as ArrowDataType;
use datafusion::physical_plan::expressions;
use datafusion_expr::ColumnarValue as DFColValue;
use datafusion_expr::{ColumnarValue as DFColValue, Expr as DfExpr};
use datafusion_physical_expr::math_expressions;
use datatypes::arrow::array::{ArrayRef, Int64Array, NullArray};
use datatypes::arrow::error::ArrowError;
@@ -308,13 +308,32 @@ pub(crate) mod greptime_builtin {
};
use crate::python::ffi_types::vector::val_to_pyobj;
use crate::python::ffi_types::PyVector;
use crate::python::rspython::utils::{is_instance, py_vec_obj_to_array, PyVectorRef};
use crate::python::rspython::dataframe_impl::data_frame::{PyExpr, PyExprRef};
use crate::python::rspython::utils::{
is_instance, py_obj_to_value, py_obj_to_vec, PyVectorRef,
};
#[pyfunction]
fn vector(args: OptionalArg<PyObjectRef>, vm: &VirtualMachine) -> PyResult<PyVector> {
PyVector::new(args, vm)
}
#[pyfunction]
fn col(name: String, vm: &VirtualMachine) -> PyExprRef {
let expr: PyExpr = DfExpr::Column(datafusion_common::Column::from_name(name)).into();
expr.into_ref(vm)
}
#[pyfunction]
pub(crate) fn lit(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult<PyExprRef> {
let val = py_obj_to_value(&obj, vm)?;
let scalar_val = val
.try_to_scalar_value(&val.data_type())
.map_err(|e| vm.new_runtime_error(format!("{e}")))?;
let expr: PyExpr = DfExpr::Literal(scalar_val).into();
Ok(expr.into_ref(vm))
}
// the main binding code; due to proc-macro constraints it can't directly use a simpler macro,
// because `pyfunction` is not an attr?
// ------
@@ -966,7 +985,7 @@ pub(crate) mod greptime_builtin {
let args = FuncArgs::new(vec![v.into_pyobject(vm)], KwArgs::default());
let ret = func.invoke(args, vm);
match ret{
Ok(obj) => match py_vec_obj_to_array(&obj, vm, 1){
Ok(obj) => match py_obj_to_vec(&obj, vm, 1){
Ok(v) => if v.len()==1{
Ok(v)
}else{


@@ -33,7 +33,7 @@ use crate::python::ffi_types::{check_args_anno_real_type, select_from_rb, Coproc
use crate::python::rspython::builtins::init_greptime_builtins;
use crate::python::rspython::dataframe_impl::data_frame::set_dataframe_in_scope;
use crate::python::rspython::dataframe_impl::init_data_frame;
use crate::python::rspython::utils::{format_py_error, is_instance, py_vec_obj_to_array};
use crate::python::rspython::utils::{format_py_error, is_instance, py_obj_to_vec};
thread_local!(static INTERPRETER: RefCell<Option<Arc<Interpreter>>> = RefCell::new(None));
@@ -45,8 +45,9 @@ pub(crate) fn rspy_exec_parsed(
) -> Result<RecordBatch> {
// 3. get args from `rb`, and cast them into PyVector
let args: Vec<PyVector> = if let Some(rb) = rb {
let args = select_from_rb(rb, copr.deco_args.arg_names.as_ref().unwrap_or(&vec![]))?;
check_args_anno_real_type(&args, copr, rb)?;
let arg_names = copr.deco_args.arg_names.clone().unwrap_or(vec![]);
let args = select_from_rb(rb, &arg_names)?;
check_args_anno_real_type(&arg_names, &args, copr, rb)?;
args
} else {
vec![]
@@ -158,7 +159,7 @@ pub(crate) fn exec_with_cached_vm(
}
/// convert a tuple of `PyVector` or one `PyVector` (wrapped in a Python object ref, [`PyObjectRef`])
/// to a `Vec<ArrayRef>`
/// to a `Vec<VectorRef>`
/// by default, a constant (int/float/bool) gives a constant array of the same length as the input args
fn try_into_columns(
obj: &PyObjectRef,
@@ -171,11 +172,11 @@ fn try_into_columns(
.with_context(|| ret_other_error_with(format!("can't cast obj {obj:?} to PyTuple)")))?;
let cols = tuple
.iter()
.map(|obj| py_vec_obj_to_array(obj, vm, col_len))
.map(|obj| py_obj_to_vec(obj, vm, col_len))
.collect::<Result<Vec<VectorRef>>>()?;
Ok(cols)
} else {
let col = py_vec_obj_to_array(obj, vm, col_len)?;
let col = py_obj_to_vec(obj, vm, col_len)?;
Ok(vec![col])
}
}
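The doc comment above spells out the accepted return shapes. A hypothetical rspy-backend coprocessor exercising them; the decorator arguments follow the tests earlier in this diff, but returning a tuple without a return-type annotation and relying on the constant broadcast are assumptions based only on that comment:

```python
@copr(args=["cpu"], returns=["flag", "answer"], backend="rspy")
def answer(cpu):
    # The tuple becomes two columns; the bare 42 is broadcast by
    # try_into_columns to a constant column of the input length.
    return cpu > 0.5, 42
```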

Some files were not shown because too many files have changed in this diff.