Compare commits

35 Commits

Author SHA1 Message Date
liyang
b4b105ad35 test 2024-08-27 10:11:32 +08:00
liyang
e1d0bb3749 test 2024-08-27 02:07:07 +08:00
liyang
867d6ab600 test: skopeo authentication 2024-08-27 01:19:54 +08:00
liyang
63a442632e fix: failed to get version (#4622) 2024-08-26 15:33:30 +00:00
liyang
d39bafcfbd fix: change toolchain file name (#4621) 2024-08-26 13:04:06 +00:00
liyang
1717445ebe fix: failed to get github sha (#4620) 2024-08-26 11:42:07 +00:00
liyang
55d65da24d ci: add push dev-build images to aws ecr (#4618)
* ci: add push dev-build images to aws ecr

* chore: use toolchain file generation dev-build image tag

* chore: change dev-build version

* Update .github/workflows/release-dev-builder-images.yaml

Co-authored-by: zyy17 <zyylsxm@gmail.com>

---------

Co-authored-by: zyy17 <zyylsxm@gmail.com>
2024-08-26 09:36:55 +00:00
Weny Xu
3297d5f657 feat: allow skipping topic creation (#4616)
* feat: introduce `create_topics` opt

* feat: allow skipping topic creation

* chore: refine docs

* chore: apply suggestions from CR
2024-08-26 08:34:27 +00:00
Ning Sun
d6865911ee feat: add postgres response for trasaction related statements (#4562)
* feat: add postgres fixtures WIP

* feat: implement more postgres fixtures

* feat: add compatibility for transaction/set transaction/show transaction

* fix: improve regex for set transaction
2024-08-26 08:09:21 +00:00
dennis zhuang
63f2463273 feat!: impl admin command (#4600)
* feat: impl admin statement parser

* feat: introduce AsyncFunction and implements it for admin functions

* feat: execute admin functions

* fix: license header

* fix: panic in test

* chore: fixed by code review
2024-08-26 07:53:40 +00:00
Ruihang Xia
da337a9635 perf: acclerate scatter query (#4607)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-26 03:03:30 +00:00
fys
3973d6b01f chore: optimize common_version build (#4611) 2024-08-23 12:36:28 +00:00
discord9
2c731c76ad chore: add stats feature for jemalloc-ctl (#4610) 2024-08-23 11:18:30 +00:00
ozewr
40e7b58c80 feat: refactoring LruCacheLayer with list_with_metakey and concurrent_stat_in_list (#4596)
* use list_with_metakey and concurrent_stat_in_list

* change concurrent in recover_cache like before

* remove stat funcation

* use 8 concurrent

* use const value

* fmt code

* Apply suggestions from code review

---------

Co-authored-by: ozewr <l19ht@google.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-08-23 03:22:00 +00:00
zyy17
5177717f71 refactor: add fallback_to_local region option (#4578)
* refactor: add 'fallback_to_local_compaction' region option

* refactor: use 'fallback_to_local'
2024-08-23 03:09:43 +00:00
Weny Xu
8d61e6fe49 chore: bump rskafka to 75535b (#4608) 2024-08-23 03:05:52 +00:00
Ruihang Xia
a3b8d2fe8f chore: bump rust toolchain to 2024-08-21 (#4606)
* chore: bump rust toolchain to 2024-08-22

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update workflow

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* try 20240606

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-22 15:38:10 +00:00
Ning Sun
863ee073a9 chore: add commerial support section (#4601)
doc: add commerial support section
2024-08-22 12:03:20 +00:00
Weny Xu
25cd61b310 chore: upgrade toolchain to nightly-2024-08-07 (#4549)
* chore: upgrade toolchain to `nightly-2024-08-07`

* chore(ci): upgrade toolchain

* fix: fix unit test
2024-08-22 11:02:18 +00:00
fys
3517c13192 fix: incremental compilation always compile the common-version crate (#4605)
fix: wrong cargo:rerun
2024-08-22 11:00:33 +00:00
Ruihang Xia
b9cedf2c1a perf: optimize series divide algo (#4603)
* perf: optimize series divide algo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove dead code

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-22 09:16:36 +00:00
LFC
883c5bc5b0 refactor: skip checking the existence of the SST files (#4602)
refactor: skip checking the existence of the SST files when region is directly edited
2024-08-22 08:32:27 +00:00
Yingwen
d628079f4c feat: collect filters metrics for scanners (#4591)
* feat: collect filter metrics

* refactor: reuse ReaderFilterMetrics

* feat: record read rows from parquet by type

* feat: unordered scan observe rows

also fix read type

* chore: rename label
2024-08-22 03:22:05 +00:00
Weny Xu
0025fa6ec7 chore: bump opendal version to 0.49 (#4587)
* chore: bump opendal version to 0.49

* chore: apply suggestions from CR

* Update src/object-store/src/util.rs

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Co-authored-by: Yingwen <realevenyag@gmail.com>
2024-08-22 03:05:36 +00:00
Lanqing Yang
ff04109ee6 docs: add example configs introduced by pg_kvbackend (#4573)
chore: add example configs that introduced after pg_kvbackend
2024-08-22 01:52:02 +00:00
Yingwen
9c1704d4cb docs: move v0.9.1 benchmark report to tsbs dir (#4598)
* docs: move v0.9.1 benchmark report to tsbs dir

* docs: add newlines
2024-08-21 09:31:05 +00:00
Yingwen
a12a905578 chore: disable ttl for write cache by default (#4595)
* chore: remove default write cache ttl

* docs: update example config

* chore: fix ci
2024-08-21 08:38:38 +00:00
shuiyisong
449236360d docs: log benchmark (#4597)
* chore: add log benchmark stuff

* chore: minor update
2024-08-21 07:12:32 +00:00
localhost
bf16422cee fix: pipeline prepare loop break detects a conditional error (#4593) 2024-08-21 06:20:09 +00:00
Ran Joe
9db08dbbe0 refactor(mito2): reduce duplicate IndexOutput struct (#4592)
* refactor(mito2): reduce duplicate IndexOutput struct

* docs(mito2): add index output note
2024-08-20 12:30:17 +00:00
fys
9d885fa0c2 chore: bump tikv-jemalloc* to "0.6" (#4590)
chore: bump tikv-jemalloc* ti "0.6"
2024-08-20 09:08:21 +00:00
Ruihang Xia
b25a2b117e feat: remove sql in error desc (#4589)
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-20 06:37:30 +00:00
fys
6fccff4810 chore: keep symbol table in nightly profile (#4588)
chore: keep symbol table in nighly profile
2024-08-20 02:27:31 +00:00
ozewr
30af78700f feat: Implement the Buf to avoid extra memory allocation (#4585)
* feat: Implement the Buf to avoid extra memory allocation

* fmt toml

* fmt code

* mv entry.into_buffer to raw_entry_buffer

* less reuse opendal

* remove todo #4065

* Update src/mito2/src/wal/entry_reader.rs

Co-authored-by: Weny Xu <wenymedia@gmail.com>

* fmt code

---------

Co-authored-by: ozewr <l19ht@google.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
2024-08-19 12:11:08 +00:00
Ruihang Xia
8de11a0e34 perf: set simple filter on primary key columns to exact filter (#4564)
* perf: set simple filter on primary key columns to exact filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add sqlness test

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix typo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix sqlness

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2024-08-19 09:07:35 +00:00
157 changed files with 2495 additions and 856 deletions

View File

@@ -17,6 +17,12 @@ inputs:
description: Enable dev mode, only build standard greptime
required: false
default: "false"
image-namespace:
description: Image Namespace
required: true
image-registry:
description: Image Registry
required: true
working-dir:
description: Working directory to build the artifacts
required: false
@@ -31,8 +37,8 @@ runs:
run: |
cd ${{ inputs.working-dir }} && \
make run-it-in-container BUILD_JOBS=4 \
IMAGE_NAMESPACE=i8k6a5e1/greptime \
IMAGE_REGISTRY=public.ecr.aws
IMAGE_NAMESPACE=${{ inputs.image-namespace }} \
IMAGE_REGISTRY=${{ inputs.image-registry }}
- name: Upload sqlness logs
if: ${{ failure() && inputs.disable-run-tests == 'false' }} # Only upload logs when the integration tests failed.
@@ -51,8 +57,8 @@ runs:
artifacts-dir: greptime-linux-${{ inputs.arch }}-pyo3-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
- name: Build greptime without pyo3
if: ${{ inputs.dev-mode == 'false' }}
@@ -64,8 +70,8 @@ runs:
artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
- name: Clean up the target directory # Clean up the target directory for the centos7 base image, or it will still use the objects of last build.
shell: bash
@@ -82,8 +88,8 @@ runs:
artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
- name: Build greptime on android base image
uses: ./.github/actions/build-greptime-binary
@@ -94,5 +100,5 @@ runs:
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
build-android-artifacts: true
image-registry: public.ecr.aws
image-namespace: i8k6a5e1/greptime
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}

View File

@@ -13,7 +13,7 @@ on:
name: Build API docs
env:
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
jobs:
apidoc:

View File

@@ -177,6 +177,8 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
dev-mode: true # Only build the standard greptime binary.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
@@ -206,6 +208,8 @@ jobs:
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
dev-mode: true # Only build the standard greptime binary.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
release-images-to-dockerhub:
name: Build and push images to DockerHub

View File

@@ -30,7 +30,7 @@ concurrency:
cancel-in-progress: true
env:
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
jobs:
check-typos-and-docs:

View File

@@ -154,6 +154,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
@@ -173,6 +175,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
release-images-to-dockerhub:
name: Build and push images to DockerHub

View File

@@ -10,7 +10,7 @@ concurrency:
cancel-in-progress: true
env:
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
permissions:
issues: write

View File

@@ -3,10 +3,6 @@ name: Release dev-builder images
on:
workflow_dispatch: # Allows you to run this workflow manually.
inputs:
version:
description: Version of the dev-builder
required: false
default: latest
release_dev_builder_ubuntu_image:
type: boolean
description: Release dev-builder-ubuntu image
@@ -28,22 +24,103 @@ jobs:
name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
runs-on: ubuntu-20.04-16-cores
outputs:
version: ${{ steps.set-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Configure build image version
id: set-version
shell: bash
run: |
commitShortSHA=`echo ${{ github.sha }} | cut -c1-8`
buildTime=`date +%Y%m%d%H%M%S`
BUILD_VERSION="$commitShortSHA-$buildTime"
RUST_TOOLCHAIN_VERSION=$(cat rust-toolchain.toml | grep -Eo '[0-9]{4}-[0-9]{2}-[0-9]{2}')
IMAGE_VERSION="${RUST_TOOLCHAIN_VERSION}-${BUILD_VERSION}"
echo "VERSION=${IMAGE_VERSION}" >> $GITHUB_ENV
echo "version=$IMAGE_VERSION" >> $GITHUB_OUTPUT
- name: Build and push dev builder images
uses: ./.github/actions/build-dev-builder-images
with:
version: ${{ inputs.version }}
version: ${{ env.VERSION }}
dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }}
build-dev-builder-ubuntu: ${{ inputs.release_dev_builder_ubuntu_image }}
build-dev-builder-centos: ${{ inputs.release_dev_builder_centos_image }}
build-dev-builder-android: ${{ inputs.release_dev_builder_android_image }}
release-dev-builder-images-ecr:
name: Release dev builder images to AWS ECR
runs-on: ubuntu-20.04
needs: [
release-dev-builder-images
]
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ECR_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_ECR_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.ECR_REGION }}
- name: Login to Amazon ECR
id: login-ecr-public
uses: aws-actions/amazon-ecr-login@v2
env:
AWS_REGION: ${{ vars.ECR_REGION }}
with:
registry-type: public
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:latest
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:latest
release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-20.04
@@ -51,35 +128,39 @@ jobs:
release-dev-builder-images
]
steps:
- name: Login to AliCloud Container Registry
uses: docker/login-action@v3
with:
registry: ${{ vars.ACR_IMAGE_REGISTRY }}
username: ${{ secrets.ALICLOUD_USERNAME }}
password: ${{ secrets.ALICLOUD_PASSWORD }}
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ inputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ inputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ inputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}

View File

@@ -82,7 +82,7 @@ on:
# Use env variables to control all the release process.
env:
# The arguments of building greptime.
RUST_TOOLCHAIN: nightly-2024-04-20
RUST_TOOLCHAIN: nightly-2024-06-06
CARGO_PROFILE: nightly
# Controls whether to run tests, include unit-test, integration-test and sqlness.
@@ -183,6 +183,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
@@ -202,6 +204,8 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
image-registry: ${{ vars.ECR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.ECR_IMAGE_NAMESPACE }}
build-macos-artifacts:
name: Build macOS artifacts

Cargo.lock generated
View File

@@ -1954,6 +1954,7 @@ dependencies = [
"statrs",
"store-api",
"table",
"tokio",
]
[[package]]
@@ -4568,9 +4569,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "human-panic"
version = "1.2.3"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f016c89920bbb30951a8405ecacbb4540db5524313b9445736e7e1855cf370"
checksum = "1c5a08ed290eac04006e21e63d32e90086b6182c7cd0452d10f4264def1fec9a"
dependencies = [
"anstream",
"anstyle",
@@ -7010,9 +7011,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opendal"
version = "0.48.0"
version = "0.49.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "615d41187deea0ea7fab5b48e9afef6ae8fc742fdcfa248846ee3d92ff71e986"
checksum = "39d516adf7db912c38af382c3e92c27cd62fbbc240e630920555d784c2ab1494"
dependencies = [
"anyhow",
"async-trait",
@@ -9170,8 +9171,9 @@ dependencies = [
[[package]]
name = "rsasl"
version = "2.0.2"
source = "git+https://github.com/wenyxu/rsasl.git?rev=06ebb683d5539c3410de4ce9fa37ff9b97e790a4#06ebb683d5539c3410de4ce9fa37ff9b97e790a4"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45035615cdd68c71daac89aef75b130d4b2cad29599966e1b4671f8fbb463559"
dependencies = [
"base64 0.22.1",
"core2",
@@ -9188,9 +9190,8 @@ dependencies = [
[[package]]
name = "rskafka"
version = "0.5.0"
source = "git+https://github.com/WenyXu/rskafka.git?rev=940c6030012c5b746fad819fb72e3325b26e39de#940c6030012c5b746fad819fb72e3325b26e39de"
source = "git+https://github.com/influxdata/rskafka.git?rev=75535b5ad9bae4a5dbb582c82e44dfd81ec10105#75535b5ad9bae4a5dbb582c82e44dfd81ec10105"
dependencies = [
"async-trait",
"bytes",
"chrono",
"crc32c",
@@ -9199,7 +9200,6 @@ dependencies = [
"integer-encoding 4.0.0",
"lz4",
"parking_lot 0.12.3",
"pin-project-lite",
"rand",
"rsasl",
"rustls 0.23.10",
@@ -11800,9 +11800,9 @@ dependencies = [
[[package]]
name = "tikv-jemalloc-ctl"
version = "0.5.4"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c"
checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b"
dependencies = [
"libc",
"paste",
@@ -11811,9 +11811,9 @@ dependencies = [
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.4+5.3.0-patched"
version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d"
dependencies = [
"cc",
"libc",
@@ -11821,9 +11821,9 @@ dependencies = [
[[package]]
name = "tikv-jemallocator"
version = "0.5.4"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865"
dependencies = [
"libc",
"tikv-jemalloc-sys",

View File

@@ -77,6 +77,7 @@ clippy.readonly_write_lock = "allow"
rust.unknown_lints = "deny"
# Remove this after https://github.com/PyO3/pyo3/issues/4094
rust.non_local_definitions = "allow"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
[workspace.dependencies]
# We turn off default-features for some dependencies here so the workspaces which inherit them can
@@ -151,8 +152,7 @@ reqwest = { version = "0.12", default-features = false, features = [
"stream",
"multipart",
] }
# SCRAM-SHA-512 requires https://github.com/dequbed/rsasl/pull/48, https://github.com/influxdata/rskafka/pull/247
rskafka = { git = "https://github.com/WenyXu/rskafka.git", rev = "940c6030012c5b746fad819fb72e3325b26e39de", features = [
rskafka = { git = "https://github.com/influxdata/rskafka.git", rev = "75535b5ad9bae4a5dbb582c82e44dfd81ec10105", features = [
"transport-tls",
] }
rstest = "0.21"
@@ -251,7 +251,7 @@ debug = 1
[profile.nightly]
inherits = "release"
strip = true
strip = "debuginfo"
lto = "thin"
debug = false
incremental = false

View File

@@ -106,7 +106,7 @@ strip-android-bin: build-android-bin ## Strip greptime binary for android.
docker run --network=host \
-v ${PWD}:/greptimedb \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-android:latest \
bash -c '$${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip /greptimedb/target/aarch64-linux-android/release/greptime'
bash -c '$${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip --strip-debug /greptimedb/target/aarch64-linux-android/release/greptime'
.PHONY: clean
clean: ## Clean the project.

View File

@@ -150,7 +150,7 @@ Our official Grafana dashboard is available at [grafana](grafana/README.md) dire
## Project Status
The current version has not yet reached the standards for General Availability.
The current version has not yet reached the standards for General Availability.
According to our Greptime 2024 Roadmap, we aim to achieve a production-level version with the release of v1.0 by the end of 2024. [Join Us](https://github.com/GreptimeTeam/greptimedb/issues/3412)
We welcome you to test and use GreptimeDB. Some users have already adopted it in their production environments. If you're interested in trying it out, please use the latest stable release available.
@@ -172,6 +172,13 @@ In addition, you may:
- Connect us with [Linkedin](https://www.linkedin.com/company/greptime/)
- Follow us on [Twitter](https://twitter.com/greptime)
## Commercial Support
If you are running GreptimeDB OSS in your organization, we offer additional
enterprise add-ons, installation services, training and consulting. [Contact
us](https://greptime.com/contactus) and we will reach out to you with more
details of our commercial license.
## License
GreptimeDB uses the [Apache License 2.0](https://apache.org/licenses/LICENSE-2.0.txt) to strike a balance between

View File

@@ -67,9 +67,10 @@
| `wal.prefill_log_files` | Bool | `false` | Whether to pre-create log files on start up.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_period` | String | `10s` | Duration for fsyncing log files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.broker_endpoints` | Array | -- | The Kafka broker endpoints.<br/>**It's only used when the provider is `kafka`**. |
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start.<br/>**It's only used when the provider is `kafka`**. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.num_topics` | Integer | `64` | Number of topics.<br/>**It's only used when the provider is `kafka`**. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default)<br/>**It's only used when the provider is `kafka`**. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>**It's only used when the provider is `kafka`**. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.<br/>**It's only used when the provider is `kafka`**. |
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition.<br/>**It's only used when the provider is `kafka`**. |
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled.<br/>**It's only used when the provider is `kafka`**. |
| `wal.max_batch_bytes` | String | `1MB` | The max size of a single producer batch.<br/>Warning: Kafka has a default limit of 1MB per message in a topic.<br/>**It's only used when the provider is `kafka`**. |
@@ -116,11 +117,12 @@
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | `512MB` | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `1h` | TTL for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `None` | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |
@@ -259,12 +261,13 @@
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `store_addr` | String | `127.0.0.1:2379` | Etcd server address. |
| `store_addr` | String | `127.0.0.1:2379` | Store server address default to etcd store. |
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `backend` | String | `EtcdStore` | The datastore for meta server. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
| `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. |
@@ -285,9 +288,10 @@
| `wal` | -- | -- | -- |
| `wal.provider` | String | `raft_engine` | -- |
| `wal.broker_endpoints` | Array | -- | The broker endpoints of the Kafka cluster. |
| `wal.num_topics` | Integer | `64` | Number of topics to be created upon start. |
| `wal.auto_create_topics` | Bool | `true` | Automatically create topics for WAL.<br/>Set to `true` to automatically create topics for WAL.<br/>Otherwise, use topics named `topic_name_prefix_[0..num_topics)` |
| `wal.num_topics` | Integer | `64` | Number of topics. |
| `wal.selector_type` | String | `round_robin` | Topic selector type.<br/>Available selector types:<br/>- `round_robin` (default) |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`. |
| `wal.topic_name_prefix` | String | `greptimedb_wal_topic` | A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.<br/>e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1. |
| `wal.replication_factor` | Integer | `1` | Expected number of replicas of each partition. |
| `wal.create_topic_timeout` | String | `30s` | Above which a topic creation operation will be cancelled. |
| `wal.backoff_init` | String | `500ms` | The initial backoff for kafka clients. |
@@ -408,11 +412,12 @@
| `region_engine.mito.global_write_buffer_reject_size` | String | `2GB` | Global write buffer size threshold to reject write requests. If not set, it's default to 2 times of `global_write_buffer_size` |
| `region_engine.mito.sst_meta_cache_size` | String | `128MB` | Cache size for SST metadata. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/32 of OS memory with a max limitation of 128MB. |
| `region_engine.mito.vector_cache_size` | String | `512MB` | Cache size for vectors and arrow arrays. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.page_cache_size` | String | `512MB` | Cache size for pages of SST row groups. Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/8 of OS memory. |
| `region_engine.mito.selector_result_cache_size` | String | `512MB` | Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.<br/>If not set, it's default to 1/16 of OS memory with a max limitation of 512MB. |
| `region_engine.mito.enable_experimental_write_cache` | Bool | `false` | Whether to enable the experimental write cache. |
| `region_engine.mito.experimental_write_cache_path` | String | `""` | File system path for write cache, defaults to `{data_home}/write_cache`. |
| `region_engine.mito.experimental_write_cache_size` | String | `512MB` | Capacity for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `1h` | TTL for write cache. |
| `region_engine.mito.experimental_write_cache_ttl` | String | `None` | TTL for write cache. |
| `region_engine.mito.sst_write_buffer_size` | String | `8MB` | Buffer size for SST writing. |
| `region_engine.mito.scan_parallelism` | Integer | `0` | Parallelism to scan a region (default: 1/4 of cpu cores).<br/>- `0`: using the default value (1/4 of cpu cores).<br/>- `1`: scan in current thread.<br/>- `n`: scan in parallelism n. |
| `region_engine.mito.parallel_scan_channel_size` | Integer | `32` | Capacity of the channel to send data from parallel scan tasks to the main task. |

View File

@@ -394,9 +394,13 @@ sst_meta_cache_size = "128MB"
vector_cache_size = "512MB"
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## If not set, it's default to 1/8 of OS memory.
page_cache_size = "512MB"
## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
selector_result_cache_size = "512MB"
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false
@@ -407,7 +411,8 @@ experimental_write_cache_path = ""
experimental_write_cache_size = "512MB"
## TTL for write cache.
experimental_write_cache_ttl = "1h"
## +toml2docs:none-default
experimental_write_cache_ttl = "8h"
## Buffer size for SST writing.
sst_write_buffer_size = "8MB"

View File

@@ -7,7 +7,7 @@ bind_addr = "127.0.0.1:3002"
## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
server_addr = "127.0.0.1:3002"
## Etcd server address.
## Store server address default to etcd store.
store_addr = "127.0.0.1:2379"
## Datanode selector type.
@@ -32,6 +32,9 @@ store_key_prefix = ""
## - Using shared storage (e.g., s3).
enable_region_failover = false
## The datastore for meta server.
backend = "EtcdStore"
## The runtime options.
[runtime]
## The number of threads to execute the runtime for global read operations.
@@ -96,7 +99,12 @@ provider = "raft_engine"
## The broker endpoints of the Kafka cluster.
broker_endpoints = ["127.0.0.1:9092"]
## Number of topics to be created upon start.
## Automatically create topics for WAL.
## Set to `true` to automatically create topics for WAL.
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
auto_create_topics = true
## Number of topics.
num_topics = 64
## Topic selector type.
@@ -105,6 +113,7 @@ num_topics = 64
selector_type = "round_robin"
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
## e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.
topic_name_prefix = "greptimedb_wal_topic"
## Expected number of replicas of each partition.

View File

@@ -171,7 +171,12 @@ sync_period = "10s"
## **It's only used when the provider is `kafka`**.
broker_endpoints = ["127.0.0.1:9092"]
## Number of topics to be created upon start.
## Automatically create topics for WAL.
## Set to `true` to automatically create topics for WAL.
## Otherwise, use topics named `topic_name_prefix_[0..num_topics)`
auto_create_topics = true
## Number of topics.
## **It's only used when the provider is `kafka`**.
num_topics = 64
@@ -182,6 +187,7 @@ num_topics = 64
selector_type = "round_robin"
## A Kafka topic is constructed by concatenating `topic_name_prefix` and `topic_id`.
## e.g., greptimedb_wal_topic_0, greptimedb_wal_topic_1.
## **It's only used when the provider is `kafka`**.
topic_name_prefix = "greptimedb_wal_topic"
@@ -431,9 +437,13 @@ sst_meta_cache_size = "128MB"
vector_cache_size = "512MB"
## Cache size for pages of SST row groups. Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
## If not set, it's default to 1/8 of OS memory.
page_cache_size = "512MB"
## Cache size for time series selector (e.g. `last_value()`). Setting it to 0 to disable the cache.
## If not set, it's default to 1/16 of OS memory with a max limitation of 512MB.
selector_result_cache_size = "512MB"
## Whether to enable the experimental write cache.
enable_experimental_write_cache = false
@@ -444,7 +454,8 @@ experimental_write_cache_path = ""
experimental_write_cache_size = "512MB"
## TTL for write cache.
experimental_write_cache_ttl = "1h"
## +toml2docs:none-default
experimental_write_cache_ttl = "8h"
## Buffer size for SST writing.
sst_write_buffer_size = "8MB"

View File

@@ -0,0 +1,51 @@
# Log benchmark configuration
This repo holds the configurations we used to benchmark GreptimeDB, Clickhouse and Elasticsearch.
Here are the versions of the databases used in the benchmark:
| name | version |
| :------------ | :--------- |
| GreptimeDB | v0.9.2 |
| Clickhouse | 24.9.1.219 |
| Elasticsearch | 8.15.0 |
## Structured model vs Unstructured model
We divide the test into two parts, using the structured model and the unstructured model respectively. You can also see the difference in the create table clauses.
__Structured model__
The log data is pre-processed into columns by vector. For example, an insert request looks like the following:
```SQL
INSERT INTO test_table (bytes, http_version, ip, method, path, status, user, timestamp) VALUES ()
```
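For clarity, here is a sketch of the same statement with one fully populated row; the values are purely illustrative:
```SQL
INSERT INTO test_table (bytes, http_version, ip, method, path, status, user, timestamp)
VALUES (1024, 'HTTP/1.1', '192.168.1.10', 'GET', '/user/booperbot124', 200, 'CrucifiX', '2024-08-16 04:30:44.000');
```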
The goal is to test string/text support in each database. In real scenarios this means the data source (or log producers) has separate fields defined, or has already processed the raw input.
__Unstructured model__
The log data is inserted as one long string, and then we build a fulltext index on these strings. For example, an insert request looks like the following:
```SQL
INSERT INTO test_table (message, timestamp) VALUES ()
```
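Again as a sketch with one illustrative row, where the message is a sample apache_common line of the kind the vector config parses:
```SQL
INSERT INTO test_table (message, timestamp)
VALUES ('192.168.1.10 - CrucifiX [16/Aug/2024:04:30:44 +0000] "GET /user/booperbot124 HTTP/1.1" 200 1024', '2024-08-16 04:30:44.000');
```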
The goal is to test fuzzy search performance for each database. In real scenarios it means the log is produced by some kind of middleware and inserted directly into the database.
## Creating tables
See [here](./create_table.sql) for the GreptimeDB and Clickhouse create table clauses.
The Elasticsearch mapping is created automatically.
## Vector Configuration
We use vector to generate random log data and send inserts to databases.
Please refer to [structured config](./structured_vector.toml) and [unstructured config](./unstructured_vector.toml) for detailed configuration.
## SQLs and payloads
Please refer to [SQL query](./query.sql) for GreptimeDB and Clickhouse, and [query payload](./query.md) for Elasticsearch.
## Steps to reproduce
0. Decide whether to run the structured model test or the unstructured model test.
1. Build the vector binary (see vector's config files for the specific branch) and the database binaries accordingly.
2. Create the tables in GreptimeDB and Clickhouse in advance.
3. Run vector to insert data.
4. When data insertion is finished, run the queries against each database. Note: you'll need to update the time range values after data insertion.
## Additional notes
- You can tune GreptimeDB's configuration to get better performance.
- You can set up GreptimeDB to use S3 as storage; see [here](https://docs.greptime.com/user-guide/operations/configuration/#storage-options) for the available options and the sketch below.
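A minimal sketch of the storage section for such a setup, assuming the keys described in the linked configuration docs; the bucket name, prefix and credentials are placeholders:
```TOML
[storage]
# Switch the data storage from the local file system to S3.
type = "S3"
# Placeholder bucket and data prefix; replace with your own.
bucket = "my-greptimedb-bucket"
root = "greptimedb-data"
# Placeholder credentials; inject them via your own secret management.
access_key_id = "<access-key-id>"
secret_access_key = "<secret-access-key>"
region = "us-west-2"
```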

View File

@@ -0,0 +1,56 @@
-- GreptimeDB create table clause
-- structured test, use vector to pre-process log data into fields
CREATE TABLE IF NOT EXISTS `test_table` (
`bytes` Int64 NULL,
`http_version` STRING NULL,
`ip` STRING NULL,
`method` STRING NULL,
`path` STRING NULL,
`status` SMALLINT UNSIGNED NULL,
`user` STRING NULL,
`timestamp` TIMESTAMP(3) NOT NULL,
PRIMARY KEY (`user`, `path`, `status`),
TIME INDEX (`timestamp`)
)
ENGINE=mito
WITH(
append_mode = 'true'
);
-- unstructured test, build fulltext index on message column
CREATE TABLE IF NOT EXISTS `test_table` (
`message` STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'),
`timestamp` TIMESTAMP(3) NOT NULL,
TIME INDEX (`timestamp`)
)
ENGINE=mito
WITH(
append_mode = 'true'
);
-- Clickhouse create table clause
-- structured test
CREATE TABLE IF NOT EXISTS test_table
(
bytes UInt64 NOT NULL,
http_version String NOT NULL,
ip String NOT NULL,
method String NOT NULL,
path String NOT NULL,
status UInt8 NOT NULL,
user String NOT NULL,
timestamp String NOT NULL,
)
ENGINE = MergeTree()
ORDER BY (user, path, status);
-- unstructured test
SET allow_experimental_full_text_index = true;
CREATE TABLE IF NOT EXISTS test_table
(
message String,
timestamp String,
INDEX inv_idx(message) TYPE full_text(0) GRANULARITY 1
)
ENGINE = MergeTree()
ORDER BY tuple();

View File

@@ -0,0 +1,199 @@
# Query URL and payload for Elasticsearch
## Count
URL: `http://127.0.0.1:9200/_count`
## Query by timerange
URL: `http://127.0.0.1:9200/_search`
You can use the following payload to get the full timerange first.
```JSON
{"size":0,"aggs":{"max_timestamp":{"max":{"field":"timestamp"}},"min_timestamp":{"min":{"field":"timestamp"}}}}
```
And then use this payload to query by timerange.
```JSON
{
"from": 0,
"size": 1000,
"query": {
"range": {
"timestamp": {
"gte": "2024-08-16T04:30:44.000Z",
"lte": "2024-08-16T04:51:52.000Z"
}
}
}
}
```
## Query by condition
URL: `http://127.0.0.1:9200/_search`
### Structured payload
```JSON
{
"from": 0,
"size": 10000,
"query": {
"bool": {
"must": [
{
"term": {
"user.keyword": "CrucifiX"
}
},
{
"term": {
"method.keyword": "OPTION"
}
},
{
"term": {
"path.keyword": "/user/booperbot124"
}
},
{
"term": {
"http_version.keyword": "HTTP/1.1"
}
},
{
"term": {
"status": "401"
}
}
]
}
}
}
```
### Unstructured payload
```JSON
{
"from": 0,
"size": 10000,
"query": {
"bool": {
"must": [
{
"match_phrase": {
"message": "CrucifiX"
}
},
{
"match_phrase": {
"message": "OPTION"
}
},
{
"match_phrase": {
"message": "/user/booperbot124"
}
},
{
"match_phrase": {
"message": "HTTP/1.1"
}
},
{
"match_phrase": {
"message": "401"
}
}
]
}
}
}
```
## Query by condition and timerange
URL: `http://127.0.0.1:9200/_search`
### Structured payload
```JSON
{
"size": 10000,
"query": {
"bool": {
"must": [
{
"term": {
"user.keyword": "CrucifiX"
}
},
{
"term": {
"method.keyword": "OPTION"
}
},
{
"term": {
"path.keyword": "/user/booperbot124"
}
},
{
"term": {
"http_version.keyword": "HTTP/1.1"
}
},
{
"term": {
"status": "401"
}
},
{
"range": {
"timestamp": {
"gte": "2024-08-19T07:03:37.383Z",
"lte": "2024-08-19T07:24:58.883Z"
}
}
}
]
}
}
}
```
### Unstructured payload
```JSON
{
"size": 10000,
"query": {
"bool": {
"must": [
{
"match_phrase": {
"message": "CrucifiX"
}
},
{
"match_phrase": {
"message": "OPTION"
}
},
{
"match_phrase": {
"message": "/user/booperbot124"
}
},
{
"match_phrase": {
"message": "HTTP/1.1"
}
},
{
"match_phrase": {
"message": "401"
}
},
{
"range": {
"timestamp": {
"gte": "2024-08-19T05:16:17.099Z",
"lte": "2024-08-19T05:46:02.722Z"
}
}
}
]
}
}
}
```

View File

@@ -0,0 +1,50 @@
-- Structured query for GreptimeDB and Clickhouse
-- query count
select count(*) from test_table;
-- query by timerange. Note: place the timestamp range in the where clause
-- GreptimeDB
-- you can use `select max(timestamp)::bigint from test_table;` and `select min(timestamp)::bigint from test_table;`
-- to get the full timestamp range
select * from test_table where timestamp between 1723710843619 and 1723711367588;
-- Clickhouse
-- you can use `select max(timestamp) from test_table;` and `select min(timestamp) from test_table;`
-- to get the full timestamp range
select * from test_table where timestamp between '2024-08-16T03:58:46Z' and '2024-08-16T04:03:50Z';
-- query by condition
SELECT * FROM test_table WHERE user = 'CrucifiX' and method = 'OPTION' and path = '/user/booperbot124' and http_version = 'HTTP/1.1' and status = 401;
-- query by condition and timerange
-- GreptimeDB
SELECT * FROM test_table WHERE user = "CrucifiX" and method = "OPTION" and path = "/user/booperbot124" and http_version = "HTTP/1.1" and status = 401
and timestamp between 1723774396760 and 1723774788760;
-- Clickhouse
SELECT * FROM test_table WHERE user = 'CrucifiX' and method = 'OPTION' and path = '/user/booperbot124' and http_version = 'HTTP/1.1' and status = 401
and timestamp between '2024-08-16T03:58:46Z' and '2024-08-16T04:03:50Z';
-- Unstructured query for GreptimeDB and Clickhouse
-- query by condition
-- GreptimeDB
SELECT * FROM test_table WHERE MATCHES(message, "+CrucifiX +OPTION +/user/booperbot124 +HTTP/1.1 +401");
-- Clickhouse
SELECT * FROM test_table WHERE (message LIKE '%CrucifiX%')
AND (message LIKE '%OPTION%')
AND (message LIKE '%/user/booperbot124%')
AND (message LIKE '%HTTP/1.1%')
AND (message LIKE '%401%');
-- query by condition and timerange
-- GreptimeDB
SELECT * FROM test_table WHERE MATCHES(message, "+CrucifiX +OPTION +/user/booperbot124 +HTTP/1.1 +401")
and timestamp between 1723710843619 and 1723711367588;
-- Clickhouse
SELECT * FROM test_table WHERE (message LIKE '%CrucifiX%')
AND (message LIKE '%OPTION%')
AND (message LIKE '%/user/booperbot124%')
AND (message LIKE '%HTTP/1.1%')
AND (message LIKE '%401%')
AND timestamp between '2024-08-15T10:25:26.524000000Z' AND '2024-08-15T10:31:31.746000000Z';

View File

@@ -0,0 +1,57 @@
# Please note we use patched branch to build vector
# https://github.com/shuiyisong/vector/tree/chore/greptime_log_ingester_logitem
[sources.demo_logs]
type = "demo_logs"
format = "apache_common"
# interval value = 1 / rps
# say you want to insert at 20k/s, that is 1 / 20000 = 0.00005
# set to 0 to run as fast as possible
interval = 0
# total rows to insert
count = 100000000
lines = [ "line1" ]
[transforms.parse_logs]
type = "remap"
inputs = ["demo_logs"]
source = '''
. = parse_regex!(.message, r'^(?P<ip>\S+) - (?P<user>\S+) \[(?P<timestamp>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) (?P<http_version>\S+)" (?P<status>\d+) (?P<bytes>\d+)$')
# Convert timestamp to a standard format
.timestamp = parse_timestamp!(.timestamp, format: "%d/%b/%Y:%H:%M:%S %z")
# Convert status and bytes to integers
.status = to_int!(.status)
.bytes = to_int!(.bytes)
'''
[sinks.sink_greptime_logs]
type = "greptimedb_logs"
# The table to insert into
table = "test_table"
pipeline_name = "demo_pipeline"
compression = "none"
inputs = [ "parse_logs" ]
endpoint = "http://127.0.0.1:4000"
# Batch size for each insertion
batch.max_events = 4000
[sinks.clickhouse]
type = "clickhouse"
inputs = [ "parse_logs" ]
database = "default"
endpoint = "http://127.0.0.1:8123"
format = "json_each_row"
# The table to insert into
table = "test_table"
[sinks.sink_elasticsearch]
type = "elasticsearch"
inputs = [ "parse_logs" ]
api_version = "auto"
compression = "none"
doc_type = "_doc"
endpoints = [ "http://127.0.0.1:9200" ]
id_key = "id"
mode = "bulk"

View File

@@ -0,0 +1,43 @@
# Please note we use patched branch to build vector
# https://github.com/shuiyisong/vector/tree/chore/greptime_log_ingester_ft
[sources.demo_logs]
type = "demo_logs"
format = "apache_common"
# interval value = 1 / rps
# say you want to insert at 20k/s, that is 1 / 20000 = 0.00005
# set to 0 to run as fast as possible
interval = 0
# total rows to insert
count = 100000000
lines = [ "line1" ]
[sinks.sink_greptime_logs]
type = "greptimedb_logs"
# The table to insert into
table = "test_table"
pipeline_name = "demo_pipeline"
compression = "none"
inputs = [ "demo_logs" ]
endpoint = "http://127.0.0.1:4000"
# Batch size for each insertion
batch.max_events = 500
[sinks.clickhouse]
type = "clickhouse"
inputs = [ "demo_logs" ]
database = "default"
endpoint = "http://127.0.0.1:8123"
format = "json_each_row"
# The table to insert into
table = "test_table"
[sinks.sink_elasticsearch]
type = "elasticsearch"
inputs = [ "demo_logs" ]
api_version = "auto"
compression = "none"
doc_type = "_doc"
endpoints = [ "http://127.0.0.1:9200" ]
id_key = "id"
mode = "bulk"

View File

@@ -1,2 +1,3 @@
[toolchain]
channel = "nightly-2024-04-20"
channel = "nightly-2024-06-06"

View File

@@ -91,7 +91,7 @@ impl Database {
///
/// - the name of database when using GreptimeDB standalone or cluster
/// - the name provided by GreptimeCloud or other multi-tenant GreptimeDB
/// environment
/// environment
pub fn new_with_dbname(dbname: impl Into<String>, client: Client) -> Self {
Self {
catalog: String::default(),

View File

@@ -51,7 +51,7 @@ file-engine.workspace = true
flow.workspace = true
frontend = { workspace = true, default-features = false }
futures.workspace = true
human-panic = "1.2.2"
human-panic = "2.0"
lazy_static.workspace = true
meta-client.workspace = true
meta-srv.workspace = true
@@ -80,7 +80,7 @@ tonic.workspace = true
tracing-appender = "0.2"
[target.'cfg(not(windows))'.dependencies]
tikv-jemallocator = "0.5"
tikv-jemallocator = "0.6"
[dev-dependencies]
client = { workspace = true, features = ["testing"] }

View File

@@ -139,13 +139,10 @@ async fn start(cli: Command) -> Result<()> {
}
fn setup_human_panic() {
let metadata = human_panic::Metadata {
version: env!("CARGO_PKG_VERSION").into(),
name: "GreptimeDB".into(),
authors: Default::default(),
homepage: "https://github.com/GreptimeTeam/greptimedb/discussions".into(),
};
human_panic::setup_panic!(metadata);
human_panic::setup_panic!(
human_panic::Metadata::new("GreptimeDB", env!("CARGO_PKG_VERSION"))
.homepage("https://github.com/GreptimeTeam/greptimedb/discussions")
);
common_telemetry::set_panic_hook();
}

View File

@@ -82,6 +82,7 @@ fn test_load_datanode_example_config() {
vector_cache_size: ReadableSize::mb(512),
page_cache_size: ReadableSize::mb(512),
max_background_jobs: 4,
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
..Default::default()
}),
RegionEngineConfig::File(EngineConfig {}),
@@ -218,6 +219,7 @@ fn test_load_standalone_example_config() {
vector_cache_size: ReadableSize::mb(512),
page_cache_size: ReadableSize::mb(512),
max_background_jobs: 4,
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60 * 8)),
..Default::default()
}),
RegionEngineConfig::File(EngineConfig {}),

View File

@@ -48,19 +48,19 @@ pub fn build_db_string(catalog: &str, schema: &str) -> String {
/// The database name may come from different sources:
///
/// - MySQL `schema` name in MySQL protocol login request: it's optional and user
/// and switch database using `USE` command
/// and switch database using `USE` command
/// - Postgres `database` parameter in Postgres wire protocol, required
/// - HTTP RESTful API: the database parameter, optional
/// - gRPC: the dbname field in header, optional but has a higher priority than
/// original catalog/schema
/// original catalog/schema
///
/// When database name is provided, we attempt to parse catalog and schema from
/// it. We assume the format `[<catalog>-]<schema>`:
///
/// - If `[<catalog>-]` part is not provided, we use whole database name as
/// schema name
/// schema name
/// - if `[<catalog>-]` is provided, we split database name with `-` and use
/// `<catalog>` and `<schema>`.
/// `<catalog>` and `<schema>`.
pub fn parse_catalog_and_schema_from_db_string(db: &str) -> (String, String) {
match parse_optional_catalog_and_schema_from_db_string(db) {
(Some(catalog), schema) => (catalog, schema),

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use object_store::services::Fs;
use object_store::util::DefaultLoggingInterceptor;
use object_store::ObjectStore;
use snafu::ResultExt;
@@ -22,13 +23,9 @@ pub fn build_fs_backend(root: &str) -> Result<ObjectStore> {
let builder = Fs::default();
let object_store = ObjectStore::new(builder.root(root))
.context(BuildBackendSnafu)?
.layer(
object_store::layers::LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(object_store::layers::LoggingLayer::new(
DefaultLoggingInterceptor,
))
.layer(object_store::layers::TracingLayer)
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
.finish();

View File

@@ -15,6 +15,7 @@
use std::collections::HashMap;
use object_store::services::S3;
use object_store::util::DefaultLoggingInterceptor;
use object_store::ObjectStore;
use snafu::ResultExt;
@@ -84,13 +85,9 @@ pub fn build_s3_backend(
// TODO(weny): Consider finding a better way to eliminate duplicate code.
Ok(ObjectStore::new(builder)
.context(error::BuildBackendSnafu)?
.layer(
object_store::layers::LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(object_store::layers::LoggingLayer::new(
DefaultLoggingInterceptor,
))
.layer(object_store::layers::TracingLayer)
.layer(object_store::layers::PrometheusMetricsLayer::new(true))
.finish())

View File

@@ -39,3 +39,4 @@ table.workspace = true
[dev-dependencies]
ron = "0.7"
serde = { version = "1.0", features = ["derive"] }
tokio.workspace = true

View File

@@ -110,7 +110,7 @@ mod test {
use session::context::QueryContext;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_flush_flow_metadata() {
@@ -130,8 +130,8 @@ mod test {
);
}
#[test]
fn test_missing_flow_service() {
#[tokio::test]
async fn test_missing_flow_service() {
let f = FlushFlowFunction;
let args = vec!["flow_name"];
@@ -140,7 +140,7 @@ mod test {
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing FlowServiceHandler, not expected",
result.to_string()

View File

@@ -32,7 +32,7 @@ pub struct FunctionContext {
impl FunctionContext {
/// Create a mock [`FunctionContext`] for test.
#[cfg(any(test, feature = "testing"))]
#[cfg(test)]
pub fn mock() -> Self {
Self {
query_ctx: QueryContextBuilder::default().build().into(),
@@ -56,8 +56,10 @@ pub trait Function: fmt::Display + Sync + Send {
/// Returns the name of the function, should be unique.
fn name(&self) -> &str;
/// The returned data type of function execution.
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType>;
/// The signature of function.
fn signature(&self) -> Signature;
/// Evaluate the function, e.g. run/execute the function.
@@ -65,3 +67,22 @@ pub trait Function: fmt::Display + Sync + Send {
}
pub type FunctionRef = Arc<dyn Function>;
/// Async Scalar function trait
#[async_trait::async_trait]
pub trait AsyncFunction: fmt::Display + Sync + Send {
/// Returns the name of the function, should be unique.
fn name(&self) -> &str;
/// The returned data type of function execution.
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType>;
/// The signature of function.
fn signature(&self) -> Signature;
/// Evaluate the function, e.g. run/execute the function.
/// TODO(dennis): simplify the signature and refactor all the admin functions.
async fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef>;
}
pub type AsyncFunctionRef = Arc<dyn AsyncFunction>;
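
To make the new trait concrete, here is a hedged sketch of a hypothetical async admin function and its registration; the struct name, behaviour, and import paths are assumptions pieced together from the surrounding hunks, not code from this PR:

use std::fmt;
use std::sync::Arc;

use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{StringVector, VectorRef};

use crate::function::{AsyncFunction, FunctionContext};
use crate::function_registry::FunctionRegistry;

// Hypothetical admin function; it only exists to show the trait shape.
#[derive(Clone, Debug, Default)]
struct PingFunction;

impl fmt::Display for PingFunction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "PING")
    }
}

#[async_trait::async_trait]
impl AsyncFunction for PingFunction {
    fn name(&self) -> &str {
        "ping"
    }

    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
        Ok(ConcreteDataType::string_datatype())
    }

    fn signature(&self) -> Signature {
        Signature::uniform(0, vec![], Volatility::Immutable)
    }

    async fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
        // An async admin function may await I/O (e.g. a meta or procedure call) here.
        Ok(Arc::new(StringVector::from_slice(&["pong"])) as _)
    }
}

fn register(registry: &FunctionRegistry) {
    // Async functions go through `register_async`, matching the registry change below.
    registry.register_async(Arc::new(PingFunction));
}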

View File

@@ -18,7 +18,7 @@ use std::sync::{Arc, RwLock};
use once_cell::sync::Lazy;
use crate::function::FunctionRef;
use crate::function::{AsyncFunctionRef, FunctionRef};
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::date::DateFunction;
use crate::scalars::expression::ExpressionFunction;
@@ -32,6 +32,7 @@ use crate::table::TableFunction;
#[derive(Default)]
pub struct FunctionRegistry {
functions: RwLock<HashMap<String, FunctionRef>>,
async_functions: RwLock<HashMap<String, AsyncFunctionRef>>,
aggregate_functions: RwLock<HashMap<String, AggregateFunctionMetaRef>>,
}
@@ -44,6 +45,27 @@ impl FunctionRegistry {
.insert(func.name().to_string(), func);
}
pub fn register_async(&self, func: AsyncFunctionRef) {
let _ = self
.async_functions
.write()
.unwrap()
.insert(func.name().to_string(), func);
}
pub fn get_async_function(&self, name: &str) -> Option<AsyncFunctionRef> {
self.async_functions.read().unwrap().get(name).cloned()
}
pub fn async_functions(&self) -> Vec<AsyncFunctionRef> {
self.async_functions
.read()
.unwrap()
.values()
.cloned()
.collect()
}
pub fn register_aggregate_function(&self, func: AggregateFunctionMetaRef) {
let _ = self
.aggregate_functions

View File

@@ -75,7 +75,7 @@ where
// to keep the not_greater length == floor+1
// so to ensure the peek of the not_greater is array[floor]
// and the peek of the greater is array[floor+1]
let p = if let Some(p) = self.p { p } else { 0.0_f64 };
let p = self.p.unwrap_or(0.0_f64);
let floor = (((self.n - 1) as f64) * p / (100_f64)).floor();
if value <= *self.not_greater.peek().unwrap() {
self.not_greater.push(value);
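
A quick worked example of the index computation above, assuming n = 5 values and p = 50 (the median): floor = ((5 - 1) * 50 / 100).floor() = 2, so `not_greater` is kept at floor + 1 = 3 elements, its peek is array[2] (the median), and the peek of `greater` is array[3].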

View File

@@ -28,7 +28,7 @@ pub struct FunctionState {
impl FunctionState {
/// Create a mock [`FunctionState`] for test.
#[cfg(any(test, feature = "testing"))]
#[cfg(test)]
pub fn mock() -> Self {
use std::sync::Arc;

View File

@@ -22,7 +22,7 @@ mod version;
use std::sync::Arc;
use build::BuildFunction;
use database::DatabaseFunction;
use database::{CurrentSchemaFunction, DatabaseFunction};
use pg_catalog::PGCatalogFunction;
use procedure_state::ProcedureStateFunction;
use timezone::TimezoneFunction;
@@ -37,8 +37,9 @@ impl SystemFunction {
registry.register(Arc::new(BuildFunction));
registry.register(Arc::new(VersionFunction));
registry.register(Arc::new(DatabaseFunction));
registry.register(Arc::new(CurrentSchemaFunction));
registry.register(Arc::new(TimezoneFunction));
registry.register(Arc::new(ProcedureStateFunction));
registry.register_async(Arc::new(ProcedureStateFunction));
PGCatalogFunction::register(registry);
}
}

View File

@@ -26,11 +26,35 @@ use crate::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
pub struct DatabaseFunction;
const NAME: &str = "database";
#[derive(Clone, Debug, Default)]
pub struct CurrentSchemaFunction;
const DATABASE_FUNCTION_NAME: &str = "database";
const CURRENT_SCHEMA_FUNCTION_NAME: &str = "current_schema";
impl Function for DatabaseFunction {
fn name(&self) -> &str {
NAME
DATABASE_FUNCTION_NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::uniform(0, vec![], Volatility::Immutable)
}
fn eval(&self, func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let db = func_ctx.query_ctx.current_schema();
Ok(Arc::new(StringVector::from_slice(&[&db])) as _)
}
}
impl Function for CurrentSchemaFunction {
fn name(&self) -> &str {
CURRENT_SCHEMA_FUNCTION_NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
@@ -54,6 +78,12 @@ impl fmt::Display for DatabaseFunction {
}
}
impl fmt::Display for CurrentSchemaFunction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "CURRENT_SCHEMA")
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;

View File

@@ -14,11 +14,13 @@
mod pg_get_userbyid;
mod table_is_visible;
mod version;
use std::sync::Arc;
use pg_get_userbyid::PGGetUserByIdFunction;
use table_is_visible::PGTableIsVisibleFunction;
use version::PGVersionFunction;
use crate::function_registry::FunctionRegistry;
@@ -35,5 +37,6 @@ impl PGCatalogFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(PGTableIsVisibleFunction));
registry.register(Arc::new(PGGetUserByIdFunction));
registry.register(Arc::new(PGVersionFunction));
}
}

View File

@@ -0,0 +1,54 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use std::{env, fmt};
use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{StringVector, VectorRef};
use crate::function::{Function, FunctionContext};
#[derive(Clone, Debug, Default)]
pub(crate) struct PGVersionFunction;
impl fmt::Display for PGVersionFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, crate::pg_catalog_func_fullname!("VERSION"))
}
}
impl Function for PGVersionFunction {
fn name(&self) -> &str {
crate::pg_catalog_func_fullname!("version")
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::string_datatype())
}
fn signature(&self) -> Signature {
Signature::exact(vec![], Volatility::Immutable)
}
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef> {
let result = StringVector::from(vec![format!(
"PostgreSQL 16.3 GreptimeDB {}",
env!("CARGO_PKG_VERSION")
)]);
Ok(Arc::new(result))
}
}

View File

@@ -96,7 +96,7 @@ mod tests {
use datatypes::vectors::StringVector;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_procedure_state_misc() {
@@ -114,8 +114,8 @@ mod tests {
));
}
#[test]
fn test_missing_procedure_service() {
#[tokio::test]
async fn test_missing_procedure_service() {
let f = ProcedureStateFunction;
let args = vec!["pid"];
@@ -125,15 +125,15 @@ mod tests {
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing ProcedureServiceHandler, not expected",
result.to_string()
);
}
#[test]
fn test_procedure_state() {
#[tokio::test]
async fn test_procedure_state() {
let f = ProcedureStateFunction;
let args = vec!["pid"];
@@ -143,7 +143,7 @@ mod tests {
.map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(StringVector::from(vec![
"{\"status\":\"Done\",\"error\":\"OK\"}",

View File

@@ -31,11 +31,11 @@ pub(crate) struct TableFunction;
impl TableFunction {
/// Register all table functions to [`FunctionRegistry`].
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(MigrateRegionFunction));
registry.register(Arc::new(FlushRegionFunction));
registry.register(Arc::new(CompactRegionFunction));
registry.register(Arc::new(FlushTableFunction));
registry.register(Arc::new(CompactTableFunction));
registry.register(Arc::new(FlushFlowFunction));
registry.register_async(Arc::new(MigrateRegionFunction));
registry.register_async(Arc::new(FlushRegionFunction));
registry.register_async(Arc::new(CompactRegionFunction));
registry.register_async(Arc::new(FlushTableFunction));
registry.register_async(Arc::new(CompactTableFunction));
registry.register_async(Arc::new(FlushFlowFunction));
}
}

View File

@@ -77,7 +77,7 @@ mod tests {
use datatypes::vectors::UInt64Vector;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
macro_rules! define_region_function_test {
($name: ident, $func: ident) => {
@@ -97,8 +97,8 @@ mod tests {
} if valid_types == ConcreteDataType::numerics()));
}
#[test]
fn [<test_ $name _missing_table_mutation>]() {
#[tokio::test]
async fn [<test_ $name _missing_table_mutation>]() {
let f = $func;
let args = vec![99];
@@ -108,15 +108,15 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing TableMutationHandler, not expected",
result.to_string()
);
}
#[test]
fn [<test_ $name>]() {
#[tokio::test]
async fn [<test_ $name>]() {
let f = $func;
@@ -127,7 +127,7 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([42]));
assert_eq!(expect, result);

View File

@@ -210,7 +210,7 @@ mod tests {
use session::context::QueryContext;
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
macro_rules! define_table_function_test {
($name: ident, $func: ident) => {
@@ -230,8 +230,8 @@ mod tests {
} if valid_types == vec![ConcreteDataType::string_datatype()]));
}
#[test]
fn [<test_ $name _missing_table_mutation>]() {
#[tokio::test]
async fn [<test_ $name _missing_table_mutation>]() {
let f = $func;
let args = vec!["test"];
@@ -241,15 +241,15 @@ mod tests {
.map(|arg| Arc::new(StringVector::from(vec![arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing TableMutationHandler, not expected",
result.to_string()
);
}
#[test]
fn [<test_ $name>]() {
#[tokio::test]
async fn [<test_ $name>]() {
let f = $func;
@@ -260,7 +260,7 @@ mod tests {
.map(|arg| Arc::new(StringVector::from(vec![arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(UInt64Vector::from_slice([42]));
assert_eq!(expect, result);

View File

@@ -123,7 +123,7 @@ mod tests {
use datatypes::vectors::{StringVector, UInt64Vector, VectorRef};
use super::*;
use crate::function::{Function, FunctionContext};
use crate::function::{AsyncFunction, FunctionContext};
#[test]
fn test_migrate_region_misc() {
@@ -140,8 +140,8 @@ mod tests {
} if sigs.len() == 2));
}
#[test]
fn test_missing_procedure_service() {
#[tokio::test]
async fn test_missing_procedure_service() {
let f = MigrateRegionFunction;
let args = vec![1, 1, 1];
@@ -151,15 +151,15 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::default(), &args).unwrap_err();
let result = f.eval(FunctionContext::default(), &args).await.unwrap_err();
assert_eq!(
"Missing ProcedureServiceHandler, not expected",
result.to_string()
);
}
#[test]
fn test_migrate_region() {
#[tokio::test]
async fn test_migrate_region() {
let f = MigrateRegionFunction;
let args = vec![1, 1, 1];
@@ -169,7 +169,7 @@ mod tests {
.map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _)
.collect::<Vec<_>>();
let result = f.eval(FunctionContext::mock(), &args).unwrap();
let result = f.eval(FunctionContext::mock(), &args).await.unwrap();
let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"]));
assert_eq!(expect, result);

View File

@@ -187,7 +187,8 @@ fn build_struct(
}
impl crate::function::Function for #name {
#[async_trait::async_trait]
impl crate::function::AsyncFunction for #name {
fn name(&self) -> &'static str {
#display_name
}
@@ -200,7 +201,7 @@ fn build_struct(
#sig_fn()
}
fn eval(&self, func_ctx: crate::function::FunctionContext, columns: &[datatypes::vectors::VectorRef]) -> common_query::error::Result<datatypes::vectors::VectorRef> {
async fn eval(&self, func_ctx: crate::function::FunctionContext, columns: &[datatypes::vectors::VectorRef]) -> common_query::error::Result<datatypes::vectors::VectorRef> {
// Ensure under the `greptime` catalog for security
crate::ensure_greptime!(func_ctx);
@@ -212,51 +213,36 @@ fn build_struct(
};
let columns = Vec::from(columns);
// TODO(dennis): DataFusion doesn't support async UDF currently
std::thread::spawn(move || {
use snafu::OptionExt;
use datatypes::data_type::DataType;
use snafu::OptionExt;
use datatypes::data_type::DataType;
let query_ctx = &func_ctx.query_ctx;
let handler = func_ctx
.state
.#handler
.as_ref()
.context(#snafu_type)?;
let query_ctx = &func_ctx.query_ctx;
let handler = func_ctx
.state
.#handler
.as_ref()
.context(#snafu_type)?;
let mut builder = store_api::storage::ConcreteDataType::#ret()
.create_mutable_vector(rows_num);
let mut builder = store_api::storage::ConcreteDataType::#ret()
.create_mutable_vector(rows_num);
if columns_num == 0 {
let result = common_runtime::block_on_global(async move {
#fn_name(handler, query_ctx, &[]).await
})?;
if columns_num == 0 {
let result = #fn_name(handler, query_ctx, &[]).await?;
builder.push_value_ref(result.as_value_ref());
} else {
for i in 0..rows_num {
let args: Vec<_> = columns.iter()
.map(|vector| vector.get_ref(i))
.collect();
let result = #fn_name(handler, query_ctx, &args).await?;
builder.push_value_ref(result.as_value_ref());
} else {
for i in 0..rows_num {
let args: Vec<_> = columns.iter()
.map(|vector| vector.get_ref(i))
.collect();
let result = common_runtime::block_on_global(async move {
#fn_name(handler, query_ctx, &args).await
})?;
builder.push_value_ref(result.as_value_ref());
}
}
}
Ok(builder.to_vector())
})
.join()
.map_err(|e| {
common_telemetry::error!(e; "Join thread error");
common_query::error::Error::ThreadJoin {
location: snafu::Location::default(),
}
})?
Ok(builder.to_vector())
}
}

View File

@@ -76,6 +76,7 @@ pub fn range_fn(args: TokenStream, input: TokenStream) -> TokenStream {
/// - `&ProcedureServiceHandlerRef` or `&TableMutationHandlerRef` or `FlowServiceHandlerRef` as the first argument,
/// - `&QueryContextRef` as the second argument, and
/// - `&[ValueRef<'_>]` as the third argument which is SQL function input values in each row.
///
/// Return type must be `common_query::error::Result<Value>`.
///
/// # Example see `common/function/src/system/procedure_state.rs`.

View File

@@ -15,8 +15,8 @@ tempfile = "3.4"
tokio.workspace = true
[target.'cfg(not(windows))'.dependencies]
tikv-jemalloc-ctl = { version = "0.5", features = ["use_std"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["use_std", "stats"] }
[target.'cfg(not(windows))'.dependencies.tikv-jemalloc-sys]
features = ["stats", "profiling", "unprefixed_malloc_on_supported_platforms"]
version = "0.5"
version = "0.6"

View File

@@ -172,8 +172,8 @@ impl From<TableLock> for StringKey {
///
/// Note:
/// - Allows modifying the corresponding region's [TableRouteValue](crate::key::table_route::TableRouteValue),
/// [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue) even if
/// it acquires the [RegionLock::Write] only without acquiring the [TableLock::Write].
/// [TableDatanodeValue](crate::key::datanode_table::DatanodeTableValue) even if
/// it acquires the [RegionLock::Write] only without acquiring the [TableLock::Write].
///
/// - Should acquire [TableLock] of the table at same procedure.
///

View File

@@ -76,6 +76,10 @@ impl TopicManager {
/// The initializer first tries to restore persisted topics from the kv backend.
/// If not enough topics are retrieved, the initializer will try to contact the Kafka cluster and request the creation of more topics.
pub async fn start(&self) -> Result<()> {
// Skip creating topics.
if !self.config.auto_create_topics {
return Ok(());
}
let num_topics = self.config.kafka_topic.num_topics;
ensure!(num_topics > 0, InvalidNumTopicsSnafu { num_topics });

View File

@@ -51,7 +51,7 @@ const META_TTL: Duration = Duration::from_secs(60 * 10);
/// [Notify] is not a condition variable, we can't guarantee the waiters are notified
/// if they didn't call `notified()` before we signal the notify. So we
/// 1. use dedicated notify for each condition, such as waiting for a lock, waiting
/// for children;
/// for children;
/// 2. always use `notify_one` and ensure there is only one waiter.
#[derive(Debug)]
pub(crate) struct ProcedureMeta {

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(lazy_cell)]
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::LazyLock;

View File

@@ -19,7 +19,11 @@ use build_data::{format_timestamp, get_source_time};
use shadow_rs::{CARGO_METADATA, CARGO_TREE};
fn main() -> shadow_rs::SdResult<()> {
println!("cargo:rerun-if-changed=.git/refs/heads");
println!(
"cargo:rerun-if-changed={}/.git/refs/heads",
env!("CARGO_RUSTC_CURRENT_DIR")
);
println!(
"cargo:rustc-env=SOURCE_TIMESTAMP={}",
if let Ok(t) = get_source_time() {

View File

@@ -53,6 +53,7 @@ impl From<DatanodeWalConfig> for MetasrvWalConfig {
connection: config.connection,
backoff: config.backoff,
kafka_topic: config.kafka_topic,
auto_create_topics: config.auto_create_topics,
}),
}
}
@@ -188,6 +189,7 @@ mod tests {
replication_factor: 1,
create_topic_timeout: Duration::from_secs(30),
},
auto_create_topics: true,
};
assert_eq!(metasrv_wal_config, MetasrvWalConfig::Kafka(expected));

View File

@@ -187,7 +187,7 @@ impl Default for KafkaConnectionConfig {
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KafkaTopicConfig {
/// Number of topics to be created upon start.
/// Number of topics.
pub num_topics: usize,
/// Number of partitions per topic.
pub num_partitions: i32,

View File

@@ -40,6 +40,9 @@ pub struct DatanodeKafkaConfig {
/// The kafka topic config.
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
// Automatically create topics for WAL.
pub auto_create_topics: bool,
// Create index for WAL.
pub create_index: bool,
#[serde(with = "humantime_serde")]
pub dump_index_interval: Duration,
@@ -54,6 +57,7 @@ impl Default for DatanodeKafkaConfig {
consumer_wait_timeout: Duration::from_millis(100),
backoff: BackoffConfig::default(),
kafka_topic: KafkaTopicConfig::default(),
auto_create_topics: true,
create_index: true,
dump_index_interval: Duration::from_secs(60),
}

View File

@@ -18,7 +18,7 @@ use super::common::KafkaConnectionConfig;
use crate::config::kafka::common::{backoff_prefix, BackoffConfig, KafkaTopicConfig};
/// Kafka wal configurations for metasrv.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(default)]
pub struct MetasrvKafkaConfig {
/// The kafka connection config.
@@ -30,4 +30,17 @@ pub struct MetasrvKafkaConfig {
/// The kafka config.
#[serde(flatten)]
pub kafka_topic: KafkaTopicConfig,
// Automatically create topics for WAL.
pub auto_create_topics: bool,
}
impl Default for MetasrvKafkaConfig {
fn default() -> Self {
Self {
connection: Default::default(),
backoff: Default::default(),
kafka_topic: Default::default(),
auto_create_topics: true,
}
}
}
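
With `Default` now hand-written, topic auto-creation can be switched off in code while keeping every other default; a minimal, assumed usage sketch (field visibility as shown in the hunk above):

// Assumed usage: keep the defaults but disable WAL topic auto-creation,
// e.g. when topics are provisioned outside of GreptimeDB.
fn external_topics_config() -> MetasrvKafkaConfig {
    MetasrvKafkaConfig {
        auto_create_topics: false,
        ..Default::default()
    }
}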

View File

@@ -27,7 +27,7 @@ use common_runtime::Runtime;
use query::dataframe::DataFrame;
use query::plan::LogicalPlan;
use query::planner::LogicalPlanner;
use query::query_engine::DescribeResult;
use query::query_engine::{DescribeResult, QueryEngineState};
use query::{QueryEngine, QueryEngineContext};
use session::context::QueryContextRef;
use store_api::metadata::RegionMetadataRef;
@@ -86,6 +86,9 @@ impl QueryEngine for MockQueryEngine {
fn engine_context(&self, _query_ctx: QueryContextRef) -> QueryEngineContext {
unimplemented!()
}
fn engine_state(&self) -> &QueryEngineState {
unimplemented!()
}
}
/// Create a region server without any engine

View File

@@ -17,7 +17,6 @@
//! It also contains definition of expression, adapter and plan, and internal state management.
#![feature(let_chains)]
#![feature(duration_abs_diff)]
#![allow(dead_code)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::too_many_lines)]

View File

@@ -441,6 +441,9 @@ pub fn check_permission(
}
match stmt {
// Will be checked in execution.
// TODO(dennis): add a hook for admin commands.
Statement::Admin(_) => {}
// These are executed by query engine, and will be checked there.
Statement::Query(_) | Statement::Explain(_) | Statement::Tql(_) | Statement::Delete(_) => {}
// database ops won't be checked

View File

@@ -40,8 +40,9 @@ pub(crate) const ESTIMATED_META_SIZE: usize = 256;
/// - If the entry is able to fit into a Kafka record, it's converted into a Full record.
///
/// - If the entry is too large to fit into a Kafka record, it's converted into a collection of records.
///
/// Those records must contain exactly one First record and one Last record, and potentially several
/// Middle records. There may be no Middle record.
/// Middle records. There may be no Middle record.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
pub enum RecordType {
/// The record is self-contained, i.e. an entry's data is fully stored into this record.
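
A simplified sketch of the invariant described above (exactly one First, exactly one Last, zero or more Middle); chunking by a fixed payload size is an assumption for illustration and ignores the reserved metadata size:

// Illustrative only: split an entry of `entry_len` bytes into record parts.
#[derive(Debug, Clone, PartialEq)]
enum Part {
    Full,
    First,
    Middle,
    Last,
}

fn split(entry_len: usize, max_payload: usize) -> Vec<Part> {
    if entry_len <= max_payload {
        return vec![Part::Full];
    }
    let chunks = entry_len.div_ceil(max_payload); // >= 2 here
    let mut parts = vec![Part::First];
    parts.extend(std::iter::repeat(Part::Middle).take(chunks - 2));
    parts.push(Part::Last);
    parts
}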

View File

@@ -23,6 +23,7 @@ use store_api::logstore::entry::{Entry, NaiveEntry};
use store_api::logstore::provider::Provider;
use store_api::storage::RegionId;
#[allow(renamed_and_removed_lints)]
pub mod protos {
include!(concat!(env!("OUT_DIR"), concat!("/", "protos/", "mod.rs")));
}

View File

@@ -15,8 +15,8 @@
#![feature(async_closure)]
#![feature(result_flattening)]
#![feature(assert_matches)]
#![feature(option_take_if)]
#![feature(extract_if)]
#![feature(option_take_if)]
pub mod bootstrap;
mod cache_invalidator;

View File

@@ -33,8 +33,8 @@ impl UpdateMetadata {
/// About the failure of updating the [TableRouteValue](common_meta::key::table_region::TableRegionValue):
///
/// - There may be another [RegionMigrationProcedure](crate::procedure::region_migration::RegionMigrationProcedure)
/// that is executed concurrently for **other region**.
/// It will only update **other region** info. Therefore, It's safe to retry after failure.
/// that is executed concurrently for **other region**.
/// It will only update **other region** info. Therefore, It's safe to retry after failure.
///
/// - There is no other DDL procedure executed concurrently for the current table.
pub async fn downgrade_leader_region(&self, ctx: &mut Context) -> Result<()> {

View File

@@ -122,7 +122,7 @@ impl UpdateMetadata {
///
/// Abort(non-retry):
/// - TableRoute or RegionRoute is not found.
/// Typically, it's impossible, there is no other DDL procedure executed concurrently for the current table.
/// Typically, it's impossible, there is no other DDL procedure executed concurrently for the current table.
///
/// Retry:
/// - Failed to update [TableRouteValue](common_meta::key::table_region::TableRegionValue).

View File

@@ -27,6 +27,7 @@ pub trait WeightedChoose<Item>: Send + Sync {
/// Note:
/// 1. make sure weight_array is not empty.
/// 2. the total weight is greater than 0.
///
/// Otherwise an error will be returned.
fn set_weight_array(&mut self, weight_array: Vec<WeightedItem<Item>>) -> Result<()>;

View File

@@ -61,9 +61,9 @@ impl CheckLeader for RwLock<State> {
/// To use this cache, the following constraints must be followed:
/// 1. The leader node can create this metadata.
/// 2. The follower node can create this metadata. The leader node can lazily retrieve
/// the corresponding data through the caching loading mechanism.
/// the corresponding data through the caching loading mechanism.
/// 3. Only the leader node can update this metadata, as the cache cannot detect
/// modifications made to the data on the follower node.
/// modifications made to the data on the follower node.
/// 4. Only the leader node can delete this metadata for the same reason mentioned above.
pub struct LeaderCachedKvBackend {
check_leader: CheckLeaderRef,

View File

@@ -223,7 +223,7 @@ pub enum Error {
#[snafu(display("Unsupported region request: {}", request))]
UnsupportedRegionRequest {
request: RegionRequest,
request: Box<RegionRequest>,
#[snafu(implicit)]
location: Location,
},

View File

@@ -175,14 +175,6 @@ impl AccessLayer {
Ok(sst_info)
}
/// Returns whether the file exists in the object store.
pub(crate) async fn is_exist(&self, file_meta: &FileMeta) -> Result<bool> {
let path = location::sst_file_path(&self.region_dir, file_meta.file_id);
self.object_store
.is_exist(&path)
.await
.context(OpenDalSnafu)
}
}
/// `OperationType` represents the origin of the `SstWriteRequest`.

View File

@@ -48,8 +48,8 @@ use crate::compaction::picker::{new_picker, CompactionTask};
use crate::compaction::task::CompactionTaskImpl;
use crate::config::MitoConfig;
use crate::error::{
CompactRegionSnafu, Error, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result,
TimeRangePredicateOverflowSnafu,
CompactRegionSnafu, Error, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu,
RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu,
};
use crate::metrics::COMPACTION_STAGE_ELAPSED;
use crate::read::projection::ProjectionMapper;
@@ -314,6 +314,16 @@ impl CompactionScheduler {
return Ok(());
}
Err(e) => {
if !current_version.options.compaction.fallback_to_local() {
error!(e; "Failed to schedule remote compaction job for region {}", region_id);
return RemoteCompactionSnafu {
region_id,
job_id: None,
reason: e.reason,
}
.fail();
}
error!(e; "Failed to schedule remote compaction job for region {}, fallback to local compaction", region_id);
// Return the waiters back to the caller for local compaction.
@@ -531,7 +541,10 @@ impl<'a> CompactionSstReaderBuilder<'a> {
scan_input.with_predicate(time_range_to_predicate(time_range, &self.metadata)?);
}
SeqScan::new(scan_input).build_reader().await
SeqScan::new(scan_input)
.with_compaction()
.build_reader()
.await
}
}

View File

@@ -143,7 +143,7 @@ impl Default for MitoConfig {
enable_experimental_write_cache: false,
experimental_write_cache_path: String::new(),
experimental_write_cache_size: ReadableSize::mb(512),
experimental_write_cache_ttl: Some(Duration::from_secs(60 * 60)),
experimental_write_cache_ttl: None,
sst_write_buffer_size: DEFAULT_WRITE_BUFFER_SIZE,
scan_parallelism: divide_num_cpus(4),
parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE,

View File

@@ -757,14 +757,14 @@ pub enum Error {
},
#[snafu(display(
"Failed to remotely compact region {} by job {} due to {}",
"Failed to remotely compact region {} by job {:?} due to {}",
region_id,
job_id,
reason
))]
RemoteCompaction {
region_id: RegionId,
job_id: JobId,
job_id: Option<JobId>,
reason: String,
#[snafu(implicit)]
location: Location,

View File

@@ -56,6 +56,7 @@ use crate::error::{
use crate::memtable::BoxedBatchIterator;
use crate::metrics::{READ_BATCHES_RETURN, READ_ROWS_RETURN, READ_STAGE_ELAPSED};
use crate::read::prune::PruneReader;
use crate::sst::parquet::reader::{ReaderFilterMetrics, ReaderMetrics};
/// Storage internal representation of a batch of rows for a primary key (time series).
///
@@ -752,11 +753,13 @@ pub(crate) struct ScannerMetrics {
num_batches: usize,
/// Number of rows returned.
num_rows: usize,
/// Filter related metrics for readers.
filter_metrics: ReaderFilterMetrics,
}
impl ScannerMetrics {
/// Sets and observes metrics on initializing parts.
fn observe_init_part(&mut self, build_parts_cost: Duration) {
fn observe_init_part(&mut self, build_parts_cost: Duration, reader_metrics: &ReaderMetrics) {
self.build_parts_cost = build_parts_cost;
// Observes metrics.
@@ -766,6 +769,11 @@ impl ScannerMetrics {
READ_STAGE_ELAPSED
.with_label_values(&["build_parts"])
.observe(self.build_parts_cost.as_secs_f64());
// We only call this once so we overwrite it directly.
self.filter_metrics = reader_metrics.filter_metrics;
// Observes filter metrics.
self.filter_metrics.observe();
}
/// Observes metrics on scanner finish.

View File

@@ -31,7 +31,7 @@ use crate::read::{Batch, BatchReader, BoxedBatchReader, Source};
///
/// The merge reader merges [Batch]es from multiple sources that yield sorted batches.
/// 1. Batch is ordered by primary key, time index, sequence desc, op type desc (we can
/// ignore op type as sequence is already unique).
/// ignore op type as sequence is already unique).
/// 2. Batches from sources **must** not be empty.
///
/// The reader won't concatenate batches. Each batch returned by the reader also doesn't

View File

@@ -97,13 +97,13 @@ impl PruneReader {
let num_rows_before_filter = batch.num_rows();
let Some(batch_filtered) = self.context.precise_filter(batch)? else {
// the entire batch is filtered out
self.metrics.num_rows_precise_filtered += num_rows_before_filter;
self.metrics.filter_metrics.num_rows_precise_filtered += num_rows_before_filter;
return Ok(None);
};
// update metric
let filtered_rows = num_rows_before_filter - batch_filtered.num_rows();
self.metrics.num_rows_precise_filtered += filtered_rows;
self.metrics.filter_metrics.num_rows_precise_filtered += filtered_rows;
if !batch_filtered.is_empty() {
Ok(Some(batch_filtered))

View File

@@ -50,6 +50,7 @@ use crate::sst::index::fulltext_index::applier::FulltextIndexApplierRef;
use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
use crate::sst::index::inverted_index::applier::InvertedIndexApplierRef;
use crate::sst::parquet::file_range::FileRange;
use crate::sst::parquet::reader::ReaderMetrics;
/// A scanner scans a region and returns a [SendableRecordBatchStream].
pub(crate) enum Scanner {
@@ -606,8 +607,9 @@ impl ScanInput {
pub(crate) async fn prune_file_ranges(
&self,
collector: &mut impl FileRangeCollector,
) -> Result<()> {
) -> Result<ReaderMetrics> {
let mut file_prune_cost = Duration::ZERO;
let mut reader_metrics = ReaderMetrics::default();
for file in &self.files {
let prune_start = Instant::now();
let res = self
@@ -620,7 +622,7 @@ impl ScanInput {
.inverted_index_applier(self.inverted_index_applier.clone())
.fulltext_index_applier(self.fulltext_index_applier.clone())
.expected_metadata(Some(self.mapper.metadata().clone()))
.build_reader_input()
.build_reader_input(&mut reader_metrics)
.await;
file_prune_cost += prune_start.elapsed();
let (mut file_range_ctx, row_groups) = match res {
@@ -665,7 +667,7 @@ impl ScanInput {
file_prune_cost
);
Ok(())
Ok(reader_metrics)
}
/// Scans the input source in another task and sends batches to the sender.

View File

@@ -59,6 +59,8 @@ pub struct SeqScan {
/// Semaphore to control scan parallelism of files.
/// Streams created by the scanner share the same semaphore.
semaphore: Arc<Semaphore>,
/// The scanner is used for compaction.
compaction: bool,
}
impl SeqScan {
@@ -75,9 +77,16 @@ impl SeqScan {
properties,
stream_ctx,
semaphore: Arc::new(Semaphore::new(parallelism)),
compaction: false,
}
}
/// Sets the scanner to be used for compaction.
pub(crate) fn with_compaction(mut self) -> Self {
self.compaction = true;
self
}
/// Builds a stream for the query.
///
/// The returned stream is not partitioned and will contain all the data. If want
@@ -97,9 +106,13 @@ impl SeqScan {
prepare_scan_cost: self.stream_ctx.query_start.elapsed(),
..Default::default()
};
let maybe_reader =
Self::build_all_merge_reader(&self.stream_ctx, self.semaphore.clone(), &mut metrics)
.await?;
let maybe_reader = Self::build_all_merge_reader(
&self.stream_ctx,
self.semaphore.clone(),
&mut metrics,
self.compaction,
)
.await?;
// Safety: `build_merge_reader()` always returns a reader if partition is None.
let reader = maybe_reader.unwrap();
Ok(Box::new(reader))
@@ -110,6 +123,7 @@ impl SeqScan {
part: &ScanPart,
sources: &mut Vec<Source>,
row_selector: Option<TimeSeriesRowSelector>,
compaction: bool,
) -> Result<()> {
sources.reserve(part.memtable_ranges.len() + part.file_ranges.len());
// Read memtables.
@@ -117,6 +131,11 @@ impl SeqScan {
let iter = mem.build_iter()?;
sources.push(Source::Iter(iter));
}
let read_type = if compaction {
"compaction"
} else {
"seq_scan_files"
};
// Read files.
for file in &part.file_ranges {
if file.is_empty() {
@@ -148,6 +167,8 @@ impl SeqScan {
"Seq scan region {}, file {}, {} ranges finished, metrics: {:?}",
region_id, file_id, range_num, reader_metrics
);
// Reports metrics.
reader_metrics.observe_rows(read_type);
};
let stream = Box::pin(stream);
sources.push(Source::Stream(stream));
@@ -161,6 +182,7 @@ impl SeqScan {
stream_ctx: &StreamContext,
semaphore: Arc<Semaphore>,
metrics: &mut ScannerMetrics,
compaction: bool,
) -> Result<Option<BoxedBatchReader>> {
// initialize parts list
let mut parts = stream_ctx.parts.lock().await;
@@ -173,7 +195,7 @@ impl SeqScan {
return Ok(None);
};
Self::build_part_sources(part, &mut sources, None)?;
Self::build_part_sources(part, &mut sources, None, compaction)?;
}
Self::build_reader_from_sources(stream_ctx, sources, semaphore).await
@@ -187,6 +209,7 @@ impl SeqScan {
range_id: usize,
semaphore: Arc<Semaphore>,
metrics: &mut ScannerMetrics,
compaction: bool,
) -> Result<Option<BoxedBatchReader>> {
let mut sources = Vec::new();
let build_start = {
@@ -198,7 +221,12 @@ impl SeqScan {
};
let build_start = Instant::now();
Self::build_part_sources(part, &mut sources, stream_ctx.input.series_row_selector)?;
Self::build_part_sources(
part,
&mut sources,
stream_ctx.input.series_row_selector,
compaction,
)?;
build_start
};
@@ -281,12 +309,13 @@ impl SeqScan {
let stream_ctx = self.stream_ctx.clone();
let semaphore = self.semaphore.clone();
let partition_ranges = self.properties.partitions[partition].clone();
let compaction = self.compaction;
let stream = try_stream! {
let first_poll = stream_ctx.query_start.elapsed();
for partition_range in partition_ranges {
let maybe_reader =
Self::build_merge_reader(&stream_ctx, partition_range.identifier, semaphore.clone(), &mut metrics)
Self::build_merge_reader(&stream_ctx, partition_range.identifier, semaphore.clone(), &mut metrics, compaction)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
@@ -359,6 +388,7 @@ impl SeqScan {
};
let stream_ctx = self.stream_ctx.clone();
let semaphore = self.semaphore.clone();
let compaction = self.compaction;
// build stream
let stream = try_stream! {
@@ -379,6 +409,7 @@ impl SeqScan {
id,
semaphore.clone(),
&mut metrics,
compaction,
)
.await
.map_err(BoxedError::new)
@@ -439,7 +470,7 @@ impl SeqScan {
if part_list.0.is_none() {
let now = Instant::now();
let mut distributor = SeqDistributor::default();
input.prune_file_ranges(&mut distributor).await?;
let reader_metrics = input.prune_file_ranges(&mut distributor).await?;
distributor.append_mem_ranges(
&input.memtables,
Some(input.mapper.column_ids()),
@@ -451,7 +482,7 @@ impl SeqScan {
let build_part_cost = now.elapsed();
part_list.1 = build_part_cost;
metrics.observe_init_part(build_part_cost);
metrics.observe_init_part(build_part_cost, &reader_metrics);
} else {
// Updates the cost of building parts.
metrics.build_parts_cost = part_list.1;

View File

@@ -211,6 +211,7 @@ impl RegionScanner for UnorderedScan {
}
}
reader_metrics.observe_rows("unordered_scan_files");
metrics.total_cost = query_start.elapsed();
metrics.observe_metrics_on_finish();
debug!(
@@ -263,7 +264,7 @@ async fn maybe_init_parts(
if part_list.0.is_none() {
let now = Instant::now();
let mut distributor = UnorderedDistributor::default();
input.prune_file_ranges(&mut distributor).await?;
let reader_metrics = input.prune_file_ranges(&mut distributor).await?;
distributor.append_mem_ranges(
&input.memtables,
Some(input.mapper.column_ids()),
@@ -275,7 +276,7 @@ async fn maybe_init_parts(
let build_part_cost = now.elapsed();
part_list.1 = build_part_cost;
metrics.observe_init_part(build_part_cost);
metrics.observe_init_part(build_part_cost, &reader_metrics);
} else {
// Updates the cost of building parts.
metrics.build_parts_cost = part_list.1;

View File

@@ -170,6 +170,12 @@ impl CompactionOptions {
CompactionOptions::Twcs(opts) => opts.remote_compaction,
}
}
pub(crate) fn fallback_to_local(&self) -> bool {
match self {
CompactionOptions::Twcs(opts) => opts.fallback_to_local,
}
}
}
impl Default for CompactionOptions {
@@ -201,6 +207,9 @@ pub struct TwcsOptions {
/// Whether to use remote compaction.
#[serde_as(as = "DisplayFromStr")]
pub remote_compaction: bool,
/// Whether to fall back to local compaction if remote compaction fails.
#[serde_as(as = "DisplayFromStr")]
pub fallback_to_local: bool,
}
with_prefix!(prefix_twcs "compaction.twcs.");
@@ -228,6 +237,7 @@ impl Default for TwcsOptions {
max_inactive_window_files: 1,
time_window: None,
remote_compaction: false,
fallback_to_local: true,
}
}
}
@@ -590,6 +600,7 @@ mod tests {
("compaction.twcs.time_window", "2h"),
("compaction.type", "twcs"),
("compaction.twcs.remote_compaction", "false"),
("compaction.twcs.fallback_to_local", "true"),
("storage", "S3"),
("append_mode", "false"),
("index.inverted_index.ignore_column_ids", "1,2,3"),
@@ -614,6 +625,7 @@ mod tests {
max_inactive_window_files: 3,
time_window: Some(Duration::from_secs(3600 * 2)),
remote_compaction: false,
fallback_to_local: true,
}),
storage: Some("S3".to_string()),
append_mode: false,
@@ -645,6 +657,7 @@ mod tests {
max_inactive_window_files: usize::MAX,
time_window: Some(Duration::from_secs(3600 * 2)),
remote_compaction: false,
fallback_to_local: true,
}),
storage: Some("S3".to_string()),
append_mode: false,
@@ -710,6 +723,7 @@ mod tests {
max_inactive_window_files: 7,
time_window: Some(Duration::from_secs(3600 * 2)),
remote_compaction: false,
fallback_to_local: true,
}),
storage: Some("S3".to_string()),
append_mode: false,

View File

@@ -52,39 +52,27 @@ pub struct IndexOutput {
pub fulltext_index: FulltextIndexOutput,
}
/// Base output of the index creation.
#[derive(Debug, Clone, Default)]
pub struct IndexBaseOutput {
/// Size of the index.
pub index_size: ByteCount,
/// Number of rows in the index.
pub row_count: RowCount,
/// Available columns in the index.
pub columns: Vec<ColumnId>,
}
impl IndexBaseOutput {
pub fn is_available(&self) -> bool {
self.index_size > 0
}
}
/// Output of the inverted index creation.
#[derive(Debug, Clone, Default)]
pub struct InvertedIndexOutput {
/// Size of the index.
pub index_size: ByteCount,
/// Number of rows in the index.
pub row_count: RowCount,
/// Available columns in the index.
pub columns: Vec<ColumnId>,
}
pub type InvertedIndexOutput = IndexBaseOutput;
/// Output of the fulltext index creation.
#[derive(Debug, Clone, Default)]
pub struct FulltextIndexOutput {
/// Size of the index.
pub index_size: ByteCount,
/// Number of rows in the index.
pub row_count: RowCount,
/// Available columns in the index.
pub columns: Vec<ColumnId>,
}
impl InvertedIndexOutput {
pub fn is_available(&self) -> bool {
self.index_size > 0
}
}
impl FulltextIndexOutput {
pub fn is_available(&self) -> bool {
self.index_size > 0
}
}
pub type FulltextIndexOutput = IndexBaseOutput;
/// The index creator that hides the error handling details.
#[derive(Default)]

View File

@@ -174,14 +174,19 @@ impl ParquetReaderBuilder {
///
/// This needs to perform IO operation.
pub async fn build(&self) -> Result<ParquetReader> {
let (context, row_groups) = self.build_reader_input().await?;
let mut metrics = ReaderMetrics::default();
let (context, row_groups) = self.build_reader_input(&mut metrics).await?;
ParquetReader::new(Arc::new(context), row_groups).await
}
/// Builds a [FileRangeContext] and collects row groups to read.
///
/// This needs to perform IO operation.
pub(crate) async fn build_reader_input(&self) -> Result<(FileRangeContext, RowGroupMap)> {
pub(crate) async fn build_reader_input(
&self,
metrics: &mut ReaderMetrics,
) -> Result<(FileRangeContext, RowGroupMap)> {
let start = Instant::now();
let file_path = self.file_handle.file_path(&self.file_dir);
@@ -219,10 +224,8 @@ impl ParquetReaderBuilder {
parquet_to_arrow_field_levels(parquet_schema_desc, projection_mask.clone(), hint)
.context(ReadParquetSnafu { path: &file_path })?;
let mut metrics = ReaderMetrics::default();
let row_groups = self
.row_groups_to_read(&read_format, &parquet_meta, &mut metrics)
.row_groups_to_read(&read_format, &parquet_meta, &mut metrics.filter_metrics)
.await;
let reader_builder = RowGroupReaderBuilder {
@@ -336,7 +339,7 @@ impl ParquetReaderBuilder {
&self,
read_format: &ReadFormat,
parquet_meta: &ParquetMetaData,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> BTreeMap<usize, Option<RowSelection>> {
let num_row_groups = parquet_meta.num_row_groups();
let num_rows = parquet_meta.file_metadata().num_rows();
@@ -382,7 +385,7 @@ impl ParquetReaderBuilder {
row_group_size: usize,
parquet_meta: &ParquetMetaData,
output: &mut BTreeMap<usize, Option<RowSelection>>,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(index_applier) = &self.fulltext_index_applier else {
return false;
@@ -462,7 +465,7 @@ impl ParquetReaderBuilder {
row_group_size: usize,
parquet_meta: &ParquetMetaData,
output: &mut BTreeMap<usize, Option<RowSelection>>,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(index_applier) = &self.inverted_index_applier else {
return false;
@@ -529,7 +532,7 @@ impl ParquetReaderBuilder {
read_format: &ReadFormat,
parquet_meta: &ParquetMetaData,
output: &mut BTreeMap<usize, Option<RowSelection>>,
metrics: &mut ReaderMetrics,
metrics: &mut ReaderFilterMetrics,
) -> bool {
let Some(predicate) = &self.predicate else {
return false;
@@ -724,9 +727,9 @@ fn time_range_to_predicate(
Ok(predicates)
}
/// Parquet reader metrics.
#[derive(Debug, Default, Clone)]
pub(crate) struct ReaderMetrics {
/// Metrics of filtering rows groups and rows.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct ReaderFilterMetrics {
/// Number of row groups before filtering.
pub(crate) num_row_groups_before_filtering: usize,
/// Number of row groups filtered by fulltext index.
@@ -743,6 +746,57 @@ pub(crate) struct ReaderMetrics {
pub(crate) num_rows_in_row_group_fulltext_index_filtered: usize,
/// Number of rows in row group filtered by inverted index.
pub(crate) num_rows_in_row_group_inverted_index_filtered: usize,
}
impl ReaderFilterMetrics {
/// Adds `other` metrics to this metrics.
pub(crate) fn merge_from(&mut self, other: &ReaderFilterMetrics) {
self.num_row_groups_before_filtering += other.num_row_groups_before_filtering;
self.num_row_groups_fulltext_index_filtered += other.num_row_groups_fulltext_index_filtered;
self.num_row_groups_inverted_index_filtered += other.num_row_groups_inverted_index_filtered;
self.num_row_groups_min_max_filtered += other.num_row_groups_min_max_filtered;
self.num_rows_precise_filtered += other.num_rows_precise_filtered;
self.num_rows_in_row_group_before_filtering += other.num_rows_in_row_group_before_filtering;
self.num_rows_in_row_group_fulltext_index_filtered +=
other.num_rows_in_row_group_fulltext_index_filtered;
self.num_rows_in_row_group_inverted_index_filtered +=
other.num_rows_in_row_group_inverted_index_filtered;
}
/// Reports metrics.
pub(crate) fn observe(&self) {
READ_ROW_GROUPS_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(self.num_row_groups_before_filtering as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(self.num_row_groups_fulltext_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(self.num_row_groups_inverted_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["minmax_index_filtered"])
.inc_by(self.num_row_groups_min_max_filtered as u64);
PRECISE_FILTER_ROWS_TOTAL
.with_label_values(&["parquet"])
.inc_by(self.num_rows_precise_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(self.num_rows_in_row_group_before_filtering as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(self.num_rows_in_row_group_fulltext_index_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(self.num_rows_in_row_group_inverted_index_filtered as u64);
}
}
/// Parquet reader metrics.
#[derive(Debug, Default, Clone)]
pub(crate) struct ReaderMetrics {
/// Filtered row groups and rows metrics.
pub(crate) filter_metrics: ReaderFilterMetrics,
/// Duration to build the parquet reader.
pub(crate) build_cost: Duration,
/// Duration to scan the reader.
@@ -758,22 +812,20 @@ pub(crate) struct ReaderMetrics {
impl ReaderMetrics {
/// Adds `other` metrics to this metrics.
pub(crate) fn merge_from(&mut self, other: &ReaderMetrics) {
self.num_row_groups_before_filtering += other.num_row_groups_before_filtering;
self.num_row_groups_fulltext_index_filtered += other.num_row_groups_fulltext_index_filtered;
self.num_row_groups_inverted_index_filtered += other.num_row_groups_inverted_index_filtered;
self.num_row_groups_min_max_filtered += other.num_row_groups_min_max_filtered;
self.num_rows_precise_filtered += other.num_rows_precise_filtered;
self.num_rows_in_row_group_before_filtering += other.num_rows_in_row_group_before_filtering;
self.num_rows_in_row_group_fulltext_index_filtered +=
other.num_rows_in_row_group_fulltext_index_filtered;
self.num_rows_in_row_group_inverted_index_filtered +=
other.num_rows_in_row_group_inverted_index_filtered;
self.filter_metrics.merge_from(&other.filter_metrics);
self.build_cost += other.build_cost;
self.scan_cost += other.scan_cost;
self.num_record_batches += other.num_record_batches;
self.num_batches += other.num_batches;
self.num_rows += other.num_rows;
}
/// Reports total rows.
pub(crate) fn observe_rows(&self, read_type: &str) {
READ_ROWS_TOTAL
.with_label_values(&[read_type])
.inc_by(self.num_rows as u64);
}
}
/// Builder to build a [ParquetRecordBatchReader] for a row group.
@@ -1006,10 +1058,12 @@ impl Drop for ParquetReader {
self.context.reader_builder().file_handle.region_id(),
self.context.reader_builder().file_handle.file_id(),
self.context.reader_builder().file_handle.time_range(),
metrics.num_row_groups_before_filtering
- metrics.num_row_groups_inverted_index_filtered
- metrics.num_row_groups_min_max_filtered,
metrics.num_row_groups_before_filtering,
metrics.filter_metrics.num_row_groups_before_filtering
- metrics
.filter_metrics
.num_row_groups_inverted_index_filtered
- metrics.filter_metrics.num_row_groups_min_max_filtered,
metrics.filter_metrics.num_row_groups_before_filtering,
metrics
);
@@ -1020,33 +1074,8 @@ impl Drop for ParquetReader {
READ_STAGE_ELAPSED
.with_label_values(&["scan_row_groups"])
.observe(metrics.scan_cost.as_secs_f64());
READ_ROWS_TOTAL
.with_label_values(&["parquet"])
.inc_by(metrics.num_rows as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(metrics.num_row_groups_before_filtering as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(metrics.num_row_groups_fulltext_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(metrics.num_row_groups_inverted_index_filtered as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["minmax_index_filtered"])
.inc_by(metrics.num_row_groups_min_max_filtered as u64);
PRECISE_FILTER_ROWS_TOTAL
.with_label_values(&["parquet"])
.inc_by(metrics.num_rows_precise_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["before_filtering"])
.inc_by(metrics.num_rows_in_row_group_before_filtering as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["fulltext_index_filtered"])
.inc_by(metrics.num_rows_in_row_group_fulltext_index_filtered as u64);
READ_ROWS_IN_ROW_GROUP_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(metrics.num_rows_in_row_group_inverted_index_filtered as u64);
metrics.observe_rows("parquet_reader");
metrics.filter_metrics.observe();
}
}
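
Under my reading of this split, each pruned file contributes a `ReaderFilterMetrics` that is merged into one total and reported once per scan; a hedged sketch of that pattern (the slice argument is hypothetical):

// Hypothetical aggregation mirroring the new split above.
fn report_filter_metrics(per_file: &[ReaderFilterMetrics]) {
    let mut total = ReaderFilterMetrics::default();
    for m in per_file {
        // Sums the row-group and row counters collected while pruning each file.
        total.merge_from(m);
    }
    // Emits the Prometheus counters once for the whole scan.
    total.observe();
}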

View File

@@ -960,6 +960,7 @@ pub fn build_rows(start: usize, end: usize) -> Vec<Row> {
/// - `key`: A string key that is common across all rows.
/// - `timestamps`: Array of timestamp values.
/// - `fields`: Array of tuples where each tuple contains two optional i64 values, representing two optional float fields.
///
/// Returns a vector of `Row` each containing the key, two optional float fields, and a timestamp.
pub fn build_rows_with_fields(
key: &str,

View File

@@ -15,6 +15,7 @@
use api::v1::WalEntry;
use async_stream::stream;
use futures::StreamExt;
use object_store::Buffer;
use prost::Message;
use snafu::{ensure, ResultExt};
use store_api::logstore::entry::Entry;
@@ -28,13 +29,20 @@ pub(crate) fn decode_raw_entry(raw_entry: Entry) -> Result<(EntryId, WalEntry)>
let entry_id = raw_entry.entry_id();
let region_id = raw_entry.region_id();
ensure!(raw_entry.is_complete(), CorruptedEntrySnafu { region_id });
// TODO(weny): implement the [Buf] for return value, avoid extra memory allocation.
let bytes = raw_entry.into_bytes();
let wal_entry = WalEntry::decode(bytes.as_slice()).context(DecodeWalSnafu { region_id })?;
let buffer = into_buffer(raw_entry);
let wal_entry = WalEntry::decode(buffer).context(DecodeWalSnafu { region_id })?;
Ok((entry_id, wal_entry))
}
fn into_buffer(raw_entry: Entry) -> Buffer {
match raw_entry {
Entry::Naive(entry) => Buffer::from(entry.data),
Entry::MultiplePart(entry) => {
Buffer::from_iter(entry.parts.into_iter().map(bytes::Bytes::from))
}
}
}
/// [WalEntryReader] provides the ability to read and decode entries from the underlying store.
///
/// Notes: It consumes the inner stream and only allows `read` to be invoked once.
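
A hedged sketch of the point of `into_buffer`: the parts of a multi-part entry are wrapped into a `Buffer` and decoded directly, instead of being copied into one contiguous `Vec<u8>` first (the helper below is made up for illustration):

use bytes::Bytes;
use object_store::Buffer;
use prost::Message;

// Hypothetical helper: decode a prost message from several WAL record parts
// without concatenating them into a single allocation first.
fn decode_parts<T: Message + Default>(parts: Vec<Vec<u8>>) -> Result<T, prost::DecodeError> {
    let buffer = Buffer::from_iter(parts.into_iter().map(Bytes::from));
    T::decode(buffer)
}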

View File

@@ -19,10 +19,9 @@
use std::collections::{HashMap, VecDeque};
use common_telemetry::{info, warn};
use snafu::ensure;
use store_api::storage::RegionId;
use crate::error::{InvalidRequestSnafu, RegionBusySnafu, RegionNotFoundSnafu, Result};
use crate::error::{RegionBusySnafu, RegionNotFoundSnafu, Result};
use crate::manifest::action::{
RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionTruncate,
};
@@ -289,20 +288,6 @@ impl<S> RegionWorkerLoop<S> {
/// Checks the edit, writes and applies it.
async fn edit_region(region: &MitoRegionRef, edit: RegionEdit) -> Result<()> {
let region_id = region.region_id;
for file_meta in &edit.files_to_add {
let is_exist = region.access_layer.is_exist(file_meta).await?;
ensure!(
is_exist,
InvalidRequestSnafu {
region_id,
reason: format!(
"trying to add a not exist file '{}' when editing region",
file_meta.file_id
)
}
);
}
info!("Applying {edit:?} to region {}", region_id);
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit));

View File

@@ -17,7 +17,7 @@ futures.workspace = true
lazy_static.workspace = true
md5 = "0.7"
moka = { workspace = true, features = ["future"] }
opendal = { version = "0.48", features = [
opendal = { version = "0.49", features = [
"layers-tracing",
"services-azblob",
"services-fs",

View File

@@ -18,15 +18,17 @@ use common_telemetry::debug;
use futures::FutureExt;
use moka::future::Cache;
use moka::notification::ListenerFuture;
use opendal::raw::oio::{List, Read, Reader, Write};
use opendal::raw::{Access, OpDelete, OpList, OpRead, OpStat, OpWrite, RpRead};
use opendal::{Error as OpendalError, ErrorKind, Result};
use opendal::raw::oio::{Read, Reader, Write};
use opendal::raw::{Access, OpDelete, OpRead, OpStat, OpWrite, RpRead};
use opendal::{Error as OpendalError, ErrorKind, Metakey, OperatorBuilder, Result};
use crate::metrics::{
OBJECT_STORE_LRU_CACHE_BYTES, OBJECT_STORE_LRU_CACHE_ENTRIES, OBJECT_STORE_LRU_CACHE_HIT,
OBJECT_STORE_LRU_CACHE_MISS, OBJECT_STORE_READ_ERROR,
};
const RECOVER_CACHE_LIST_CONCURRENT: usize = 8;
/// Cache value for read file
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
enum ReadResult {
@@ -142,19 +144,16 @@ impl<C: Access> ReadCache<C> {
/// Recover existing cache items from `file_cache` to `mem_cache`.
/// Return entry count and total approximate entry size in bytes.
pub(crate) async fn recover_cache(&self) -> Result<(u64, u64)> {
let (_, mut pager) = self.file_cache.list("/", OpList::default()).await?;
let op = OperatorBuilder::new(self.file_cache.clone()).finish();
let mut entries = op
.list_with("/")
.metakey(Metakey::ContentLength | Metakey::ContentType)
.concurrent(RECOVER_CACHE_LIST_CONCURRENT)
.await?;
while let Some(entry) = pager.next().await? {
while let Some(entry) = entries.pop() {
let read_key = entry.path();
// We can't retrieve the metadata from `[opendal::raw::oio::Entry]` directly,
// because it's private field.
let size = {
let stat = self.file_cache.stat(read_key, OpStat::default()).await?;
stat.into_metadata().content_length()
};
let size = entry.metadata().content_length();
OBJECT_STORE_LRU_CACHE_ENTRIES.inc();
OBJECT_STORE_LRU_CACHE_BYTES.add(size as i64);
self.mem_cache

View File

@@ -159,9 +159,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let create_res = self.inner.create_dir(path, args).await;
timer.observe_duration();
create_res.map_err(|e| {
create_res.inspect_err(|e| {
increment_errors_total(Operation::CreateDir, e.kind());
e
})
}
@@ -175,9 +174,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label])
.start_timer();
let (rp, r) = self.inner.read(path, args).await.map_err(|e| {
let (rp, r) = self.inner.read(path, args).await.inspect_err(|e| {
increment_errors_total(Operation::Read, e.kind());
e
})?;
Ok((
@@ -205,9 +203,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label])
.start_timer();
let (rp, r) = self.inner.write(path, args).await.map_err(|e| {
let (rp, r) = self.inner.write(path, args).await.inspect_err(|e| {
increment_errors_total(Operation::Write, e.kind());
e
})?;
Ok((
@@ -236,9 +233,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let stat_res = self.inner.stat(path, args).await;
timer.observe_duration();
stat_res.map_err(|e| {
stat_res.inspect_err(|e| {
increment_errors_total(Operation::Stat, e.kind());
e
})
}
@@ -254,9 +250,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let delete_res = self.inner.delete(path, args).await;
timer.observe_duration();
delete_res.map_err(|e| {
delete_res.inspect_err(|e| {
increment_errors_total(Operation::Delete, e.kind());
e
})
}
@@ -273,9 +268,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let list_res = self.inner.list(path, args).await;
timer.observe_duration();
list_res.map_err(|e| {
list_res.inspect_err(|e| {
increment_errors_total(Operation::List, e.kind());
e
})
}
@@ -290,9 +284,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.batch(args).await;
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::Batch, e.kind());
e
})
}
@@ -308,9 +301,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.presign(path, args).await;
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::Presign, e.kind());
e
})
}
@@ -335,9 +327,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingCreateDir, e.kind());
e
})
}
@@ -376,9 +367,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
),
)
})
.map_err(|e| {
.inspect_err(|e| {
increment_errors_total(Operation::BlockingRead, e.kind());
e
})
}
@@ -417,9 +407,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
),
)
})
.map_err(|e| {
.inspect_err(|e| {
increment_errors_total(Operation::BlockingWrite, e.kind());
e
})
}
@@ -442,9 +431,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
.start_timer();
let result = self.inner.blocking_stat(path, args);
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingStat, e.kind());
e
})
}
@@ -468,9 +456,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.blocking_delete(path, args);
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingDelete, e.kind());
e
})
}
@@ -494,9 +481,8 @@ impl<A: Access> LayeredAccess for PrometheusAccess<A> {
let result = self.inner.blocking_list(path, args);
timer.observe_duration();
result.map_err(|e| {
result.inspect_err(|e| {
increment_errors_total(Operation::BlockingList, e.kind());
e
})
}
}
@@ -535,18 +521,16 @@ impl<R> PrometheusMetricWrapper<R> {
impl<R: oio::Read> oio::Read for PrometheusMetricWrapper<R> {
async fn read(&mut self) -> Result<Buffer> {
self.inner.read().await.map_err(|err| {
self.inner.read().await.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}
impl<R: oio::BlockingRead> oio::BlockingRead for PrometheusMetricWrapper<R> {
fn read(&mut self) -> opendal::Result<Buffer> {
self.inner.read().map_err(|err| {
self.inner.read().inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}
@@ -567,16 +551,14 @@ impl<R: oio::Write> oio::Write for PrometheusMetricWrapper<R> {
}
async fn close(&mut self) -> Result<()> {
self.inner.close().await.map_err(|err| {
self.inner.close().await.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
async fn abort(&mut self) -> Result<()> {
self.inner.close().await.map_err(|err| {
self.inner.close().await.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}
@@ -589,16 +571,14 @@ impl<R: oio::BlockingWrite> oio::BlockingWrite for PrometheusMetricWrapper<R> {
.map(|_| {
self.bytes += bytes as u64;
})
.map_err(|err| {
.inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
fn close(&mut self) -> Result<()> {
self.inner.close().map_err(|err| {
self.inner.close().inspect_err(|err| {
increment_errors_total(self.op, err.kind());
err
})
}
}

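Every hunk above makes the same mechanical change: `map_err(|e| { ...; e })` becomes `inspect_err`, which runs the side effect on a borrowed error and passes the `Result` through unchanged. A small self-contained illustration of the pattern, where `record_error` stands in for `increment_errors_total` (neither function below is part of this diff):

fn record_error(e: &std::io::Error) {
    eprintln!("error observed: {e}");
}

fn read_config(path: &str) -> std::io::Result<String> {
    // Before: the closure had to hand the error back explicitly.
    //   std::fs::read_to_string(path).map_err(|e| { record_error(&e); e })
    // After: inspect_err only borrows the error and forwards the Result as-is.
    std::fs::read_to_string(path).inspect_err(record_error)
}

fn main() {
    let _ = read_config("/definitely/missing.toml");
}
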
View File

@@ -14,7 +14,7 @@
pub use opendal::raw::{normalize_path as raw_normalize_path, Access, HttpClient};
pub use opendal::{
services, Builder as ObjectStoreBuilder, Entry, EntryMode, Error, ErrorKind,
services, Buffer, Builder as ObjectStoreBuilder, Entry, EntryMode, Error, ErrorKind,
FuturesAsyncReader, FuturesAsyncWriter, Lister, Metakey, Operator as ObjectStore, Reader,
Result, Writer,
};

View File

@@ -12,9 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Display;
use common_telemetry::{debug, error, trace};
use futures::TryStreamExt;
use opendal::layers::{LoggingLayer, TracingLayer};
use opendal::{Entry, Lister};
use opendal::layers::{LoggingInterceptor, LoggingLayer, TracingLayer};
use opendal::raw::{AccessorInfo, Operation};
use opendal::{Entry, ErrorKind, Lister};
use crate::layers::PrometheusMetricsLayer;
use crate::ObjectStore;
@@ -140,17 +144,83 @@ pub(crate) fn extract_parent_path(path: &str) -> &str {
/// Attaches instrument layers to the object store.
pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore {
object_store
.layer(
LoggingLayer::default()
// Print the expected error only in DEBUG level.
// See https://docs.rs/opendal/latest/opendal/layers/struct.LoggingLayer.html#method.with_error_level
.with_error_level(Some("debug"))
.expect("input error level must be valid"),
)
.layer(LoggingLayer::new(DefaultLoggingInterceptor))
.layer(TracingLayer)
.layer(PrometheusMetricsLayer::new(path_label))
}
static LOGGING_TARGET: &str = "opendal::services";
struct LoggingContext<'a>(&'a [(&'a str, &'a str)]);
impl<'a> Display for LoggingContext<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (i, (k, v)) in self.0.iter().enumerate() {
if i > 0 {
write!(f, " {}={}", k, v)?;
} else {
write!(f, "{}={}", k, v)?;
}
}
Ok(())
}
}
#[derive(Debug, Copy, Clone, Default)]
pub struct DefaultLoggingInterceptor;
impl LoggingInterceptor for DefaultLoggingInterceptor {
#[inline]
fn log(
&self,
info: &AccessorInfo,
operation: Operation,
context: &[(&str, &str)],
message: &str,
err: Option<&opendal::Error>,
) {
if let Some(err) = err {
// Log unexpected errors at error level, and expected ones at debug level.
if err.kind() == ErrorKind::Unexpected {
error!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message} {err:#?}",
info.scheme(),
info.name(),
LoggingContext(context),
);
} else {
debug!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message} {err}",
info.scheme(),
info.name(),
LoggingContext(context),
);
};
}
// Log oneshot operations at debug level, and all others at trace level.
if operation.is_oneshot() {
debug!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message}",
info.scheme(),
info.name(),
LoggingContext(context),
);
} else {
trace!(
target: LOGGING_TARGET,
"service={} name={} {}: {operation} {message}",
info.scheme(),
info.name(),
LoggingContext(context),
);
};
}
}
#[cfg(test)]
mod tests {
use super::*;

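A unit test along the following lines would pin down the format produced by the `Display` impl of `LoggingContext` above; it is illustrative only and not part of this diff (the expected string is derived from the impl: key=value pairs separated by single spaces, no leading space):

#[test]
fn logging_context_formats_space_separated_pairs() {
    let ctx = LoggingContext(&[("path", "data/1.parquet"), ("size", "1024")]);
    assert_eq!(ctx.to_string(), "path=data/1.parquet size=1024");
}
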
View File

@@ -42,6 +42,19 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to execute admin function"))]
ExecuteAdminFunction {
#[snafu(implicit)]
location: Location,
source: common_query::error::Error,
},
#[snafu(display("Failed to build admin function args: {msg}"))]
BuildAdminFunctionArgs { msg: String },
#[snafu(display("Expected {expected} args, but got {actual}"))]
FunctionArityMismatch { expected: usize, actual: usize },
#[snafu(display("Failed to invalidate table cache"))]
InvalidateTableCache {
#[snafu(implicit)]
@@ -209,6 +222,9 @@ pub enum Error {
#[snafu(display("Table not found: {}", table_name))]
TableNotFound { table_name: String },
#[snafu(display("Admin function not found: {}", name))]
AdminFunctionNotFound { name: String },
#[snafu(display("Flow not found: {}", flow_name))]
FlowNotFound { flow_name: String },
@@ -546,6 +562,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to build record batch"))]
BuildRecordBatch {
#[snafu(implicit)]
location: Location,
source: common_recordbatch::error::Error,
},
#[snafu(display("Failed to read orc schema"))]
ReadOrc {
source: common_datasource::error::Error,
@@ -792,9 +815,12 @@ impl ErrorExt for Error {
| Error::InvalidViewName { .. }
| Error::InvalidView { .. }
| Error::InvalidExpr { .. }
| Error::AdminFunctionNotFound { .. }
| Error::ViewColumnsMismatch { .. }
| Error::InvalidViewStmt { .. }
| Error::ConvertIdentifier { .. }
| Error::BuildAdminFunctionArgs { .. }
| Error::FunctionArityMismatch { .. }
| Error::InvalidPartition { .. }
| Error::PhysicalExpr { .. } => StatusCode::InvalidArguments,
@@ -902,6 +928,9 @@ impl ErrorExt for Error {
| Error::InvalidTimestampRange { .. } => StatusCode::InvalidArguments,
Error::CreateLogicalTables { .. } => StatusCode::Unexpected,
Error::ExecuteAdminFunction { source, .. } => source.status_code(),
Error::BuildRecordBatch { source, .. } => source.status_code(),
}
}

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
mod admin;
mod copy_database;
mod copy_table_from;
mod copy_table_to;
@@ -277,6 +278,7 @@ impl StatementExecutor {
Statement::ShowIndex(show_index) => self.show_index(show_index, query_ctx).await,
Statement::ShowStatus(_) => self.show_status(query_ctx).await,
Statement::Use(db) => self.use_database(db, query_ctx).await,
Statement::Admin(admin) => self.execute_admin_command(admin, query_ctx).await,
}
}

View File

@@ -0,0 +1,233 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_function::function::FunctionContext;
use common_function::function_registry::FUNCTION_REGISTRY;
use common_query::prelude::TypeSignature;
use common_query::Output;
use common_recordbatch::{RecordBatch, RecordBatches};
use common_telemetry::tracing;
use common_time::Timezone;
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::{Expr, FunctionArg, FunctionArgExpr, Value as SqlValue};
use sql::statements::admin::Admin;
use sql::statements::sql_value_to_value;
use crate::error::{self, Result};
use crate::statement::StatementExecutor;
const DUMMY_COLUMN: &str = "<dummy>";
impl StatementExecutor {
/// Execute the [`Admin`] statement and return the output.
#[tracing::instrument(skip_all)]
pub(super) async fn execute_admin_command(
&self,
stmt: Admin,
query_ctx: QueryContextRef,
) -> Result<Output> {
let Admin::Func(func) = &stmt;
// the function name should be in lower case.
let func_name = func.name.to_string().to_lowercase();
let admin_func = FUNCTION_REGISTRY
.get_async_function(&func_name)
.context(error::AdminFunctionNotFoundSnafu { name: func_name })?;
let signature = admin_func.signature();
let arg_values = func
.args
.iter()
.map(|arg| {
let FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(value))) = arg else {
return error::BuildAdminFunctionArgsSnafu {
msg: format!("unsupported function arg: {arg}"),
}
.fail();
};
Ok(value)
})
.collect::<Result<Vec<_>>>()?;
let args = args_to_vector(&signature.type_signature, &arg_values, &query_ctx)?;
let arg_types = args.iter().map(|arg| arg.data_type()).collect::<Vec<_>>();
let func_ctx = FunctionContext {
query_ctx,
state: self.query_engine.engine_state().function_state(),
};
let result = admin_func
.eval(func_ctx, &args)
.await
.context(error::ExecuteAdminFunctionSnafu)?;
let column_schemas = vec![ColumnSchema::new(
// Use statement as the result column name
stmt.to_string(),
admin_func
.return_type(&arg_types)
.context(error::ExecuteAdminFunctionSnafu)?,
false,
)];
let schema = Arc::new(Schema::new(column_schemas));
let batch =
RecordBatch::new(schema.clone(), vec![result]).context(error::BuildRecordBatchSnafu)?;
let batches =
RecordBatches::try_new(schema, vec![batch]).context(error::BuildRecordBatchSnafu)?;
Ok(Output::new_with_record_batches(batches))
}
}
/// Try to cast the arguments to vectors according to the function's signature.
fn args_to_vector(
type_signature: &TypeSignature,
args: &Vec<&SqlValue>,
query_ctx: &QueryContextRef,
) -> Result<Vec<VectorRef>> {
let tz = query_ctx.timezone();
match type_signature {
TypeSignature::Variadic(valid_types) => {
values_to_vectors_by_valid_types(valid_types, args, Some(&tz))
}
TypeSignature::Uniform(arity, valid_types) => {
ensure!(
*arity == args.len(),
error::FunctionArityMismatchSnafu {
actual: args.len(),
expected: *arity,
}
);
values_to_vectors_by_valid_types(valid_types, args, Some(&tz))
}
TypeSignature::Exact(data_types) => {
values_to_vectors_by_exact_types(data_types, args, Some(&tz))
}
TypeSignature::VariadicAny => {
let data_types = args
.iter()
.map(|value| try_get_data_type_for_sql_value(value))
.collect::<Result<Vec<_>>>()?;
values_to_vectors_by_exact_types(&data_types, args, Some(&tz))
}
TypeSignature::Any(arity) => {
ensure!(
*arity == args.len(),
error::FunctionArityMismatchSnafu {
actual: args.len(),
expected: *arity,
}
);
let data_types = args
.iter()
.map(|value| try_get_data_type_for_sql_value(value))
.collect::<Result<Vec<_>>>()?;
values_to_vectors_by_exact_types(&data_types, args, Some(&tz))
}
TypeSignature::OneOf(type_sigs) => {
for type_sig in type_sigs {
if let Ok(vectors) = args_to_vector(type_sig, args, query_ctx) {
return Ok(vectors);
}
}
error::BuildAdminFunctionArgsSnafu {
msg: "function signature does not match",
}
.fail()
}
}
}
/// Try to cast sql values to vectors by exact data types.
fn values_to_vectors_by_exact_types(
exact_types: &[ConcreteDataType],
args: &[&SqlValue],
tz: Option<&Timezone>,
) -> Result<Vec<VectorRef>> {
args.iter()
.zip(exact_types.iter())
.map(|(value, data_type)| {
let value = sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None)
.context(error::ParseSqlValueSnafu)?;
Ok(value_to_vector(value))
})
.collect()
}
/// Try to cast sql values to vectors by valid data types.
fn values_to_vectors_by_valid_types(
valid_types: &[ConcreteDataType],
args: &[&SqlValue],
tz: Option<&Timezone>,
) -> Result<Vec<VectorRef>> {
args.iter()
.map(|value| {
for data_type in valid_types {
if let Ok(value) = sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None) {
return Ok(value_to_vector(value));
}
}
error::BuildAdminFunctionArgsSnafu {
msg: format!("failed to cast {value}"),
}
.fail()
})
.collect::<Result<Vec<_>>>()
}
/// Build a [`VectorRef`] from [`Value`]
fn value_to_vector(value: Value) -> VectorRef {
let data_type = value.data_type();
let mut mutable_vector = data_type.create_mutable_vector(1);
mutable_vector.push_value_ref(value.as_value_ref());
mutable_vector.to_vector()
}
/// Try to infer the data type from sql value.
fn try_get_data_type_for_sql_value(value: &SqlValue) -> Result<ConcreteDataType> {
match value {
SqlValue::Number(_, _) => Ok(ConcreteDataType::float64_datatype()),
SqlValue::Null => Ok(ConcreteDataType::null_datatype()),
SqlValue::Boolean(_) => Ok(ConcreteDataType::boolean_datatype()),
SqlValue::HexStringLiteral(_)
| SqlValue::DoubleQuotedString(_)
| SqlValue::SingleQuotedString(_) => Ok(ConcreteDataType::string_datatype()),
_ => error::BuildAdminFunctionArgsSnafu {
msg: format!("unsupported sql value: {value}"),
}
.fail(),
}
}

View File

@@ -284,7 +284,7 @@ where
let mut search_from = 0;
// because the keys in the json map are ordered
for (payload_key, payload_value) in map.into_iter() {
if search_from >= self.required_keys.len() - 1 {
if search_from >= self.required_keys.len() {
break;
}
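
The one-character change above matters because `search_from` indexes into `required_keys`: with the old `len() - 1` bound the scan stops as soon as only the last required key remains to be matched, so that key is never filled in. A toy standalone reproduction of the off-by-one (the function and data below are invented, not the pipeline's types):

fn collect(required_keys: &[&str], map: &[(&str, i32)], fixed: bool) -> Vec<i32> {
    let limit = if fixed {
        required_keys.len() // fixed bound: keep scanning until every key is matched
    } else {
        required_keys.len() - 1 // old bound: stops before the last required key
    };
    let mut out = Vec::new();
    let mut search_from = 0;
    for (key, value) in map {
        if search_from >= limit {
            break;
        }
        if required_keys[search_from] == *key {
            out.push(*value);
            search_from += 1;
        }
    }
    out
}

fn main() {
    let keys = ["a", "b"];
    let map = [("a", 1), ("b", 2)];
    assert_eq!(collect(&keys, &map, false), vec![1]); // "b" is silently skipped
    assert_eq!(collect(&keys, &map, true), vec![1, 2]); // both keys collected
}
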
@@ -359,15 +359,16 @@ mod tests {
#[test]
fn test_pipeline_prepare() {
let input_value_str = r#"
{
let input_value_str = r#"
{
"my_field": "1,2",
"foo": "bar"
}
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline_yaml = r#"
let pipeline_yaml = r#"
---
description: Pipeline for Apache Tomcat
@@ -381,32 +382,73 @@ transform:
- field: field2
type: uint32
"#;
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(
&["greptime_timestamp", "my_field"].to_vec(),
pipeline.required_keys()
);
assert_eq!(
payload,
vec![
Value::Null,
Value::String("1,2".to_string()),
Value::Null,
Value::Null
]
);
let result = pipeline.exec_mut(&mut payload).unwrap();
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(
&["greptime_timestamp", "my_field"].to_vec(),
pipeline.required_keys()
);
assert_eq!(
payload,
vec![
Value::Null,
Value::String("1,2".to_string()),
Value::Null,
Value::Null
]
);
let result = pipeline.exec_mut(&mut payload).unwrap();
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
match &result.values[2].value_data {
Some(ValueData::TimestampNanosecondValue(v)) => {
assert_ne!(*v, 0);
assert_eq!(result.values[0].value_data, Some(ValueData::U32Value(1)));
assert_eq!(result.values[1].value_data, Some(ValueData::U32Value(2)));
match &result.values[2].value_data {
Some(ValueData::TimestampNanosecondValue(v)) => {
assert_ne!(*v, 0);
}
_ => panic!("expect null value"),
}
_ => panic!("expect null value"),
}
{
let input_value_str = r#"
{
"reqTimeSec": "1573840000.000"
}
"#;
let pipeline_yaml = r#"
---
description: Pipeline for Demo Log
processors:
- gsub:
field: reqTimeSec
pattern: "\\."
replacement: ""
- epoch:
field: reqTimeSec
resolution: millisecond
ignore_missing: true
transform:
- field: reqTimeSec
type: epoch, millisecond
index: timestamp
"#;
let input_value: serde_json::Value = serde_json::from_str(input_value_str).unwrap();
let pipeline: Pipeline<GreptimeTransformer> =
parse(&Content::Yaml(pipeline_yaml.into())).unwrap();
let mut payload = pipeline.init_intermediate_state();
pipeline.prepare(input_value, &mut payload).unwrap();
assert_eq!(&["reqTimeSec"].to_vec(), pipeline.required_keys());
assert_eq!(payload, vec![Value::String("1573840000.000".to_string())]);
let result = pipeline.exec_mut(&mut payload).unwrap();
assert_eq!(
result.values[0].value_data,
Some(ValueData::TimestampMillisecondValue(1573840000000))
);
}
}

View File

@@ -47,7 +47,7 @@ use crate::extension_plan::Millisecond;
/// Empty source plan that generate record batch with two columns:
/// - time index column, computed from start, end and interval
/// - value column, generated by the input expr. The expr should not
/// reference any column except the time index column.
/// reference any column except the time index column.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct EmptyMetric {
start: Millisecond,

View File

@@ -205,11 +205,12 @@ impl ExecutionPlan for SeriesDivideExec {
.collect();
Ok(Box::pin(SeriesDivideStream {
tag_indices,
buffer: None,
buffer: vec![],
schema,
input,
metric: baseline_metric,
num_series: 0,
inspect_start: 0,
}))
}
@@ -231,11 +232,13 @@ impl DisplayAs for SeriesDivideExec {
/// Assume the input stream is ordered on the tag columns.
pub struct SeriesDivideStream {
tag_indices: Vec<usize>,
buffer: Option<RecordBatch>,
buffer: Vec<RecordBatch>,
schema: SchemaRef,
input: SendableRecordBatchStream,
metric: BaselineMetrics,
num_series: usize,
/// Index of buffered batches to start inspect next time.
inspect_start: usize,
}
impl RecordBatchStream for SeriesDivideStream {
@@ -248,30 +251,45 @@ impl Stream for SeriesDivideStream {
type Item = DataFusionResult<RecordBatch>;
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let timer = std::time::Instant::now();
loop {
if let Some(batch) = self.buffer.as_ref() {
let same_length = self.find_first_diff_row(batch) + 1;
if same_length >= batch.num_rows() {
if !self.buffer.is_empty() {
let cut_at = self.find_first_diff_row();
if let Some((batch_index, row_index)) = cut_at {
// slice out the first time series and return it.
let half_batch_of_first_series =
self.buffer[batch_index].slice(0, row_index + 1);
let half_batch_of_second_series = self.buffer[batch_index].slice(
row_index + 1,
self.buffer[batch_index].num_rows() - row_index - 1,
);
let result_batches = self
.buffer
.drain(0..batch_index)
.chain([half_batch_of_first_series])
.collect::<Vec<_>>();
self.buffer[0] = half_batch_of_second_series;
let result_batch = compute::concat_batches(&self.schema, &result_batches)?;
self.inspect_start = 0;
self.num_series += 1;
self.metric.elapsed_compute().add_elapsed(timer);
return Poll::Ready(Some(Ok(result_batch)));
} else {
// continue to fetch next batch as the current buffer only contains one time series.
let next_batch = ready!(self.as_mut().fetch_next_batch(cx)).transpose()?;
// SAFETY: if-let guards the buffer is not None;
// and we cannot change the buffer at this point.
let batch = self.buffer.take().expect("this batch must exist");
if let Some(next_batch) = next_batch {
self.buffer = Some(compute::concat_batches(
&batch.schema(),
&[batch, next_batch],
)?);
self.buffer.push(next_batch);
continue;
} else {
// input stream is ended
let result = compute::concat_batches(&self.schema, &self.buffer)?;
self.buffer.clear();
self.inspect_start = 0;
self.num_series += 1;
return Poll::Ready(Some(Ok(batch)));
self.metric.elapsed_compute().add_elapsed(timer);
return Poll::Ready(Some(Ok(result)));
}
} else {
let result_batch = batch.slice(0, same_length);
let remaining_batch = batch.slice(same_length, batch.num_rows() - same_length);
self.buffer = Some(remaining_batch);
self.num_series += 1;
return Poll::Ready(Some(Ok(result_batch)));
}
} else {
let batch = match ready!(self.as_mut().fetch_next_batch(cx)) {
@@ -282,7 +300,7 @@ impl Stream for SeriesDivideStream {
}
error => return Poll::Ready(error),
};
self.buffer = Some(batch);
self.buffer.push(batch);
continue;
}
}
@@ -294,40 +312,72 @@ impl SeriesDivideStream {
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Option<DataFusionResult<RecordBatch>>> {
let poll = match self.input.poll_next_unpin(cx) {
Poll::Ready(batch) => {
let _timer = self.metric.elapsed_compute().timer();
Poll::Ready(batch)
}
Poll::Pending => Poll::Pending,
};
let poll = self.input.poll_next_unpin(cx);
self.metric.record_poll(poll)
}
fn find_first_diff_row(&self, batch: &RecordBatch) -> usize {
/// Return the position at which to cut the buffer.
/// `None` implies the current buffer contains only one time series.
fn find_first_diff_row(&mut self) -> Option<(usize, usize)> {
// fast path: no tag columns means all data belongs to the same series.
if self.tag_indices.is_empty() {
return batch.num_rows();
return None;
}
let num_rows = batch.num_rows();
let mut result = num_rows;
let mut resumed_batch_index = self.inspect_start;
for index in &self.tag_indices {
let array = batch.column(*index);
let string_array = array.as_any().downcast_ref::<StringArray>().unwrap();
// the first row number that not equal to the next row.
let mut same_until = 0;
while same_until < num_rows - 1 {
if string_array.value(same_until) != string_array.value(same_until + 1) {
break;
for batch in &self.buffer[resumed_batch_index..] {
let num_rows = batch.num_rows();
let mut result_index = num_rows;
// check if the first row is the same as the last batch's last row
if resumed_batch_index > self.inspect_start {
let last_batch = &self.buffer[resumed_batch_index - 1];
let last_row = last_batch.num_rows() - 1;
for index in &self.tag_indices {
let current_array = batch.column(*index);
let last_array = last_batch.column(*index);
let current_value = current_array
.as_any()
.downcast_ref::<StringArray>()
.unwrap()
.value(0);
let last_value = last_array
.as_any()
.downcast_ref::<StringArray>()
.unwrap()
.value(last_row);
if current_value != last_value {
return Some((resumed_batch_index, 0));
}
}
same_until += 1;
}
result = result.min(same_until);
// check column by column
for index in &self.tag_indices {
let array = batch.column(*index);
let string_array = array.as_any().downcast_ref::<StringArray>().unwrap();
// the first row number that is not equal to the next row.
let mut same_until = 0;
while same_until < num_rows - 1 {
if string_array.value(same_until) != string_array.value(same_until + 1) {
break;
}
same_until += 1;
}
result_index = result_index.min(same_until);
}
if result_index + 1 >= num_rows {
// all rows are the same, inspect next batch
resumed_batch_index += 1;
} else {
return Some((resumed_batch_index, result_index));
}
}
result
self.inspect_start = resumed_batch_index;
None
}
}

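The reworked stream buffers a `Vec<RecordBatch>` so that one time series may span several input batches: it looks for the first row whose tag values change, slices that batch in two, emits the earlier batches plus the prefix as one output, and keeps the suffix buffered for the next series. A standalone sketch of that slice-and-concat bookkeeping using arrow directly (schema, tag values, and numbers are invented for illustration):

use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array, StringArray};
use arrow::compute::concat_batches;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

fn main() -> Result<(), ArrowError> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("tag", DataType::Utf8, false),
        Field::new("val", DataType::Int64, false),
    ]));
    // Two buffered batches; series "a" spans the first batch and the first
    // row of the second one.
    let b1 = RecordBatch::try_new(
        schema.clone(),
        vec![
            Arc::new(StringArray::from(vec!["a", "a"])) as ArrayRef,
            Arc::new(Int64Array::from(vec![1i64, 2])) as ArrayRef,
        ],
    )?;
    let b2 = RecordBatch::try_new(
        schema.clone(),
        vec![
            Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef,
            Arc::new(Int64Array::from(vec![3i64, 4])) as ArrayRef,
        ],
    )?;

    // Cut point: batch index 1, row index 0 (the last row of series "a").
    let head = b2.slice(0, 1);
    let rest = b2.slice(1, b2.num_rows() - 1);

    // Emit everything up to and including the cut as one output batch ...
    let series_a = concat_batches(&schema, &[b1, head])?;
    assert_eq!(series_a.num_rows(), 3);
    // ... and keep the remainder buffered for the next series.
    assert_eq!(rest.num_rows(), 1);
    Ok(())
}
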
View File

@@ -32,11 +32,12 @@ use crate::range_array::RangeArray;
/// There are 3 variants of smoothing functions:
/// 1) "Simple exponential smoothing": only the `level` component (the weighted average of the observations) is used to make forecasts.
/// This method is applied for time-series data that does not exhibit trend or seasonality.
/// This method is applied for time-series data that does not exhibit trend or seasonality.
/// 2) "Holt's linear method" (a.k.a. "double exponential smoothing"): `level` and `trend` components are used to make forecasts.
/// This method is applied for time-series data that exhibits trend but not seasonality.
/// This method is applied for time-series data that exhibits trend but not seasonality.
/// 3) "Holt-Winter's method" (a.k.a. "triple exponential smoothing"): `level`, `trend`, and `seasonality` are used to make forecasts.
/// This method is applied for time-series data that exhibits both trend and seasonality.
///
/// This method is applied for time-series data that exhibits both trend and seasonality.
///
/// In order to keep the parity with the Prometheus functions we had to follow the same naming ("HoltWinters"), however
/// the "Holt's linear"("double exponential smoothing") suits better and reflects implementation.

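For reference, the standard textbook recurrences behind the three variants described above, with smoothing parameters alpha, beta, gamma and seasonal period m (notation and the additive seasonal form are assumptions for illustration, not taken from the implementation):

% Simple exponential smoothing (level only):
\ell_t = \alpha y_t + (1 - \alpha)\,\ell_{t-1}, \qquad \hat{y}_{t+h} = \ell_t

% Holt's linear method (level and trend):
\ell_t = \alpha y_t + (1 - \alpha)(\ell_{t-1} + b_{t-1}), \quad
b_t = \beta(\ell_t - \ell_{t-1}) + (1 - \beta)\, b_{t-1}, \qquad
\hat{y}_{t+h} = \ell_t + h\, b_t

% Holt-Winters, additive seasonality (level, trend, and season):
\ell_t = \alpha (y_t - s_{t-m}) + (1 - \alpha)(\ell_{t-1} + b_{t-1}), \quad
s_t = \gamma (y_t - \ell_{t-1} - b_{t-1}) + (1 - \gamma)\, s_{t-m}, \qquad
\hat{y}_{t+h} = \ell_t + h\, b_t + s_{t+h-m(k+1)}, \; k = \lfloor (h-1)/m \rfloor
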
View File

@@ -34,7 +34,7 @@
//! - bit 0 (lowest bit): whether `FooterPayload` is compressed
//! - all other bits are reserved for future use and should be set to 0 on write
//! * all other bytes are reserved for future use and should be set to 0 on write
//! A 4 byte integer is always signed, in a twos complement representation, stored little-endian.
//! A 4 byte integer is always signed, in a twos complement representation, stored little-endian.
//!
//! ## Footer Payload
//!

View File

@@ -447,6 +447,10 @@ impl QueryEngine for DatafusionQueryEngine {
state.config_mut().set_extension(query_ctx.clone());
QueryEngineContext::new(state, query_ctx)
}
fn engine_state(&self) -> &QueryEngineState {
&self.state
}
}
impl QueryExecutor for DatafusionQueryEngine {

View File

@@ -17,7 +17,9 @@
use std::any::Any;
use std::sync::{Arc, Mutex};
use api::v1::SemanticType;
use async_trait::async_trait;
use common_recordbatch::filter::SimpleFilterEvaluator;
use common_recordbatch::OrderOption;
use datafusion::catalog::schema::SchemaProvider;
use datafusion::catalog::{CatalogProvider, CatalogProviderList};
@@ -177,7 +179,27 @@ impl TableProvider for DummyTableProvider {
&self,
filters: &[&Expr],
) -> datafusion::error::Result<Vec<TableProviderFilterPushDown>> {
Ok(vec![TableProviderFilterPushDown::Inexact; filters.len()])
let supported = filters
.iter()
.map(|e| {
// Simple filters on primary key columns are evaluated precisely.
if let Some(simple_filter) = SimpleFilterEvaluator::try_new(e) {
if self
.metadata
.column_by_name(simple_filter.column_name())
.and_then(|c| (c.semantic_type == SemanticType::Tag).then_some(()))
.is_some()
{
TableProviderFilterPushDown::Exact
} else {
TableProviderFilterPushDown::Inexact
}
} else {
TableProviderFilterPushDown::Inexact
}
})
.collect();
Ok(supported)
}
}

Some files were not shown because too many files have changed in this diff.