Compare commits

..

16 Commits

Author SHA1 Message Date
Discord9
f995204060 test: more reduce tests 2023-09-06 16:38:51 +08:00
Discord9
93561291e4 support more binary function 2023-09-06 16:38:51 +08:00
Discord9
9f59d68391 eval func 2023-09-06 16:37:49 +08:00
Discord9
51083b12bd reduce_bucketed 2023-09-06 16:37:49 +08:00
Discord9
c80165c377 test: simple render 2023-09-06 16:37:49 +08:00
Discord9
76d8709774 sink&source 2023-09-06 16:37:49 +08:00
Discord9
2cf7d6d569 feat: build_accumulable 2023-09-06 16:37:49 +08:00
Discord9
045c8079e6 feat: flow util func 2023-09-06 16:37:49 +08:00
Discord9
54f2f6495f mfp & reduce partially 2023-09-06 16:37:49 +08:00
Discord9
2798d266f5 feat: render plan partially writen 2023-09-06 16:37:49 +08:00
Discord9
824d03a642 working on reduce 2023-09-06 16:36:41 +08:00
Discord9
47f41371d0 Arrangement&types 2023-09-06 16:36:41 +08:00
Discord9
d702b6e5c4 use newer DD 2023-09-06 16:36:41 +08:00
Discord9
13c02f3f92 basic skeleton 2023-09-06 16:36:41 +08:00
Discord9
b52eb2313e renamed as greptime-flow 2023-09-06 16:36:41 +08:00
Discord9
d422bc8401 basic demo 2023-09-06 16:36:41 +08:00
1275 changed files with 79645 additions and 80528 deletions

View File

@@ -12,4 +12,9 @@ rustflags = [
"-Wclippy::print_stdout", "-Wclippy::print_stdout",
"-Wclippy::print_stderr", "-Wclippy::print_stderr",
"-Wclippy::implicit_clone", "-Wclippy::implicit_clone",
# It seems clippy has made a false positive decision here when upgrading rust toolchain to
# nightly-2023-08-07, we do need it to be borrowed mutably.
# Allow it for now; try disallow it when the toolchain is upgraded in the future.
"-Aclippy::needless_pass_by_ref_mut",
] ]

View File

@@ -41,27 +41,13 @@ body:
required: true required: true
- type: textarea - type: textarea
id: reproduce id: what-happened
attributes: attributes:
label: Minimal reproduce step label: What happened?
description: | description: |
Please walk us through and provide steps and details on how Tell us what happened and also what you would have expected to
to reproduce the issue. If possible, provide scripts that we happen instead.
can run to trigger the bug. placeholder: "Describe the bug"
validations:
required: true
- type: textarea
id: expected-manner
attributes:
label: What did you expect to see?
validations:
required: true
- type: textarea
id: actual-manner
attributes:
label: What did you see instead?
validations: validations:
required: true required: true
@@ -86,3 +72,14 @@ body:
trace. This will be automatically formatted into code, so no trace. This will be automatically formatted into code, so no
need for backticks. need for backticks.
render: bash render: bash
- type: textarea
id: reproduce
attributes:
label: How can we reproduce the bug?
description: |
Please walk us through and provide steps and details on how
to reproduce the issue. If possible, provide scripts that we
can run to trigger the bug.
validations:
required: true

View File

@@ -0,0 +1,93 @@
name: Build and push dev-builder image
description: Build and push dev-builder image to DockerHub and ACR
inputs:
dockerhub-image-registry:
description: The dockerhub image registry to store the images
required: false
default: docker.io
dockerhub-image-registry-username:
description: The dockerhub username to login to the image registry
required: true
dockerhub-image-registry-token:
description: The dockerhub token to login to the image registry
required: true
dockerhub-image-namespace:
description: The dockerhub namespace of the image registry to store the images
required: false
default: greptime
acr-image-registry:
description: The ACR image registry to store the images
required: true
acr-image-registry-username:
description: The ACR username to login to the image registry
required: true
acr-image-registry-password:
description: The ACR password to login to the image registry
required: true
acr-image-namespace:
description: The ACR namespace of the image registry to store the images
required: false
default: greptime
version:
description: Version of the dev-builder
required: false
default: latest
runs:
using: composite
steps:
- name: Login to Dockerhub
uses: docker/login-action@v2
with:
registry: ${{ inputs.dockerhub-image-registry }}
username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }}
- name: Build and push ubuntu dev builder image to dockerhub
shell: bash
run:
make dev-builder \
BASE_IMAGE=ubuntu \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Build and push centos dev builder image to dockerhub
shell: bash
run:
make dev-builder \
BASE_IMAGE=centos \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Login to ACR
uses: docker/login-action@v2
continue-on-error: true
with:
registry: ${{ inputs.acr-image-registry }}
username: ${{ inputs.acr-image-registry-username }}
password: ${{ inputs.acr-image-registry-password }}
- name: Build and push ubuntu dev builder image to ACR
shell: bash
continue-on-error: true
run: # buildx will cache the images that already built, so it will not take long time to build the images again.
make dev-builder \
BASE_IMAGE=ubuntu \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.acr-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.acr-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Build and push centos dev builder image to ACR
shell: bash
continue-on-error: true
run: # buildx will cache the images that already built, so it will not take long time to build the images again.
make dev-builder \
BASE_IMAGE=centos \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.acr-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.acr-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}

View File

@@ -1,76 +0,0 @@
name: Build and push dev-builder images
description: Build and push dev-builder images to DockerHub and ACR
inputs:
dockerhub-image-registry:
description: The dockerhub image registry to store the images
required: false
default: docker.io
dockerhub-image-registry-username:
description: The dockerhub username to login to the image registry
required: true
dockerhub-image-registry-token:
description: The dockerhub token to login to the image registry
required: true
dockerhub-image-namespace:
description: The dockerhub namespace of the image registry to store the images
required: false
default: greptime
version:
description: Version of the dev-builder
required: false
default: latest
build-dev-builder-ubuntu:
description: Build dev-builder-ubuntu image
required: false
default: 'true'
build-dev-builder-centos:
description: Build dev-builder-centos image
required: false
default: 'true'
build-dev-builder-android:
description: Build dev-builder-android image
required: false
default: 'true'
runs:
using: composite
steps:
- name: Login to Dockerhub
uses: docker/login-action@v2
with:
registry: ${{ inputs.dockerhub-image-registry }}
username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }}
- name: Build and push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.build-dev-builder-ubuntu == 'true' }}
run: |
make dev-builder \
BASE_IMAGE=ubuntu \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-centos image
shell: bash
if: ${{ inputs.build-dev-builder-centos == 'true' }}
run: |
make dev-builder \
BASE_IMAGE=centos \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-android image # Only build image for amd64 platform.
shell: bash
if: ${{ inputs.build-dev-builder-android == 'true' }}
run: |
make dev-builder \
BASE_IMAGE=android \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }} && \
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

View File

@@ -16,20 +16,35 @@ inputs:
version: version:
description: Version of the artifact description: Version of the artifact
required: true required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
working-dir: working-dir:
description: Working directory to build the artifacts description: Working directory to build the artifacts
required: false required: false
default: . default: .
build-android-artifacts:
description: Build android artifacts
required: false
default: 'false'
runs: runs:
using: composite using: composite
steps: steps:
- name: Build greptime binary - name: Build greptime binary
shell: bash shell: bash
if: ${{ inputs.build-android-artifacts == 'false' }}
run: | run: |
cd ${{ inputs.working-dir }} && \ cd ${{ inputs.working-dir }} && \
make build-by-dev-builder \ make build-by-dev-builder \
@@ -39,25 +54,14 @@ runs:
- name: Upload artifacts - name: Upload artifacts
uses: ./.github/actions/upload-artifacts uses: ./.github/actions/upload-artifacts
if: ${{ inputs.build-android-artifacts == 'false' }}
with: with:
artifacts-dir: ${{ inputs.artifacts-dir }} artifacts-dir: ${{ inputs.artifacts-dir }}
target-file: ./target/${{ inputs.cargo-profile }}/greptime target-file: ./target/${{ inputs.cargo-profile }}/greptime
version: ${{ inputs.version }} version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }} release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
# TODO(zyy17): We can remove build-android-artifacts flag in the future. aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
- name: Build greptime binary aws-region: ${{ inputs.aws-region }}
shell: bash upload-to-s3: ${{ inputs.upload-to-s3 }}
if: ${{ inputs.build-android-artifacts == 'true' }} upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
run: |
cd ${{ inputs.working-dir }} && make strip-android-bin
- name: Upload android artifacts
uses: ./.github/actions/upload-artifacts
if: ${{ inputs.build-android-artifacts == 'true' }}
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-file: ./target/aarch64-linux-android/release/greptime
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }} working-dir: ${{ inputs.working-dir }}

View File

@@ -13,10 +13,30 @@ inputs:
disable-run-tests: disable-run-tests:
description: Disable running integration tests description: Disable running integration tests
required: true required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
dev-mode: dev-mode:
description: Enable dev mode, only build standard greptime description: Enable dev mode, only build standard greptime
required: false required: false
default: 'false' default: 'false'
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
working-dir: working-dir:
description: Working directory to build the artifacts description: Working directory to build the artifacts
required: false required: false
@@ -48,6 +68,12 @@ runs:
cargo-profile: ${{ inputs.cargo-profile }} cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-pyo3-${{ inputs.version }} artifacts-dir: greptime-linux-${{ inputs.arch }}-pyo3-${{ inputs.version }}
version: ${{ inputs.version }} version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }} working-dir: ${{ inputs.working-dir }}
- name: Build greptime without pyo3 - name: Build greptime without pyo3
@@ -59,6 +85,12 @@ runs:
cargo-profile: ${{ inputs.cargo-profile }} cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }} artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
version: ${{ inputs.version }} version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }} working-dir: ${{ inputs.working-dir }}
- name: Clean up the target directory # Clean up the target directory for the centos7 base image, or it will still use the objects of last build. - name: Clean up the target directory # Clean up the target directory for the centos7 base image, or it will still use the objects of last build.
@@ -75,14 +107,10 @@ runs:
cargo-profile: ${{ inputs.cargo-profile }} cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }} artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
version: ${{ inputs.version }} version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }} working-dir: ${{ inputs.working-dir }}
- name: Build greptime on android base image
uses: ./.github/actions/build-greptime-binary
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Only build android base image on amd64.
with:
base-image: android
artifacts-dir: greptime-android-arm64-${{ inputs.version }}
version: ${{ inputs.version }}
working-dir: ${{ inputs.working-dir }}
build-android-artifacts: true

View File

@@ -19,9 +19,21 @@ inputs:
disable-run-tests: disable-run-tests:
description: Disable running integration tests description: Disable running integration tests
required: true required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
artifacts-dir: artifacts-dir:
description: Directory to store artifacts description: Directory to store artifacts
required: true required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
runs: runs:
using: composite using: composite
steps: steps:
@@ -87,3 +99,7 @@ runs:
artifacts-dir: ${{ inputs.artifacts-dir }} artifacts-dir: ${{ inputs.artifacts-dir }}
target-file: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime target-file: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime
version: ${{ inputs.version }} version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}

View File

@@ -1,80 +0,0 @@
name: Build Windows artifacts
description: Build Windows artifacts
inputs:
arch:
description: Architecture to build
required: true
rust-toolchain:
description: Rust toolchain to use
required: true
cargo-profile:
description: Cargo profile to build
required: true
features:
description: Cargo features to build
required: true
version:
description: Version of the artifact
required: true
disable-run-tests:
description: Disable running integration tests
required: true
artifacts-dir:
description: Directory to store artifacts
required: true
runs:
using: composite
steps:
- uses: arduino/setup-protoc@v1
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ inputs.rust-toolchain }}
targets: ${{ inputs.arch }}
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install PyArrow Package
shell: pwsh
run: pip install pyarrow
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:
distribution: Ubuntu-22.04
- name: Install latest nextest release # For integration tests.
if: ${{ inputs.disable-run-tests == 'false' }}
uses: taiki-e/install-action@nextest
- name: Run integration tests
if: ${{ inputs.disable-run-tests == 'false' }}
shell: pwsh
run: make test sqlness-test
- name: Upload sqlness logs
if: ${{ failure() }} # Only upload logs when the integration tests failed.
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log
retention-days: 3
- name: Build greptime binary
shell: pwsh
run: cargo build --profile ${{ inputs.cargo-profile }} --features ${{ inputs.features }} --target ${{ inputs.arch }}
- name: Upload artifacts
uses: ./.github/actions/upload-artifacts
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-file: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime
version: ${{ inputs.version }}

View File

@@ -1,31 +0,0 @@
name: Deploy GreptimeDB cluster
description: Deploy GreptimeDB cluster on Kubernetes
inputs:
aws-ci-test-bucket:
description: 'AWS S3 bucket name for testing'
required: true
aws-region:
description: 'AWS region for testing'
required: true
data-root:
description: 'Data root for testing'
required: true
aws-access-key-id:
description: 'AWS access key id for testing'
required: true
aws-secret-access-key:
description: 'AWS secret access key for testing'
required: true
runs:
using: composite
steps:
- name: Deploy GreptimeDB by Helm
shell: bash
env:
DATA_ROOT: ${{ inputs.data-root }}
AWS_CI_TEST_BUCKET: ${{ inputs.aws-ci-test-bucket }}
AWS_REGION: ${{ inputs.aws-region }}
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
run: |
./.github/scripts/deploy-greptimedb.sh

View File

@@ -1,5 +1,5 @@
name: Publish GitHub release name: Release artifacts
description: Publish GitHub release description: Release artifacts
inputs: inputs:
version: version:
description: Version to release description: Version to release
@@ -31,12 +31,10 @@ runs:
echo "prerelease=false" >> $GITHUB_ENV echo "prerelease=false" >> $GITHUB_ENV
echo "makeLatest=true" >> $GITHUB_ENV echo "makeLatest=true" >> $GITHUB_ENV
echo "generateReleaseNotes=false" >> $GITHUB_ENV echo "generateReleaseNotes=false" >> $GITHUB_ENV
echo "omitBody=true" >> $GITHUB_ENV
else else
echo "prerelease=true" >> $GITHUB_ENV echo "prerelease=true" >> $GITHUB_ENV
echo "makeLatest=false" >> $GITHUB_ENV echo "makeLatest=false" >> $GITHUB_ENV
echo "generateReleaseNotes=true" >> $GITHUB_ENV echo "generateReleaseNotes=true" >> $GITHUB_ENV
echo "omitBody=false" >> $GITHUB_ENV
fi fi
- name: Publish release - name: Publish release
@@ -47,7 +45,6 @@ runs:
makeLatest: ${{ env.makeLatest }} makeLatest: ${{ env.makeLatest }}
tag: ${{ inputs.version }} tag: ${{ inputs.version }}
generateReleaseNotes: ${{ env.generateReleaseNotes }} generateReleaseNotes: ${{ env.generateReleaseNotes }}
omitBody: ${{ env.omitBody }} # omitBody is true when the release is a official release.
allowUpdates: true allowUpdates: true
artifacts: | artifacts: |
**/greptime-*/* **/greptime-*/*

View File

@@ -1,138 +0,0 @@
name: Release CN artifacts
description: Release artifacts to CN region
inputs:
src-image-registry:
description: The source image registry to store the images
required: true
default: docker.io
src-image-namespace:
description: The namespace of the source image registry to store the images
required: true
default: greptime
src-image-name:
description: The name of the source image
required: false
default: greptimedb
dst-image-registry:
description: The destination image registry to store the images
required: true
dst-image-namespace:
description: The namespace of the destination image registry to store the images
required: true
default: greptime
dst-image-registry-username:
description: The username to login to the image registry
required: true
dst-image-registry-password:
description: The password to login to the image registry
required: true
version:
description: Version of the artifact
required: true
dev-mode:
description: Enable dev mode, only push standard greptime
required: false
default: 'false'
push-latest-tag:
description: Whether to push the latest tag of the image
required: false
default: 'true'
aws-cn-s3-bucket:
description: S3 bucket to store released artifacts in CN region
required: true
aws-cn-access-key-id:
description: AWS access key id in CN region
required: true
aws-cn-secret-access-key:
description: AWS secret access key in CN region
required: true
aws-cn-region:
description: AWS region in CN
required: true
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
artifacts-dir:
description: Directory to store artifacts
required: false
default: 'artifacts'
update-version-info:
description: Update the version info in S3
required: false
default: 'true'
upload-max-retry-times:
description: Max retry times for uploading artifacts to S3
required: false
default: "20"
upload-retry-timeout:
description: Timeout for uploading artifacts to S3
required: false
default: "30" # minutes
runs:
using: composite
steps:
- name: Download artifacts
uses: actions/download-artifact@v3
with:
path: ${{ inputs.artifacts-dir }}
- name: Release artifacts to cn region
uses: nick-invision/retry@v2
if: ${{ inputs.upload-to-s3 == 'true' }}
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-cn-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-cn-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-cn-region }}
UPDATE_VERSION_INFO: ${{ inputs.update-version-info }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}
timeout_minutes: ${{ inputs.upload-retry-timeout }}
command: |
./.github/scripts/upload-artifacts-to-s3.sh \
${{ inputs.artifacts-dir }} \
${{ inputs.version }} \
${{ inputs.aws-cn-s3-bucket }}
- name: Push greptimedb image from Dockerhub to ACR
shell: bash
env:
DST_REGISTRY_USERNAME: ${{ inputs.dst-image-registry-username }}
DST_REGISTRY_PASSWORD: ${{ inputs.dst-image-registry-password }}
run: |
./.github/scripts/copy-image.sh \
${{ inputs.src-image-registry }}/${{ inputs.src-image-namespace }}/${{ inputs.src-image-name }}:${{ inputs.version }} \
${{ inputs.dst-image-registry }}/${{ inputs.dst-image-namespace }}
- name: Push latest greptimedb image from Dockerhub to ACR
shell: bash
if: ${{ inputs.push-latest-tag == 'true' }}
env:
DST_REGISTRY_USERNAME: ${{ inputs.dst-image-registry-username }}
DST_REGISTRY_PASSWORD: ${{ inputs.dst-image-registry-password }}
run: |
./.github/scripts/copy-image.sh \
${{ inputs.src-image-registry }}/${{ inputs.src-image-namespace }}/${{ inputs.src-image-name }}:latest \
${{ inputs.dst-image-registry }}/${{ inputs.dst-image-namespace }}
- name: Push greptimedb-centos image from DockerHub to ACR
shell: bash
if: ${{ inputs.dev-mode == 'false' }}
env:
DST_REGISTRY_USERNAME: ${{ inputs.dst-image-registry-username }}
DST_REGISTRY_PASSWORD: ${{ inputs.dst-image-registry-password }}
run: |
./.github/scripts/copy-image.sh \
${{ inputs.src-image-registry }}/${{ inputs.src-image-namespace }}/${{ inputs.src-image-name }}-centos:latest \
${{ inputs.dst-image-registry }}/${{ inputs.dst-image-namespace }}
- name: Push greptimedb-centos image from DockerHub to ACR
shell: bash
if: ${{ inputs.dev-mode == 'false' && inputs.push-latest-tag == 'true' }}
env:
DST_REGISTRY_USERNAME: ${{ inputs.dst-image-registry-username }}
DST_REGISTRY_PASSWORD: ${{ inputs.dst-image-registry-password }}
run: |
./.github/scripts/copy-image.sh \
${{ inputs.src-image-registry }}/${{ inputs.src-image-namespace }}/${{ inputs.src-image-name }}-centos:latest \
${{ inputs.dst-image-registry }}/${{ inputs.dst-image-namespace }}

View File

@@ -1,59 +0,0 @@
name: Run sqlness test
description: Run sqlness test on GreptimeDB
inputs:
aws-ci-test-bucket:
description: 'AWS S3 bucket name for testing'
required: true
aws-region:
description: 'AWS region for testing'
required: true
data-root:
description: 'Data root for testing'
required: true
aws-access-key-id:
description: 'AWS access key id for testing'
required: true
aws-secret-access-key:
description: 'AWS secret access key for testing'
required: true
runs:
using: composite
steps:
- name: Deploy GreptimeDB cluster by Helm
uses: ./.github/actions/deploy-greptimedb
with:
data-root: ${{ inputs.data-root }}
aws-ci-test-bucket: ${{ inputs.aws-ci-test-bucket }}
aws-region: ${{ inputs.aws-region }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
# TODO(zyy17): The following tests will be replaced by the real sqlness test.
- name: Run tests on greptimedb cluster
shell: bash
run: |
mysql -h 127.0.0.1 -P 14002 -e "CREATE TABLE IF NOT EXISTS system_metrics (host VARCHAR(255), idc VARCHAR(255), cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), TIME INDEX(ts));" && \
mysql -h 127.0.0.1 -P 14002 -e "SHOW TABLES;"
- name: Run tests on greptimedb cluster that uses S3
shell: bash
run: |
mysql -h 127.0.0.1 -P 24002 -e "CREATE TABLE IF NOT EXISTS system_metrics (host VARCHAR(255), idc VARCHAR(255), cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), TIME INDEX(ts));" && \
mysql -h 127.0.0.1 -P 24002 -e "SHOW TABLES;"
- name: Run tests on standalone greptimedb
shell: bash
run: |
mysql -h 127.0.0.1 -P 34002 -e "CREATE TABLE IF NOT EXISTS system_metrics (host VARCHAR(255), idc VARCHAR(255), cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), TIME INDEX(ts));" && \
mysql -h 127.0.0.1 -P 34002 -e "SHOW TABLES;"
- name: Clean S3 data
shell: bash
env:
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
run: |
aws s3 rm s3://${{ inputs.aws-ci-test-bucket }}/${{ inputs.data-root }} --recursive

View File

@@ -10,6 +10,34 @@ inputs:
version: version:
description: Version of the artifact description: Version of the artifact
required: true required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
upload-max-retry-times:
description: Max retry times for uploading artifacts to S3
required: false
default: "20"
upload-retry-timeout:
description: Timeout for uploading artifacts to S3
required: false
default: "10" # minutes
working-dir: working-dir:
description: Working directory to upload the artifacts description: Working directory to upload the artifacts
required: false required: false
@@ -33,21 +61,9 @@ runs:
working-directory: ${{ inputs.working-dir }} working-directory: ${{ inputs.working-dir }}
shell: bash shell: bash
run: | run: |
tar -zcvf ${{ inputs.artifacts-dir }}.tar.gz ${{ inputs.artifacts-dir }} tar -zcvf ${{ inputs.artifacts-dir }}.tar.gz ${{ inputs.artifacts-dir }} && \
- name: Calculate checksum
if: runner.os != 'Windows'
working-directory: ${{ inputs.working-dir }}
shell: bash
run: |
echo $(shasum -a 256 ${{ inputs.artifacts-dir }}.tar.gz | cut -f1 -d' ') > ${{ inputs.artifacts-dir }}.sha256sum echo $(shasum -a 256 ${{ inputs.artifacts-dir }}.tar.gz | cut -f1 -d' ') > ${{ inputs.artifacts-dir }}.sha256sum
- name: Calculate checksum on Windows
if: runner.os == 'Windows'
working-directory: ${{ inputs.working-dir }}
shell: pwsh
run: Get-FileHash ${{ inputs.artifacts-dir }}.tar.gz -Algorithm SHA256 | select -ExpandProperty Hash > ${{ inputs.artifacts-dir }}.sha256sum
# Note: The artifacts will be double zip compressed(related issue: https://github.com/actions/upload-artifact/issues/39). # Note: The artifacts will be double zip compressed(related issue: https://github.com/actions/upload-artifact/issues/39).
# However, when we use 'actions/download-artifact@v3' to download the artifacts, it will be automatically unzipped. # However, when we use 'actions/download-artifact@v3' to download the artifacts, it will be automatically unzipped.
- name: Upload artifacts - name: Upload artifacts
@@ -61,3 +77,49 @@ runs:
with: with:
name: ${{ inputs.artifacts-dir }}.sha256sum name: ${{ inputs.artifacts-dir }}.sha256sum
path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.sha256sum path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.sha256sum
- name: Upload artifacts to S3
if: ${{ inputs.upload-to-s3 == 'true' }}
uses: nick-invision/retry@v2
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}
timeout_minutes: ${{ inputs.upload-retry-timeout }}
# The bucket layout will be:
# releases/greptimedb
# ├── v0.1.0
# │ ├── greptime-darwin-amd64-pyo3-v0.1.0.sha256sum
# │ └── greptime-darwin-amd64-pyo3-v0.1.0.tar.gz
# └── v0.2.0
# ├── greptime-darwin-amd64-pyo3-v0.2.0.sha256sum
# └── greptime-darwin-amd64-pyo3-v0.2.0.tar.gz
command: |
cd ${{ inputs.working-dir }} && \
aws s3 cp \
${{ inputs.artifacts-dir }}.tar.gz \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/${{ inputs.version }}/${{ inputs.artifacts-dir }}.tar.gz && \
aws s3 cp \
${{ inputs.artifacts-dir }}.sha256sum \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/${{ inputs.version }}/${{ inputs.artifacts-dir }}.sha256sum
- name: Upload latest artifacts to S3
if: ${{ inputs.upload-to-s3 == 'true' && inputs.upload-latest-artifacts == 'true' }} # We'll also upload the latest artifacts to S3 in the scheduled and formal release.
uses: nick-invision/retry@v2
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}
timeout_minutes: ${{ inputs.upload-retry-timeout }}
command: |
cd ${{ inputs.working-dir }} && \
aws s3 cp \
${{ inputs.artifacts-dir }}.tar.gz \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/latest/${{ inputs.artifacts-dir }}.tar.gz && \
aws s3 cp \
${{ inputs.artifacts-dir }}.sha256sum \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/latest/${{ inputs.artifacts-dir }}.sha256sum

View File

@@ -1,47 +0,0 @@
#!/usr/bin/env bash
set -e
set -o pipefail
SRC_IMAGE=$1
DST_REGISTRY=$2
SKOPEO_STABLE_IMAGE="quay.io/skopeo/stable:latest"
# Check if necessary variables are set.
function check_vars() {
for var in DST_REGISTRY_USERNAME DST_REGISTRY_PASSWORD DST_REGISTRY SRC_IMAGE; do
if [ -z "${!var}" ]; then
echo "$var is not set or empty."
echo "Usage: DST_REGISTRY_USERNAME=<your-dst-registry-username> DST_REGISTRY_PASSWORD=<your-dst-registry-password> $0 <dst-registry> <src-image>"
exit 1
fi
done
}
# Copies images from DockerHub to the destination registry.
function copy_images_from_dockerhub() {
# Check if docker is installed.
if ! command -v docker &> /dev/null; then
echo "docker is not installed. Please install docker to continue."
exit 1
fi
# Extract the name and tag of the source image.
IMAGE_NAME=$(echo "$SRC_IMAGE" | sed "s/.*\///")
echo "Copying $SRC_IMAGE to $DST_REGISTRY/$IMAGE_NAME"
docker run "$SKOPEO_STABLE_IMAGE" copy -a docker://"$SRC_IMAGE" \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://"$DST_REGISTRY/$IMAGE_NAME"
}
function main() {
check_vars
copy_images_from_dockerhub
}
# Usage example:
# DST_REGISTRY_USERNAME=123 DST_REGISTRY_PASSWORD=456 \
# ./copy-image.sh greptime/greptimedb:v0.4.0 greptime-registry.cn-hangzhou.cr.aliyuncs.com
main

View File

@@ -1,172 +0,0 @@
#!/usr/bin/env bash
set -e
set -o pipefail
KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.24.0}"
ENABLE_STANDALONE_MODE="${ENABLE_STANDALONE_MODE:-true}"
DEFAULT_INSTALL_NAMESPACE=${DEFAULT_INSTALL_NAMESPACE:-default}
GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
# Ceate a cluster with 1 control-plane node and 5 workers.
function create_kind_cluster() {
cat <<EOF | kind create cluster --name "${CLUSTER}" --image kindest/node:"$KUBERNETES_VERSION" --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
- role: worker
- role: worker
- role: worker
- role: worker
EOF
}
# Add greptime Helm chart repo.
function add_greptime_chart() {
helm repo add greptime "$GREPTIME_CHART"
helm repo update
}
# Deploy a etcd cluster with 3 members.
function deploy_etcd_cluster() {
local namespace="$1"
helm install etcd "$ETCD_CHART" \
--set replicaCount=3 \
--set auth.rbac.create=false \
--set auth.rbac.token.enabled=false \
-n "$namespace"
# Wait for etcd cluster to be ready.
kubectl rollout status statefulset/etcd -n "$namespace"
}
# Deploy greptimedb-operator.
function deploy_greptimedb_operator() {
# Use the latest chart and image.
helm install greptimedb-operator greptime/greptimedb-operator \
--set image.tag=latest \
-n "$DEFAULT_INSTALL_NAMESPACE"
# Wait for greptimedb-operator to be ready.
kubectl rollout status deployment/greptimedb-operator -n "$DEFAULT_INSTALL_NAMESPACE"
}
# Deploy greptimedb cluster by using local storage.
# It will expose cluster service ports as '14000', '14001', '14002', '14003' to local access.
function deploy_greptimedb_cluster() {
local cluster_name=$1
local install_namespace=$2
kubectl create ns "$install_namespace"
deploy_etcd_cluster "$install_namespace"
helm install "$cluster_name" greptime/greptimedb-cluster \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.etcdEndpoints="etcd.$install_namespace:2379" \
-n "$install_namespace"
# Wait for greptimedb cluster to be ready.
while true; do
PHASE=$(kubectl -n "$install_namespace" get gtc "$cluster_name" -o jsonpath='{.status.clusterPhase}')
if [ "$PHASE" == "Running" ]; then
echo "Cluster is ready"
break
else
echo "Cluster is not ready yet: Current phase: $PHASE"
sleep 5 # wait for 5 seconds before check again.
fi
done
# Expose greptimedb cluster to local access.
kubectl -n "$install_namespace" port-forward svc/"$cluster_name"-frontend \
14000:4000 \
14001:4001 \
14002:4002 \
14003:4003 > /tmp/connections.out &
}
# Deploy greptimedb cluster by using S3.
# It will expose cluster service ports as '24000', '24001', '24002', '24003' to local access.
function deploy_greptimedb_cluster_with_s3_storage() {
local cluster_name=$1
local install_namespace=$2
kubectl create ns "$install_namespace"
deploy_etcd_cluster "$install_namespace"
helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
--set meta.etcdEndpoints="etcd.$install_namespace:2379" \
--set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
--set storage.s3.region="$AWS_REGION" \
--set storage.s3.root="$DATA_ROOT" \
--set storage.s3.secretName=s3-credentials \
--set storage.credentials.secretName=s3-credentials \
--set storage.credentials.secretCreation.enabled=true \
--set storage.credentials.secretCreation.enableEncryption=false \
--set storage.credentials.secretCreation.data.access-key-id="$AWS_ACCESS_KEY_ID" \
--set storage.credentials.secretCreation.data.secret-access-key="$AWS_SECRET_ACCESS_KEY"
# Wait for greptimedb cluster to be ready.
while true; do
PHASE=$(kubectl -n "$install_namespace" get gtc "$cluster_name" -o jsonpath='{.status.clusterPhase}')
if [ "$PHASE" == "Running" ]; then
echo "Cluster is ready"
break
else
echo "Cluster is not ready yet: Current phase: $PHASE"
sleep 5 # wait for 5 seconds before check again.
fi
done
# Expose greptimedb cluster to local access.
kubectl -n "$install_namespace" port-forward svc/"$cluster_name"-frontend \
24000:4000 \
24001:4001 \
24002:4002 \
24003:4003 > /tmp/connections.out &
}
# Deploy standalone greptimedb.
# It will expose cluster service ports as '34000', '34001', '34002', '34003' to local access.
function deploy_standalone_greptimedb() {
helm install greptimedb-standalone greptime/greptimedb-standalone \
--set image.tag="$GREPTIMEDB_IMAGE_TAG" \
-n "$DEFAULT_INSTALL_NAMESPACE"
# Wait for etcd cluster to be ready.
kubectl rollout status statefulset/greptimedb-standalone -n "$DEFAULT_INSTALL_NAMESPACE"
# Expose greptimedb to local access.
kubectl -n "$DEFAULT_INSTALL_NAMESPACE" port-forward svc/greptimedb-standalone \
34000:4000 \
34001:4001 \
34002:4002 \
34003:4003 > /tmp/connections.out &
}
# Entrypoint of the script.
function main() {
create_kind_cluster
add_greptime_chart
# Deploy standalone greptimedb in the same K8s.
if [ "$ENABLE_STANDALONE_MODE" == "true" ]; then
deploy_standalone_greptimedb
fi
deploy_greptimedb_operator
deploy_greptimedb_cluster testcluster testcluster
deploy_greptimedb_cluster_with_s3_storage testcluster-s3 testcluster-s3
}
# Usages:
# - Deploy greptimedb cluster: ./deploy-greptimedb.sh
main

View File

@@ -1,102 +0,0 @@
#!/usr/bin/env bash
set -e
set -o pipefail
ARTIFACTS_DIR=$1
VERSION=$2
AWS_S3_BUCKET=$3
RELEASE_DIRS="releases/greptimedb"
GREPTIMEDB_REPO="GreptimeTeam/greptimedb"
# Check if necessary variables are set.
function check_vars() {
for var in AWS_S3_BUCKET VERSION ARTIFACTS_DIR; do
if [ -z "${!var}" ]; then
echo "$var is not set or empty."
echo "Usage: $0 <artifacts-dir> <version> <aws-s3-bucket>"
exit 1
fi
done
}
# Uploads artifacts to AWS S3 bucket.
function upload_artifacts() {
# The bucket layout will be:
# releases/greptimedb
# ├── latest-version.txt
# ├── latest-nightly-version.txt
# ├── v0.1.0
# │ ├── greptime-darwin-amd64-pyo3-v0.1.0.sha256sum
# │ └── greptime-darwin-amd64-pyo3-v0.1.0.tar.gz
# └── v0.2.0
# ├── greptime-darwin-amd64-pyo3-v0.2.0.sha256sum
# └── greptime-darwin-amd64-pyo3-v0.2.0.tar.gz
find "$ARTIFACTS_DIR" -type f \( -name "*.tar.gz" -o -name "*.sha256sum" \) | while IFS= read -r file; do
aws s3 cp \
"$file" "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/$VERSION/$(basename "$file")"
done
}
# Updates the latest version information in AWS S3 if UPDATE_VERSION_INFO is true.
function update_version_info() {
if [ "$UPDATE_VERSION_INFO" == "true" ]; then
# If it's the officail release(like v1.0.0, v1.0.1, v1.0.2, etc.), update latest-version.txt.
if [[ "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "Updating latest-version.txt"
echo "$VERSION" > latest-version.txt
aws s3 cp \
latest-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-version.txt"
fi
# If it's the nightly release, update latest-nightly-version.txt.
if [[ "$VERSION" == *"nightly"* ]]; then
echo "Updating latest-nightly-version.txt"
echo "$VERSION" > latest-nightly-version.txt
aws s3 cp \
latest-nightly-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-nightly-version.txt"
fi
fi
}
# Downloads artifacts from Github if DOWNLOAD_ARTIFACTS_FROM_GITHUB is true.
function download_artifacts_from_github() {
if [ "$DOWNLOAD_ARTIFACTS_FROM_GITHUB" == "true" ]; then
# Check if jq is installed.
if ! command -v jq &> /dev/null; then
echo "jq is not installed. Please install jq to continue."
exit 1
fi
# Get the latest release API response.
RELEASES_API_RESPONSE=$(curl -s -H "Accept: application/vnd.github.v3+json" "https://api.github.com/repos/$GREPTIMEDB_REPO/releases/latest")
# Extract download URLs for the artifacts.
# Exclude source code archives which are typically named as 'greptimedb-<version>.zip' or 'greptimedb-<version>.tar.gz'.
ASSET_URLS=$(echo "$RELEASES_API_RESPONSE" | jq -r '.assets[] | select(.name | test("greptimedb-.*\\.(zip|tar\\.gz)$") | not) | .browser_download_url')
# Download each asset.
while IFS= read -r url; do
if [ -n "$url" ]; then
curl -LJO "$url"
echo "Downloaded: $url"
fi
done <<< "$ASSET_URLS"
fi
}
function main() {
check_vars
download_artifacts_from_github
upload_artifacts
update_version_info
}
# Usage example:
# AWS_ACCESS_KEY_ID=<your_access_key_id> \
# AWS_SECRET_ACCESS_KEY=<your_secret_access_key> \
# AWS_DEFAULT_REGION=<your_region> \
# UPDATE_VERSION_INFO=true \
# DOWNLOAD_ARTIFACTS_FROM_GITHUB=false \
# ./upload-artifacts-to-s3.sh <artifacts-dir> <version> <aws-s3-bucket>
main

View File

@@ -13,11 +13,11 @@ on:
name: Build API docs name: Build API docs
env: env:
RUST_TOOLCHAIN: nightly-2023-10-21 RUST_TOOLCHAIN: nightly-2023-08-07
jobs: jobs:
apidoc: apidoc:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1 - uses: arduino/setup-protoc@v1

View File

@@ -16,11 +16,11 @@ on:
description: The runner uses to build linux-amd64 artifacts description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64 default: ec2-c6i.4xlarge-amd64
options: options:
- ubuntu-20.04 - ubuntu-latest
- ubuntu-20.04-8-cores - ubuntu-latest-8-cores
- ubuntu-20.04-16-cores - ubuntu-latest-16-cores
- ubuntu-20.04-32-cores - ubuntu-latest-32-cores
- ubuntu-20.04-64-cores - ubuntu-latest-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G - ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G - ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -78,7 +78,7 @@ jobs:
allocate-runners: allocate-runners:
name: Allocate runners name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }} linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }} linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -164,7 +164,12 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }} cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: true # Only build the standard greptime binary. dev-mode: true # Only build the standard greptime binary.
upload-to-s3: false # No need to upload to S3.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }} working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
build-linux-arm64-artifacts: build-linux-arm64-artifacts:
@@ -193,7 +198,12 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }} cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: true # Only build the standard greptime binary. dev-mode: true # Only build the standard greptime binary.
upload-to-s3: false # No need to upload to S3.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }} working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
release-images-to-dockerhub: release-images-to-dockerhub:
@@ -204,7 +214,7 @@ jobs:
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }} build-result: ${{ steps.set-build-result.outputs.build-result }}
steps: steps:
@@ -229,44 +239,41 @@ jobs:
run: | run: |
echo "build-result=success" >> $GITHUB_OUTPUT echo "build-result=success" >> $GITHUB_OUTPUT
release-cn-artifacts: release-images-to-acr:
name: Release artifacts to CN region name: Build and push images to ACR
if: ${{ inputs.release_images || github.event_name == 'schedule' }} if: ${{ inputs.release_images || github.event_name == 'schedule' }}
needs: [ needs: [
allocate-runners, allocate-runners,
release-images-to-dockerhub, build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Release artifacts to CN region - name: Build and push images to ACR
uses: ./.github/actions/release-cn-artifacts uses: ./.github/actions/build-images
with: with:
src-image-registry: docker.io image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }} image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: ${{ env.IMAGE_NAME }} image-name: ${{ env.IMAGE_NAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }} image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }} image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
dst-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
aws-cn-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: true # Only build the standard images(exclude centos images).
push-latest-tag: false # Don't push the latest tag to registry. push-latest-tag: false # Don't push the latest tag to registry.
update-version-info: false # Don't update the version info in S3. dev-mode: true # Only build the standard images.
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released. stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner name: Stop linux-amd64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
@@ -291,7 +298,7 @@ jobs:
name: Stop linux-arm64 runner name: Stop linux-arm64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
@@ -318,7 +325,7 @@ jobs:
needs: [ needs: [
release-images-to-dockerhub release-images-to-dockerhub
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
env: env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }} SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps: steps:

View File

@@ -29,12 +29,12 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
env: env:
RUST_TOOLCHAIN: nightly-2023-10-21 RUST_TOOLCHAIN: nightly-2023-08-07
jobs: jobs:
typos: typos:
name: Spell Check with Typos name: Spell Check with Typos
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: crate-ci/typos@v1.13.10 - uses: crate-ci/typos@v1.13.10
@@ -42,10 +42,7 @@ jobs:
check: check:
name: Check name: Check
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ${{ matrix.os }} runs-on: ubuntu-latest
strategy:
matrix:
os: [ windows-latest-8-cores, ubuntu-20.04 ]
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
@@ -63,7 +60,7 @@ jobs:
toml: toml:
name: Toml Check name: Toml Check
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
@@ -83,7 +80,7 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [ ubuntu-20.04-8-cores ] os: [ ubuntu-latest-8-cores, windows-latest-8-cores ]
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
@@ -108,7 +105,7 @@ jobs:
fmt: fmt:
name: Rustfmt name: Rustfmt
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
@@ -127,7 +124,7 @@ jobs:
clippy: clippy:
name: Clippy name: Clippy
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
@@ -145,7 +142,7 @@ jobs:
coverage: coverage:
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04-8-cores runs-on: ubuntu-latest-8-cores
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
@@ -164,18 +161,15 @@ jobs:
uses: Swatinem/rust-cache@v2 uses: Swatinem/rust-cache@v2
- name: Install latest nextest release - name: Install latest nextest release
uses: taiki-e/install-action@nextest uses: taiki-e/install-action@nextest
- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov
- name: Install Python - name: Install Python
uses: actions/setup-python@v4 uses: actions/setup-python@v4
with: with:
python-version: '3.10' python-version: '3.10'
- name: Install PyArrow Package - name: Install PyArrow Package
run: pip install pyarrow run: pip install pyarrow
- name: Setup etcd server - name: Install cargo-llvm-cov
working-directory: tests-integration/fixtures/etcd uses: taiki-e/install-action@cargo-llvm-cov
run: docker compose -f docker-compose-standalone.yml up -d --wait - name: Collect coverage data
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F pyo3_backend -F dashboard
env: env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld" CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
@@ -185,7 +179,6 @@ jobs:
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }} GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }} GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
GT_S3_REGION: ${{ secrets.S3_REGION }} GT_S3_REGION: ${{ secrets.S3_REGION }}
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
UNITTEST_LOG_DIR: "__unittest_logs" UNITTEST_LOG_DIR: "__unittest_logs"
- name: Codecov upload - name: Codecov upload
uses: codecov/codecov-action@v2 uses: codecov/codecov-action@v2
@@ -195,3 +188,43 @@ jobs:
flags: rust flags: rust
fail_ci_if_error: false fail_ci_if_error: false
verbose: true verbose: true
test-on-windows:
if: github.event.pull_request.draft == false
runs-on: windows-latest-8-cores
timeout-minutes: 60
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install Cargo Nextest
uses: taiki-e/install-action@nextest
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:
distribution: Ubuntu-22.04
- name: Running tests
run: cargo nextest run -F pyo3_backend,dashboard
env:
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
GT_S3_REGION: ${{ secrets.S3_REGION }}
UNITTEST_LOG_DIR: "__unittest_logs"

View File

@@ -11,7 +11,7 @@ on:
jobs: jobs:
doc_issue: doc_issue:
if: github.event.label.name == 'doc update required' if: github.event.label.name == 'doc update required'
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- name: create an issue in doc repo - name: create an issue in doc repo
uses: dacbd/create-issue-action@main uses: dacbd/create-issue-action@main
@@ -25,7 +25,7 @@ jobs:
${{ github.event.issue.html_url || github.event.pull_request.html_url }} ${{ github.event.issue.html_url || github.event.pull_request.html_url }}
cloud_issue: cloud_issue:
if: github.event.label.name == 'cloud followup required' if: github.event.label.name == 'cloud followup required'
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- name: create an issue in cloud repo - name: create an issue in cloud repo
uses: dacbd/create-issue-action@main uses: dacbd/create-issue-action@main

View File

@@ -30,7 +30,7 @@ name: CI
jobs: jobs:
typos: typos:
name: Spell Check with Typos name: Spell Check with Typos
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- uses: crate-ci/typos@v1.13.10 - uses: crate-ci/typos@v1.13.10
@@ -38,33 +38,33 @@ jobs:
check: check:
name: Check name: Check
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
fmt: fmt:
name: Rustfmt name: Rustfmt
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
clippy: clippy:
name: Clippy name: Clippy
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
coverage: coverage:
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
sqlness: sqlness:
name: Sqlness Test name: Sqlness Test
if: github.event.pull_request.draft == false if: github.event.pull_request.draft == false
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'

View File

@@ -8,9 +8,9 @@ on:
types: [opened, synchronize, reopened, ready_for_review] types: [opened, synchronize, reopened, ready_for_review]
jobs: jobs:
license-header-check: license-header-check:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
name: license-header-check name: license-header-check
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- name: Check License Header - name: Check License Header
uses: korandoru/hawkeye@v3 uses: apache/skywalking-eyes/header@df70871af1a8109c9a5b1dc824faaf65246c5236

View File

@@ -14,11 +14,11 @@ on:
description: The runner uses to build linux-amd64 artifacts description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.2xlarge-amd64 default: ec2-c6i.2xlarge-amd64
options: options:
- ubuntu-20.04 - ubuntu-latest
- ubuntu-20.04-8-cores - ubuntu-latest-8-cores
- ubuntu-20.04-16-cores - ubuntu-latest-16-cores
- ubuntu-20.04-32-cores - ubuntu-latest-32-cores
- ubuntu-20.04-64-cores - ubuntu-latest-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G - ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G - ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -70,7 +70,7 @@ jobs:
allocate-runners: allocate-runners:
name: Allocate runners name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }} linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }} linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -147,6 +147,11 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }} cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-latest-artifacts: false
build-linux-arm64-artifacts: build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts name: Build linux-arm64 artifacts
@@ -166,6 +171,11 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }} cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-latest-artifacts: false
release-images-to-dockerhub: release-images-to-dockerhub:
name: Build and push images to DockerHub name: Build and push images to DockerHub
@@ -175,7 +185,7 @@ jobs:
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }} nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps: steps:
@@ -198,14 +208,15 @@ jobs:
run: | run: |
echo "nightly-build-result=success" >> $GITHUB_OUTPUT echo "nightly-build-result=success" >> $GITHUB_OUTPUT
release-cn-artifacts: release-images-to-acr:
name: Release artifacts to CN region name: Build and push images to ACR
if: ${{ inputs.release_images || github.event_name == 'schedule' }} if: ${{ inputs.release_images || github.event_name == 'schedule' }}
needs: [ needs: [
allocate-runners, allocate-runners,
release-images-to-dockerhub, build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
# When we push to ACR, it's easy to fail due to some unknown network issues. # When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this. # However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated. # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -215,30 +226,21 @@ jobs:
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Release artifacts to CN region - name: Build and push images to ACR
uses: ./.github/actions/release-cn-artifacts uses: ./.github/actions/build-images
with: with:
src-image-registry: docker.io image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }} image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: greptimedb image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }} image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
dst-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
aws-cn-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: false
update-version-info: false # Don't update version info in S3.
push-latest-tag: false # Don't push the latest tag to registry. push-latest-tag: false # Don't push the latest tag to registry.
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released. stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner name: Stop linux-amd64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
@@ -263,7 +265,7 @@ jobs:
name: Stop linux-arm64 runner name: Stop linux-arm64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
@@ -290,7 +292,7 @@ jobs:
needs: [ needs: [
release-images-to-dockerhub release-images-to-dockerhub
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
env: env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }} SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps: steps:

View File

@@ -1,98 +0,0 @@
# Nightly CI: runs tests every night for our second tier plaforms (Windows)
on:
schedule:
- cron: '0 23 * * 1-5'
workflow_dispatch:
name: Nightly CI
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
RUST_TOOLCHAIN: nightly-2023-10-21
jobs:
sqlness:
name: Sqlness Test
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ windows-latest-8-cores ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4.1.0
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Run sqlness
run: cargo sqlness
- name: Notify slack if failed
if: failure()
uses: slackapi/slack-github-action@v1.23.0
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
with:
payload: |
{"text": "Nightly CI failed for sqlness tests"}
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log
retention-days: 3
test-on-windows:
runs-on: windows-latest-8-cores
timeout-minutes: 60
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v4.1.0
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install Cargo Nextest
uses: taiki-e/install-action@nextest
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:
distribution: Ubuntu-22.04
- name: Running tests
run: cargo nextest run -F pyo3_backend,dashboard
env:
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
GT_S3_REGION: ${{ secrets.S3_REGION }}
UNITTEST_LOG_DIR: "__unittest_logs"
- name: Notify slack if failed
if: failure()
uses: slackapi/slack-github-action@v1.23.0
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
with:
payload: |
{"text": "Nightly CI failed for cargo test"}

View File

@@ -1,26 +0,0 @@
name: Nightly functional tests
on:
schedule:
# At 00:00 on Tuesday.
- cron: '0 0 * * 2'
workflow_dispatch:
jobs:
sqlness-test:
name: Run sqlness test
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Run sqlness test
uses: ./.github/actions/sqlness-test
with:
data-root: sqlness-test
aws-ci-test-bucket: ${{ vars.AWS_CI_TEST_BUCKET }}
aws-region: ${{ vars.AWS_CI_TEST_BUCKET_REGION }}
aws-access-key-id: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}

View File

@@ -10,7 +10,7 @@ on:
jobs: jobs:
check: check:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 10 timeout-minutes: 10
steps: steps:
- uses: thehanimo/pr-title-checker@v1.3.4 - uses: thehanimo/pr-title-checker@v1.3.4
@@ -19,7 +19,7 @@ jobs:
pass_on_octokit_error: false pass_on_octokit_error: false
configuration_path: ".github/pr-title-checker-config.json" configuration_path: ".github/pr-title-checker-config.json"
breaking: breaking:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 10 timeout-minutes: 10
steps: steps:
- uses: thehanimo/pr-title-checker@v1.3.4 - uses: thehanimo/pr-title-checker@v1.3.4

View File

@@ -1,85 +0,0 @@
name: Release dev-builder images
on:
workflow_dispatch: # Allows you to run this workflow manually.
inputs:
version:
description: Version of the dev-builder
required: false
default: latest
release_dev_builder_ubuntu_image:
type: boolean
description: Release dev-builder-ubuntu image
required: false
default: false
release_dev_builder_centos_image:
type: boolean
description: Release dev-builder-centos image
required: false
default: false
release_dev_builder_android_image:
type: boolean
description: Release dev-builder-android image
required: false
default: false
jobs:
release-dev-builder-images:
name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
runs-on: ubuntu-20.04-16-cores
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build and push dev builder images
uses: ./.github/actions/build-dev-builder-images
with:
version: ${{ inputs.version }}
dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }}
build-dev-builder-ubuntu: ${{ inputs.release_dev_builder_ubuntu_image }}
build-dev-builder-centos: ${{ inputs.release_dev_builder_centos_image }}
build-dev-builder-android: ${{ inputs.release_dev_builder_android_image }}
release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-20.04
needs: [
release-dev-builder-images
]
steps:
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ inputs.version }}
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ inputs.version }}
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
env:
DST_REGISTRY_USERNAME: ${{ secrets.ALICLOUD_USERNAME }}
DST_REGISTRY_PASSWORD: ${{ secrets.ALICLOUD_PASSWORD }}
run: |
docker run quay.io/skopeo/stable:latest copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ inputs.version }} \
--dest-creds "$DST_REGISTRY_USERNAME":"$DST_REGISTRY_PASSWORD" \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ inputs.version }}

View File

@@ -18,11 +18,11 @@ on:
description: The runner uses to build linux-amd64 artifacts description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64 default: ec2-c6i.4xlarge-amd64
options: options:
- ubuntu-20.04 - ubuntu-latest
- ubuntu-20.04-8-cores - ubuntu-latest-8-cores
- ubuntu-20.04-16-cores - ubuntu-latest-16-cores
- ubuntu-20.04-32-cores - ubuntu-latest-32-cores
- ubuntu-20.04-64-cores - ubuntu-latest-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G - ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G - ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -63,12 +63,7 @@ on:
description: Build macos artifacts description: Build macos artifacts
required: false required: false
default: false default: false
build_windows_artifacts: release_artifacts:
type: boolean
description: Build Windows artifacts
required: false
default: false
publish_github_release:
type: boolean type: boolean
description: Create GitHub release and upload artifacts description: Create GitHub release and upload artifacts
required: false required: false
@@ -78,11 +73,16 @@ on:
description: Build and push images to DockerHub and ACR description: Build and push images to DockerHub and ACR
required: false required: false
default: false default: false
release_dev_builder_image:
type: boolean
description: Release dev-builder image
required: false
default: false
# Use env variables to control all the release process. # Use env variables to control all the release process.
env: env:
# The arguments of building greptime. # The arguments of building greptime.
RUST_TOOLCHAIN: nightly-2023-10-21 RUST_TOOLCHAIN: nightly-2023-08-07
CARGO_PROFILE: nightly CARGO_PROFILE: nightly
# Controls whether to run tests, include unit-test, integration-test and sqlness. # Controls whether to run tests, include unit-test, integration-test and sqlness.
@@ -91,18 +91,17 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313; # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release. # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.5.0 NEXT_RELEASE_VERSION: v0.4.0
jobs: jobs:
allocate-runners: allocate-runners:
name: Allocate runners name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }} linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }} linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
macos-runner: ${{ inputs.macos_runner || vars.DEFAULT_MACOS_RUNNER }} macos-runner: ${{ inputs.macos_runner || vars.DEFAULT_MACOS_RUNNER }}
windows-runner: windows-latest-8-cores
# The following EC2 resource id will be used for resource releasing. # The following EC2 resource id will be used for resource releasing.
linux-amd64-ec2-runner-label: ${{ steps.start-linux-amd64-runner.outputs.label }} linux-amd64-ec2-runner-label: ${{ steps.start-linux-amd64-runner.outputs.label }}
@@ -178,6 +177,10 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }} cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
build-linux-arm64-artifacts: build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts name: Build linux-arm64 artifacts
@@ -197,6 +200,10 @@ jobs:
cargo-profile: ${{ env.CARGO_PROFILE }} cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
build-macos-artifacts: build-macos-artifacts:
name: Build macOS artifacts name: Build macOS artifacts
@@ -238,43 +245,11 @@ jobs:
features: ${{ matrix.features }} features: ${{ matrix.features }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }} artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
build-windows-artifacts: aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
name: Build Windows artifacts aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
strategy:
fail-fast: false
matrix:
include:
- os: ${{ needs.allocate-runners.outputs.windows-runner }}
arch: x86_64-pc-windows-msvc
features: servers/dashboard
artifacts-dir-prefix: greptime-windows-amd64
- os: ${{ needs.allocate-runners.outputs.windows-runner }}
arch: x86_64-pc-windows-msvc
features: pyo3_backend,servers/dashboard
artifacts-dir-prefix: greptime-windows-amd64-pyo3
runs-on: ${{ matrix.os }}
needs: [
allocate-runners,
]
if: ${{ inputs.build_windows_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: ./.github/actions/build-windows-artifacts
with:
arch: ${{ matrix.arch }}
rust-toolchain: ${{ env.RUST_TOOLCHAIN }}
cargo-profile: ${{ env.CARGO_PROFILE }}
features: ${{ matrix.features }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
release-images-to-dockerhub: release-images-to-dockerhub:
name: Build and push images to DockerHub name: Build and push images to DockerHub
@@ -299,18 +274,15 @@ jobs:
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }} image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
release-cn-artifacts: release-images-to-acr:
name: Release artifacts to CN region name: Build and push images to ACR
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }} if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [ # The job have to wait for all the artifacts are built. needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
build-macos-artifacts,
build-windows-artifacts,
release-images-to-dockerhub,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-2004-16-cores
# When we push to ACR, it's easy to fail due to some unknown network issues. # When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this. # However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated. # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -320,47 +292,55 @@ jobs:
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Release artifacts to CN region - name: Build and push images to ACR
uses: ./.github/actions/release-cn-artifacts uses: ./.github/actions/build-images
with: with:
src-image-registry: docker.io image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }} image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: greptimedb image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }} image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
dst-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
aws-cn-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: false
update-version-info: true
push-latest-tag: true
publish-github-release: release-artifacts:
name: Create GitHub release and upload artifacts name: Create GitHub release and upload artifacts
if: ${{ inputs.publish_github_release || github.event_name == 'push' || github.event_name == 'schedule' }} if: ${{ inputs.release_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [ # The job have to wait for all the artifacts are built. needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
build-macos-artifacts, build-macos-artifacts,
build-windows-artifacts,
release-images-to-dockerhub, release-images-to-dockerhub,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Publish GitHub release - name: Release artifacts
uses: ./.github/actions/publish-github-release uses: ./.github/actions/release-artifacts
with: with:
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
release-dev-builder-image:
name: Release dev builder image
if: ${{ inputs.release_dev_builder_image }} # Only manually trigger this job.
runs-on: ubuntu-latest-16-cores
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build and push dev builder image
uses: ./.github/actions/build-dev-builder-image
with:
dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }}
acr-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
acr-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
acr-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
### Stop runners ### ### Stop runners ###
# It's very necessary to split the job of releasing runners into 'stop-linux-amd64-runner' and 'stop-linux-arm64-runner'. # It's very necessary to split the job of releasing runners into 'stop-linux-amd64-runner' and 'stop-linux-arm64-runner'.
# Because we can terminate the specified EC2 instance immediately after the job is finished without uncessary waiting. # Because we can terminate the specified EC2 instance immediately after the job is finished without uncessary waiting.
@@ -368,7 +348,7 @@ jobs:
name: Stop linux-amd64 runner name: Stop linux-amd64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
@@ -393,7 +373,7 @@ jobs:
name: Stop linux-arm64 runner name: Stop linux-arm64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,

View File

@@ -1,26 +0,0 @@
name: size-labeler
on: [pull_request]
jobs:
labeler:
runs-on: ubuntu-latest
name: Label the PR size
steps:
- uses: codelytv/pr-size-labeler@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
s_label: 'Size: S'
s_max_size: '100'
m_label: 'Size: M'
m_max_size: '500'
l_label: 'Size: L'
l_max_size: '1000'
xl_label: 'Size: XL'
fail_if_xl: 'false'
message_if_xl: >
This PR exceeds the recommended size of 1000 lines.
Please make sure you are NOT addressing multiple issues with one PR.
Note this PR might be rejected due to its size.
github_api_url: 'api.github.com'
files_to_ignore: 'Cargo.lock'

14
.licenserc.yaml Normal file
View File

@@ -0,0 +1,14 @@
header:
license:
spdx-id: Apache-2.0
copyright-owner: Greptime Team
paths:
- "**/*.rs"
- "**/*.py"
comment: on-failure
dependency:
files:
- Cargo.toml

View File

@@ -2,7 +2,7 @@
Thanks a lot for considering contributing to GreptimeDB. We believe people like you would make GreptimeDB a great product. We intend to build a community where individuals can have open talks, show respect for one another, and speak with true ❤️. Meanwhile, we are to keep transparency and make your effort count here. Thanks a lot for considering contributing to GreptimeDB. We believe people like you would make GreptimeDB a great product. We intend to build a community where individuals can have open talks, show respect for one another, and speak with true ❤️. Meanwhile, we are to keep transparency and make your effort count here.
Please read the guidelines, and they can help you get started. Communicate with respect to developers maintaining and developing the project. In return, they should reciprocate that respect by addressing your issue, reviewing changes, as well as helping finalize and merge your pull requests. Read the guidelines, and they can help you get started. Communicate with respect to developers maintaining and developing the project. In return, they should reciprocate that respect by addressing your issue, reviewing changes, as well as helping finalize and merge your pull requests.
Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get the whole picture of the project. To learn about the design of GreptimeDB, please refer to the [design docs](https://github.com/GrepTimeTeam/docs). Follow our [README](https://github.com/GreptimeTeam/greptimedb#readme) to get the whole picture of the project. To learn about the design of GreptimeDB, please refer to the [design docs](https://github.com/GrepTimeTeam/docs).
@@ -21,7 +21,7 @@ Pull requests are great, but we accept all kinds of other help if you like. Such
- Write tutorials or blog posts. Blog, speak about, or create tutorials about one of GreptimeDB's many features. Mention [@greptime](https://twitter.com/greptime) on Twitter and email info@greptime.com so we can give pointers and tips and help you spread the word by promoting your content on Greptime communication channels. - Write tutorials or blog posts. Blog, speak about, or create tutorials about one of GreptimeDB's many features. Mention [@greptime](https://twitter.com/greptime) on Twitter and email info@greptime.com so we can give pointers and tips and help you spread the word by promoting your content on Greptime communication channels.
- Improve the documentation. [Submit documentation](http://github.com/greptimeTeam/docs/) updates, enhancements, designs, or bug fixes, and fixing any spelling or grammar errors will be very much appreciated. - Improve the documentation. [Submit documentation](http://github.com/greptimeTeam/docs/) updates, enhancements, designs, or bug fixes, and fixing any spelling or grammar errors will be very much appreciated.
- Present at meetups and conferences about your GreptimeDB projects. Your unique challenges and successes in building things with GreptimeDB can provide great speaking material. We'd love to review your talk abstract, so get in touch with us if you'd like some help! - Present at meetups and conferences about your GreptimeDB projects. Your unique challenges and successes in building things with GreptimeDB can provide great speaking material. We'd love to review your talk abstract, so get in touch with us if you'd like some help!
- Submitting bug reports. To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new). - Submit bug reports. To report a bug or a security issue, you can [open a new GitHub issue](https://github.com/GrepTimeTeam/greptimedb/issues/new).
- Speak up feature requests. Send feedback is a great way for us to understand your different use cases of GreptimeDB better. If you want to share your experience with GreptimeDB, or if you want to discuss any ideas, you can start a discussion on [GitHub discussions](https://github.com/GreptimeTeam/greptimedb/discussions), chat with the Greptime team on [Slack](https://greptime.com/slack), or you can tweet [@greptime](https://twitter.com/greptime) on Twitter. - Speak up feature requests. Send feedback is a great way for us to understand your different use cases of GreptimeDB better. If you want to share your experience with GreptimeDB, or if you want to discuss any ideas, you can start a discussion on [GitHub discussions](https://github.com/GreptimeTeam/greptimedb/discussions), chat with the Greptime team on [Slack](https://greptime.com/slack), or you can tweet [@greptime](https://twitter.com/greptime) on Twitter.
## Code of Conduct ## Code of Conduct
@@ -49,7 +49,6 @@ GreptimeDB uses the [Apache 2.0 license](https://github.com/GreptimeTeam/greptim
### Before PR ### Before PR
- To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process. - To ensure that community is free and confident in its ability to use your contributions, please sign the Contributor License Agreement (CLA) which will be incorporated in the pull request process.
- Make sure all files have proper license header (running `docker run --rm -v $(pwd):/github/workspace ghcr.io/korandoru/hawkeye-native:v3 format` from the project root).
- Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/). - Make sure all your codes are formatted and follow the [coding style](https://pingcap.github.io/style-guide/rust/).
- Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`). - Make sure all unit tests are passed (using `cargo test --workspace` or [nextest](https://nexte.st/index.html) `cargo nextest run`).
- Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`). - Make sure all clippy warnings are fixed (you can check it locally by running `cargo clippy --workspace --all-targets -- -D warnings`).
@@ -82,7 +81,7 @@ Now, `pre-commit` will run automatically on `git commit`.
### Title ### Title
The titles of pull requests should be prefixed with category names listed in [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0) The titles of pull requests should be prefixed with category names listed in [Conventional Commits specification](https://www.conventionalcommits.org/en/v1.0.0)
like `feat`/`fix`/`docs`, with a concise summary of code change following. AVOID using the last commit message as pull request title. like `feat`/`fix`/`docs`, with a concise summary of code change following. DO NOT use last commit message as pull request title.
### Description ### Description
@@ -101,7 +100,7 @@ of what you were trying to do and what went wrong. You can also reach for help i
## Community ## Community
The core team will be thrilled if you would like to participate in any way you like. When you are stuck, try to ask for help by filing an issue, with a detailed description of what you were trying to do and what went wrong. If you have any questions or if you would like to get involved in our community, please check out: The core team will be thrilled if you participate in any way you like. When you are stuck, try ask for help by filing an issue, with a detailed description of what you were trying to do and what went wrong. If you have any questions or if you would like to get involved in our community, please check out:
- [GreptimeDB Community Slack](https://greptime.com/slack) - [GreptimeDB Community Slack](https://greptime.com/slack)
- [GreptimeDB Github Discussions](https://github.com/GreptimeTeam/greptimedb/discussions) - [GreptimeDB Github Discussions](https://github.com/GreptimeTeam/greptimedb/discussions)

4434
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -8,11 +8,10 @@ members = [
"src/cmd", "src/cmd",
"src/common/base", "src/common/base",
"src/common/catalog", "src/common/catalog",
"src/common/config",
"src/common/datasource", "src/common/datasource",
"src/common/error", "src/common/error",
"src/common/function", "src/common/function",
"src/common/macro", "src/common/function-macro",
"src/common/greptimedb-telemetry", "src/common/greptimedb-telemetry",
"src/common/grpc", "src/common/grpc",
"src/common/grpc-expr", "src/common/grpc-expr",
@@ -27,101 +26,73 @@ members = [
"src/common/telemetry", "src/common/telemetry",
"src/common/test-util", "src/common/test-util",
"src/common/time", "src/common/time",
"src/common/decimal",
"src/common/version", "src/common/version",
"src/datanode", "src/datanode",
"src/datatypes", "src/datatypes",
"src/file-engine", "src/file-table-engine",
"src/frontend", "src/frontend",
"src/log-store", "src/log-store",
"src/meta-client", "src/meta-client",
"src/meta-srv", "src/meta-srv",
"src/metric-engine", "src/mito",
"src/mito2", "src/mito2",
"src/object-store", "src/object-store",
"src/operator",
"src/partition", "src/partition",
"src/plugins",
"src/promql", "src/promql",
"src/puffin",
"src/query", "src/query",
"src/script", "src/script",
"src/servers", "src/servers",
"src/session", "src/session",
"src/sql", "src/sql",
"src/storage",
"src/store-api", "src/store-api",
"src/flow",
"src/table", "src/table",
"src/index", "src/table-procedure",
"tests-integration", "tests-integration",
"tests/runner", "tests/runner",
] ]
resolver = "2" resolver = "2"
[workspace.package] [workspace.package]
version = "0.4.4" version = "0.4.0-nightly"
edition = "2021" edition = "2021"
license = "Apache-2.0" license = "Apache-2.0"
[workspace.dependencies] [workspace.dependencies]
ahash = { version = "0.8", features = ["compile-time-rng"] } arrow = { version = "43.0" }
aquamarine = "0.3" etcd-client = "0.11"
arrow = { version = "47.0" } arrow-array = "43.0"
arrow-array = "47.0" arrow-flight = "43.0"
arrow-flight = "47.0" arrow-schema = { version = "43.0", features = ["serde"] }
arrow-schema = { version = "47.0", features = ["serde"] }
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
base64 = "0.21"
bigdecimal = "0.4.2"
bitflags = "2.4.1"
bytemuck = "1.12"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-substrait = { git = "https://github.com/apache/arrow-datafusion.git", rev = "26e43acac3a96cec8dd4c8365f22dfb1a84306e9" } datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
derive_builder = "0.12" derive_builder = "0.12"
etcd-client = "0.12"
fst = "0.4.7"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "b1d403088f02136bcebde53d604f491c260ca8e2" } greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "4a277f27caa035a801d5b9c020a0449777736614" }
humantime-serde = "1.1" humantime-serde = "1.1"
itertools = "0.10" itertools = "0.10"
lazy_static = "1.4" lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
mockall = "0.11.4"
moka = "0.12"
once_cell = "1.18" once_cell = "1.18"
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [ opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics"] }
"gen-tonic", parquet = "43.0"
"metrics",
"trace",
] }
parquet = "47.0"
paste = "1.0" paste = "1.0"
pin-project = "1.0" prost = "0.11"
prometheus = { version = "0.13.3", features = ["process"] }
prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8" rand = "0.8"
regex = "1.8" regex = "1.8"
regex-automata = { version = "0.1", features = ["transducer"] }
reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
"stream",
] }
rust_decimal = "1.33"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
smallvec = "1" snafu = { version = "0.7", features = ["backtraces"] }
snafu = "0.7" sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "296a4f6c73b129d6f565a42a2e5e53c6bc2b9da4", features = [
# on branch v0.38.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
"visitor", "visitor",
] } ] }
strum = { version = "0.25", features = ["derive"] } strum = { version = "0.25", features = ["derive"] }
@@ -129,9 +100,10 @@ tempfile = "3"
tokio = { version = "1.28", features = ["full"] } tokio = { version = "1.28", features = ["full"] }
tokio-util = { version = "0.7", features = ["io-util", "compat"] } tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.7" toml = "0.7"
tonic = { version = "0.10", features = ["tls"] } tonic = { version = "0.9", features = ["tls"] }
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] } uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
metrics = "0.20"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
## workspaces members ## workspaces members
api = { path = "src/api" } api = { path = "src/api" }
auth = { path = "src/auth" } auth = { path = "src/auth" }
@@ -140,30 +112,29 @@ client = { path = "src/client" }
cmd = { path = "src/cmd" } cmd = { path = "src/cmd" }
common-base = { path = "src/common/base" } common-base = { path = "src/common/base" }
common-catalog = { path = "src/common/catalog" } common-catalog = { path = "src/common/catalog" }
common-config = { path = "src/common/config" }
common-datasource = { path = "src/common/datasource" } common-datasource = { path = "src/common/datasource" }
common-decimal = { path = "src/common/decimal" }
common-error = { path = "src/common/error" } common-error = { path = "src/common/error" }
common-function = { path = "src/common/function" } common-function = { path = "src/common/function" }
common-function-macro = { path = "src/common/function-macro" }
common-greptimedb-telemetry = { path = "src/common/greptimedb-telemetry" } common-greptimedb-telemetry = { path = "src/common/greptimedb-telemetry" }
common-grpc = { path = "src/common/grpc" } common-grpc = { path = "src/common/grpc" }
common-grpc-expr = { path = "src/common/grpc-expr" } common-grpc-expr = { path = "src/common/grpc-expr" }
common-macro = { path = "src/common/macro" }
common-mem-prof = { path = "src/common/mem-prof" } common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" } common-meta = { path = "src/common/meta" }
common-pprof = { path = "src/common/pprof" }
common-procedure = { path = "src/common/procedure" } common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" } common-procedure-test = { path = "src/common/procedure-test" }
common-pprof = { path = "src/common/pprof" }
common-query = { path = "src/common/query" } common-query = { path = "src/common/query" }
common-recordbatch = { path = "src/common/recordbatch" } common-recordbatch = { path = "src/common/recordbatch" }
common-runtime = { path = "src/common/runtime" } common-runtime = { path = "src/common/runtime" }
substrait = { path = "src/common/substrait" }
common-telemetry = { path = "src/common/telemetry" } common-telemetry = { path = "src/common/telemetry" }
common-test-util = { path = "src/common/test-util" } common-test-util = { path = "src/common/test-util" }
common-time = { path = "src/common/time" } common-time = { path = "src/common/time" }
common-version = { path = "src/common/version" } common-version = { path = "src/common/version" }
datanode = { path = "src/datanode" } datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" } datatypes = { path = "src/datatypes" }
file-engine = { path = "src/file-engine" } file-table-engine = { path = "src/file-table-engine" }
frontend = { path = "src/frontend" } frontend = { path = "src/frontend" }
log-store = { path = "src/log-store" } log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" } meta-client = { path = "src/meta-client" }
@@ -171,18 +142,17 @@ meta-srv = { path = "src/meta-srv" }
mito = { path = "src/mito" } mito = { path = "src/mito" }
mito2 = { path = "src/mito2" } mito2 = { path = "src/mito2" }
object-store = { path = "src/object-store" } object-store = { path = "src/object-store" }
operator = { path = "src/operator" }
partition = { path = "src/partition" } partition = { path = "src/partition" }
plugins = { path = "src/plugins" }
promql = { path = "src/promql" } promql = { path = "src/promql" }
query = { path = "src/query" } query = { path = "src/query" }
script = { path = "src/script" } script = { path = "src/script" }
servers = { path = "src/servers" } servers = { path = "src/servers" }
session = { path = "src/session" } session = { path = "src/session" }
sql = { path = "src/sql" } sql = { path = "src/sql" }
storage = { path = "src/storage" }
store-api = { path = "src/store-api" } store-api = { path = "src/store-api" }
substrait = { path = "src/common/substrait" }
table = { path = "src/table" } table = { path = "src/table" }
table-procedure = { path = "src/table-procedure" }
[workspace.dependencies.meter-macros] [workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git" git = "https://github.com/GreptimeTeam/greptime-meter.git"

View File

@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier same "printed page" as the copyright notice for easier
identification within third-party archives. identification within third-party archives.
Copyright [yyyy] [name of copyright owner] Copyright 2022 Greptime Team
Licensed under the Apache License, Version 2.0 (the "License"); Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License. you may not use this file except in compliance with the License.

View File

@@ -55,15 +55,11 @@ else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif endif
ifneq ($(strip $(CARGO_BUILD_EXTRA_OPTS)),)
CARGO_BUILD_OPTS += ${CARGO_BUILD_EXTRA_OPTS}
endif
##@ Build ##@ Build
.PHONY: build .PHONY: build
build: ## Build debug version greptime. build: ## Build debug version greptime.
cargo ${CARGO_EXTENSION} build ${CARGO_BUILD_OPTS} cargo build ${CARGO_BUILD_OPTS}
.POHNY: build-by-dev-builder .POHNY: build-by-dev-builder
build-by-dev-builder: ## Build greptime by dev-builder. build-by-dev-builder: ## Build greptime by dev-builder.
@@ -71,34 +67,11 @@ build-by-dev-builder: ## Build greptime by dev-builder.
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \ -v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \ -w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
make build \ make build \
CARGO_EXTENSION="${CARGO_EXTENSION}" \
CARGO_PROFILE=${CARGO_PROFILE} \ CARGO_PROFILE=${CARGO_PROFILE} \
FEATURES=${FEATURES} \ FEATURES=${FEATURES} \
TARGET_DIR=${TARGET_DIR} \ TARGET_DIR=${TARGET_DIR} \
TARGET=${TARGET} \ TARGET=${TARGET} \
RELEASE=${RELEASE} \ RELEASE=${RELEASE}
CARGO_BUILD_EXTRA_OPTS="${CARGO_BUILD_EXTRA_OPTS}"
.PHONY: build-android-bin
build-android-bin: ## Build greptime binary for android.
docker run --network=host \
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-android:latest \
make build \
CARGO_EXTENSION="ndk --platform 23 -t aarch64-linux-android" \
CARGO_PROFILE=release \
FEATURES="${FEATURES}" \
TARGET_DIR="${TARGET_DIR}" \
TARGET="${TARGET}" \
RELEASE="${RELEASE}" \
CARGO_BUILD_EXTRA_OPTS="--bin greptime --no-default-features"
.PHONY: strip-android-bin
strip-android-bin: build-android-bin ## Strip greptime binary for android.
docker run --network=host \
-v ${PWD}:/greptimedb \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-android:latest \
bash -c '$${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-strip /greptimedb/target/aarch64-linux-android/release/greptime'
.PHONY: clean .PHONY: clean
clean: ## Clean the project. clean: ## Clean the project.
@@ -157,11 +130,11 @@ sqlness-test: ## Run sqlness test.
.PHONY: check .PHONY: check
check: ## Cargo check all the targets. check: ## Cargo check all the targets.
cargo check --workspace --all-targets --all-features cargo check --workspace --all-targets
.PHONY: clippy .PHONY: clippy
clippy: ## Check clippy rules. clippy: ## Check clippy rules.
cargo clippy --workspace --all-targets --all-features -- -D warnings cargo clippy --workspace --all-targets -F pyo3_backend -- -D warnings
.PHONY: fmt-check .PHONY: fmt-check
fmt-check: ## Check code format. fmt-check: ## Check code format.

View File

@@ -96,11 +96,11 @@ Or if you built from docker:
docker run -p 4002:4002 -v "$(pwd):/tmp/greptimedb" greptime/greptimedb standalone start docker run -p 4002:4002 -v "$(pwd):/tmp/greptimedb" greptime/greptimedb standalone start
``` ```
Please see the online document site for more installation options and [operations info](https://docs.greptime.com/user-guide/operations/overview). Please see [the online document site](https://docs.greptime.com/getting-started/overview#install-greptimedb) for more installation options and [operations info](https://docs.greptime.com/user-guide/operations/overview).
### Get started ### Get started
Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview) on our [official document site](https://docs.greptime.com/). Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview#connect) on our [official document site](https://docs.greptime.com/).
To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/clients/overview). To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/clients/overview).
@@ -135,7 +135,6 @@ To write and query data, GreptimeDB is compatible with multiple [protocols and c
- [GreptimeDB Java Client](https://github.com/GreptimeTeam/greptimedb-client-java) - [GreptimeDB Java Client](https://github.com/GreptimeTeam/greptimedb-client-java)
- [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP) - [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP)
- [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust) - [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust)
- [GreptimeDB JavaScript Client](https://github.com/GreptimeTeam/greptime-js-sdk)
## Project Status ## Project Status
@@ -177,6 +176,6 @@ Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
## Acknowledgement ## Acknowledgement
- GreptimeDB uses [Apache Arrow](https://arrow.apache.org/) as the memory model and [Apache Parquet](https://parquet.apache.org/) as the persistent file format. - GreptimeDB uses [Apache Arrow](https://arrow.apache.org/) as the memory model and [Apache Parquet](https://parquet.apache.org/) as the persistent file format.
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion). - GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion).
- [Apache OpenDAL (incubating)](https://opendal.apache.org) gives GreptimeDB a very general and elegant data access abstraction layer. - [OpenDAL](https://github.com/datafuselabs/opendal) from [Datafuse Labs](https://github.com/datafuselabs) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/). - GreptimeDBs meta service is based on [etcd](https://etcd.io/).
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting. - GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.

View File

@@ -6,10 +6,8 @@ license.workspace = true
[dependencies] [dependencies]
arrow.workspace = true arrow.workspace = true
chrono.workspace = true
clap = { version = "4.0", features = ["derive"] } clap = { version = "4.0", features = ["derive"] }
client.workspace = true client = { workspace = true }
futures-util.workspace = true
indicatif = "0.17.1" indicatif = "0.17.1"
itertools.workspace = true itertools.workspace = true
parquet.workspace = true parquet.workspace = true

View File

@@ -27,16 +27,16 @@ use arrow::record_batch::RecordBatch;
use clap::Parser; use clap::Parser;
use client::api::v1::column::Values; use client::api::v1::column::Values;
use client::api::v1::{ use client::api::v1::{
Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, InsertRequests, SemanticType, Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertRequest, InsertRequests,
}; };
use client::{Client, Database, Output, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use futures_util::TryStreamExt;
use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use tokio::task::JoinSet; use tokio::task::JoinSet;
const CATALOG_NAME: &str = "greptime"; const CATALOG_NAME: &str = "greptime";
const SCHEMA_NAME: &str = "public"; const SCHEMA_NAME: &str = "public";
const TABLE_NAME: &str = "nyc_taxi";
#[derive(Parser)] #[derive(Parser)]
#[command(name = "NYC benchmark runner")] #[command(name = "NYC benchmark runner")]
@@ -74,12 +74,7 @@ fn get_file_list<P: AsRef<Path>>(path: P) -> Vec<PathBuf> {
.collect() .collect()
} }
fn new_table_name() -> String {
format!("nyc_taxi_{}", chrono::Utc::now().timestamp())
}
async fn write_data( async fn write_data(
table_name: &str,
batch_size: usize, batch_size: usize,
db: &Database, db: &Database,
path: PathBuf, path: PathBuf,
@@ -109,7 +104,8 @@ async fn write_data(
} }
let (columns, row_count) = convert_record_batch(record_batch); let (columns, row_count) = convert_record_batch(record_batch);
let request = InsertRequest { let request = InsertRequest {
table_name: table_name.to_string(), table_name: TABLE_NAME.to_string(),
region_number: 0,
columns, columns,
row_count, row_count,
}; };
@@ -118,7 +114,7 @@ async fn write_data(
}; };
let now = Instant::now(); let now = Instant::now();
db.insert(requests).await.unwrap(); let _ = db.insert(requests).await.unwrap();
let elapsed = now.elapsed(); let elapsed = now.elapsed();
total_rpc_elapsed_ms += elapsed.as_millis(); total_rpc_elapsed_ms += elapsed.as_millis();
progress_bar.inc(row_count as _); progress_bar.inc(row_count as _);
@@ -136,11 +132,6 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
for (array, field) in record_batch.columns().iter().zip(fields.iter()) { for (array, field) in record_batch.columns().iter().zip(fields.iter()) {
let (values, datatype) = build_values(array); let (values, datatype) = build_values(array);
let semantic_type = match field.name().as_str() {
"VendorID" => SemanticType::Tag,
"tpep_pickup_datetime" => SemanticType::Timestamp,
_ => SemanticType::Field,
};
let column = Column { let column = Column {
column_name: field.name().clone(), column_name: field.name().clone(),
@@ -151,7 +142,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
.map(|bitmap| bitmap.buffer().as_slice().to_vec()) .map(|bitmap| bitmap.buffer().as_slice().to_vec())
.unwrap_or_default(), .unwrap_or_default(),
datatype: datatype.into(), datatype: datatype.into(),
semantic_type: semantic_type as i32, // datatype and semantic_type are set to default
..Default::default() ..Default::default()
}; };
columns.push(column); columns.push(column);
@@ -198,7 +189,7 @@ fn build_values(column: &ArrayRef) -> (Values, ColumnDataType) {
let values = array.values(); let values = array.values();
( (
Values { Values {
timestamp_microsecond_values: values.to_vec(), ts_microsecond_values: values.to_vec(),
..Default::default() ..Default::default()
}, },
ColumnDataType::TimestampMicrosecond, ColumnDataType::TimestampMicrosecond,
@@ -253,212 +244,156 @@ fn is_record_batch_full(batch: &RecordBatch) -> bool {
batch.columns().iter().all(|col| col.null_count() == 0) batch.columns().iter().all(|col| col.null_count() == 0)
} }
fn create_table_expr(table_name: &str) -> CreateTableExpr { fn create_table_expr() -> CreateTableExpr {
CreateTableExpr { CreateTableExpr {
catalog_name: CATALOG_NAME.to_string(), catalog_name: CATALOG_NAME.to_string(),
schema_name: SCHEMA_NAME.to_string(), schema_name: SCHEMA_NAME.to_string(),
table_name: table_name.to_string(), table_name: TABLE_NAME.to_string(),
desc: "".to_string(), desc: "".to_string(),
column_defs: vec![ column_defs: vec![
ColumnDef { ColumnDef {
name: "VendorID".to_string(), name: "VendorID".to_string(),
data_type: ColumnDataType::Int64 as i32, datatype: ColumnDataType::Int64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Tag as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "tpep_pickup_datetime".to_string(), name: "tpep_pickup_datetime".to_string(),
data_type: ColumnDataType::TimestampMicrosecond as i32, datatype: ColumnDataType::TimestampMicrosecond as i32,
is_nullable: false, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Timestamp as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "tpep_dropoff_datetime".to_string(), name: "tpep_dropoff_datetime".to_string(),
data_type: ColumnDataType::TimestampMicrosecond as i32, datatype: ColumnDataType::TimestampMicrosecond as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "passenger_count".to_string(), name: "passenger_count".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "trip_distance".to_string(), name: "trip_distance".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "RatecodeID".to_string(), name: "RatecodeID".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "store_and_fwd_flag".to_string(), name: "store_and_fwd_flag".to_string(),
data_type: ColumnDataType::String as i32, datatype: ColumnDataType::String as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "PULocationID".to_string(), name: "PULocationID".to_string(),
data_type: ColumnDataType::Int64 as i32, datatype: ColumnDataType::Int64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "DOLocationID".to_string(), name: "DOLocationID".to_string(),
data_type: ColumnDataType::Int64 as i32, datatype: ColumnDataType::Int64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "payment_type".to_string(), name: "payment_type".to_string(),
data_type: ColumnDataType::Int64 as i32, datatype: ColumnDataType::Int64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "fare_amount".to_string(), name: "fare_amount".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "extra".to_string(), name: "extra".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "mta_tax".to_string(), name: "mta_tax".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "tip_amount".to_string(), name: "tip_amount".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "tolls_amount".to_string(), name: "tolls_amount".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "improvement_surcharge".to_string(), name: "improvement_surcharge".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "total_amount".to_string(), name: "total_amount".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "congestion_surcharge".to_string(), name: "congestion_surcharge".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "airport_fee".to_string(), name: "airport_fee".to_string(),
data_type: ColumnDataType::Float64 as i32, datatype: ColumnDataType::Float64 as i32,
is_nullable: true, is_nullable: true,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
], ],
time_index: "tpep_pickup_datetime".to_string(), time_index: "tpep_pickup_datetime".to_string(),
primary_keys: vec!["VendorID".to_string()], primary_keys: vec!["VendorID".to_string()],
create_if_not_exists: true, create_if_not_exists: false,
table_options: Default::default(), table_options: Default::default(),
region_numbers: vec![0],
table_id: None, table_id: None,
engine: "mito".to_string(), engine: "mito".to_string(),
} }
} }
fn query_set(table_name: &str) -> HashMap<String, String> { fn query_set() -> HashMap<String, String> {
HashMap::from([ HashMap::from([
( (
"count_all".to_string(), "count_all".to_string(),
format!("SELECT COUNT(*) FROM {table_name};"), format!("SELECT COUNT(*) FROM {TABLE_NAME};"),
), ),
( (
"fare_amt_by_passenger".to_string(), "fare_amt_by_passenger".to_string(),
format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {table_name} GROUP BY passenger_count"), format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {TABLE_NAME} GROUP BY passenger_count"),
) )
]) ])
} }
async fn do_write(args: &Args, db: &Database, table_name: &str) { async fn do_write(args: &Args, db: &Database) {
let mut file_list = get_file_list(args.path.clone().expect("Specify data path in argument")); let mut file_list = get_file_list(args.path.clone().expect("Specify data path in argument"));
let mut write_jobs = JoinSet::new(); let mut write_jobs = JoinSet::new();
let create_table_result = db.create(create_table_expr(table_name)).await; let create_table_result = db.create(create_table_expr()).await;
println!("Create table result: {create_table_result:?}"); println!("Create table result: {create_table_result:?}");
let progress_bar_style = ProgressStyle::with_template( let progress_bar_style = ProgressStyle::with_template(
@@ -476,10 +411,8 @@ async fn do_write(args: &Args, db: &Database, table_name: &str) {
let db = db.clone(); let db = db.clone();
let mpb = multi_progress_bar.clone(); let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone(); let pb_style = progress_bar_style.clone();
let table_name = table_name.to_string(); let _ = write_jobs
let _ = write_jobs.spawn(async move { .spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
write_data(&table_name, batch_size, &db, path, mpb, pb_style).await
});
} }
} }
while write_jobs.join_next().await.is_some() { while write_jobs.join_next().await.is_some() {
@@ -488,32 +421,24 @@ async fn do_write(args: &Args, db: &Database, table_name: &str) {
let db = db.clone(); let db = db.clone();
let mpb = multi_progress_bar.clone(); let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone(); let pb_style = progress_bar_style.clone();
let table_name = table_name.to_string(); let _ = write_jobs
let _ = write_jobs.spawn(async move { .spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
write_data(&table_name, batch_size, &db, path, mpb, pb_style).await
});
} }
} }
} }
async fn do_query(num_iter: usize, db: &Database, table_name: &str) { async fn do_query(num_iter: usize, db: &Database) {
for (query_name, query) in query_set(table_name) { for (query_name, query) in query_set() {
println!("Running query: {query}"); println!("Running query: {query}");
for i in 0..num_iter { for i in 0..num_iter {
let now = Instant::now(); let now = Instant::now();
let res = db.sql(&query).await.unwrap(); let _res = db.sql(&query).await.unwrap();
match res {
Output::AffectedRows(_) | Output::RecordBatches(_) => (),
Output::Stream(stream) => {
stream.try_collect::<Vec<_>>().await.unwrap();
}
}
let elapsed = now.elapsed(); let elapsed = now.elapsed();
println!( println!(
"query {}, iteration {}: {}ms", "query {}, iteration {}: {}ms",
query_name, query_name,
i, i,
elapsed.as_millis(), elapsed.as_millis()
); );
} }
} }
@@ -530,14 +455,13 @@ fn main() {
.block_on(async { .block_on(async {
let client = Client::with_urls(vec![&args.endpoint]); let client = Client::with_urls(vec![&args.endpoint]);
let db = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client); let db = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
let table_name = new_table_name();
if !args.skip_write { if !args.skip_write {
do_write(&args, &db, &table_name).await; do_write(&args, &db).await;
} }
if !args.skip_read { if !args.skip_read {
do_query(args.iter_num, &db, &table_name).await; do_query(args.iter_num, &db).await;
} }
}) })
} }

View File

@@ -1,5 +1,7 @@
# Node running mode, see `standalone.example.toml`. # Node running mode, see `standalone.example.toml`.
mode = "distributed" mode = "distributed"
# Whether to use in-memory catalog, see `standalone.example.toml`.
enable_memory_catalog = false
# The datanode identifier, should be unique. # The datanode identifier, should be unique.
node_id = 42 node_id = 42
# gRPC server address, "127.0.0.1:3001" by default. # gRPC server address, "127.0.0.1:3001" by default.
@@ -8,24 +10,19 @@ rpc_addr = "127.0.0.1:3001"
rpc_hostname = "127.0.0.1" rpc_hostname = "127.0.0.1"
# The number of gRPC server worker threads, 8 by default. # The number of gRPC server worker threads, 8 by default.
rpc_runtime_size = 8 rpc_runtime_size = 8
# Start services after regions have obtained leases.
# It will block the datanode start if it can't receive leases in the heartbeat from metasrv.
require_lease_before_startup = false
[heartbeat] [heartbeat]
# Interval for sending heartbeat messages to the Metasrv, 3 seconds by default. # Interval for sending heartbeat messages to the Metasrv in milliseconds, 5000 by default.
interval = "3s" interval_millis = 5000
# Metasrv client options. # Metasrv client options.
[meta_client] [meta_client_options]
# Metasrv address list. # Metasrv address list.
metasrv_addrs = ["127.0.0.1:3002"] metasrv_addrs = ["127.0.0.1:3002"]
# Heartbeat timeout, 500 milliseconds by default. # Operation timeout in milliseconds, 3000 by default.
heartbeat_timeout = "500ms" timeout_millis = 3000
# Operation timeout, 3 seconds by default. # Connect server timeout in milliseconds, 5000 by default.
timeout = "3s" connect_timeout_millis = 5000
# Connect server timeout, 1 second by default.
connect_timeout = "1s"
# `TCP_NODELAY` option for accepted connections, true by default. # `TCP_NODELAY` option for accepted connections, true by default.
tcp_nodelay = true tcp_nodelay = true
@@ -47,43 +44,41 @@ type = "File"
# TTL for all tables. Disabled by default. # TTL for all tables. Disabled by default.
# global_ttl = "7d" # global_ttl = "7d"
# Cache configuration for object storage such as 'S3' etc. # Compaction options, see `standalone.example.toml`.
# The local file cache directory [storage.compaction]
# cache_path = "/path/local_cache" max_inflight_tasks = 4
# The local file cache capacity in bytes. max_files_in_level0 = 8
# cache_capacity = "256MB" max_purge_tasks = 32
# Mito engine options # Storage manifest options
[[region_engine]] [storage.manifest]
[region_engine.mito] # Region checkpoint actions margin.
# Number of region workers # Create a checkpoint every <checkpoint_margin> actions.
num_workers = 8 checkpoint_margin = 10
# Request channel size of each worker # Region manifest logs and checkpoints gc execution duration
worker_channel_size = 128 gc_duration = '10m'
# Max batch size for a worker to handle requests
worker_request_batch_size = 64 # Storage flush options
# Number of meta action updated to trigger a new checkpoint for the manifest [storage.flush]
manifest_checkpoint_distance = 10 # Max inflight flush tasks.
# Whether to compress manifest and checkpoint file by gzip (default false). max_flush_tasks = 8
compress_manifest = false # Default write buffer size for a region.
# Max number of running background jobs region_write_buffer_size = "32MB"
max_background_jobs = 4 # Interval to check whether a region needs flush.
picker_schedule_interval = "5m"
# Interval to auto flush a region if it has not flushed yet. # Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h" auto_flush_interval = "1h"
# Global write buffer size for all regions. # Global write buffer size for all regions.
global_write_buffer_size = "1GB" global_write_buffer_size = "1GB"
# Global write buffer size threshold to reject write requests (default 2G).
global_write_buffer_reject_size = "2GB"
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache.
sst_meta_cache_size = "128MB"
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache.
vector_cache_size = "512MB"
# Cache size for pages of SST row groups (default 512MB). Setting it to 0 to disable the cache.
page_cache_size = "512MB"
# Buffer size for SST writing.
sst_write_buffer_size = "8MB"
# Log options, see `standalone.example.toml` # Procedure storage options, see `standalone.example.toml`.
[procedure]
max_retry_times = 3
retry_delay = "500ms"
# Log options
# [logging] # [logging]
# Specify logs directory.
# dir = "/tmp/greptimedb/logs" # dir = "/tmp/greptimedb/logs"
# Specify the log level [info | debug | error | warn]
# level = "info" # level = "info"

View File

@@ -2,67 +2,64 @@
mode = "distributed" mode = "distributed"
[heartbeat] [heartbeat]
# Interval for sending heartbeat task to the Metasrv, 5 seconds by default. # Interval for sending heartbeat task to the Metasrv in milliseconds, 5000 by default.
interval = "5s" interval_millis = 5000
# Interval for retry sending heartbeat task, 5 seconds by default. # Interval for retry sending heartbeat task in milliseconds, 5000 by default.
retry_interval = "5s" retry_interval_millis = 5000
# HTTP server options, see `standalone.example.toml`. # HTTP server options, see `standalone.example.toml`.
[http] [http_options]
addr = "127.0.0.1:4000" addr = "127.0.0.1:4000"
timeout = "30s" timeout = "30s"
body_limit = "64MB" body_limit = "64MB"
# gRPC server options, see `standalone.example.toml`. # gRPC server options, see `standalone.example.toml`.
[grpc] [grpc_options]
addr = "127.0.0.1:4001" addr = "127.0.0.1:4001"
runtime_size = 8 runtime_size = 8
# MySQL server options, see `standalone.example.toml`. # MySQL server options, see `standalone.example.toml`.
[mysql] [mysql_options]
enable = true
addr = "127.0.0.1:4002" addr = "127.0.0.1:4002"
runtime_size = 2 runtime_size = 2
# MySQL server TLS options, see `standalone.example.toml`. # MySQL server TLS options, see `standalone.example.toml`.
[mysql.tls] [mysql_options.tls]
mode = "disable" mode = "disable"
cert_path = "" cert_path = ""
key_path = "" key_path = ""
# PostgresSQL server options, see `standalone.example.toml`. # PostgresSQL server options, see `standalone.example.toml`.
[postgres] [postgres_options]
enable = true
addr = "127.0.0.1:4003" addr = "127.0.0.1:4003"
runtime_size = 2 runtime_size = 2
# PostgresSQL server TLS options, see `standalone.example.toml`. # PostgresSQL server TLS options, see `standalone.example.toml`.
[postgres.tls] [postgres_options.tls]
mode = "disable" mode = "disable"
cert_path = "" cert_path = ""
key_path = "" key_path = ""
# OpenTSDB protocol options, see `standalone.example.toml`. # OpenTSDB protocol options, see `standalone.example.toml`.
[opentsdb] [opentsdb_options]
enable = true
addr = "127.0.0.1:4242" addr = "127.0.0.1:4242"
runtime_size = 2 runtime_size = 2
# InfluxDB protocol options, see `standalone.example.toml`. # InfluxDB protocol options, see `standalone.example.toml`.
[influxdb] [influxdb_options]
enable = true enable = true
# Prometheus remote storage options, see `standalone.example.toml`. # Prometheus remote storage options, see `standalone.example.toml`.
[prom_store] [prom_store_options]
enable = true enable = true
# Metasrv client options, see `datanode.example.toml`. # Metasrv client options, see `datanode.example.toml`.
[meta_client] [meta_client_options]
metasrv_addrs = ["127.0.0.1:3002"] metasrv_addrs = ["127.0.0.1:3002"]
timeout = "3s" timeout_millis = 3000
# DDL timeouts options. # DDL timeouts options.
ddl_timeout = "10s" ddl_timeout_millis = 10000
connect_timeout = "1s" connect_timeout_millis = 5000
tcp_nodelay = true tcp_nodelay = true
# Log options, see `standalone.example.toml` # Log options, see `standalone.example.toml`

View File

@@ -6,6 +6,8 @@ bind_addr = "127.0.0.1:3002"
server_addr = "127.0.0.1:3002" server_addr = "127.0.0.1:3002"
# Etcd server address, "127.0.0.1:2379" by default. # Etcd server address, "127.0.0.1:2379" by default.
store_addr = "127.0.0.1:2379" store_addr = "127.0.0.1:2379"
# Datanode lease in seconds, 15 seconds by default.
datanode_lease_secs = 15
# Datanode selector type. # Datanode selector type.
# - "LeaseBased" (default value). # - "LeaseBased" (default value).
# - "LoadBased" # - "LoadBased"
@@ -28,17 +30,10 @@ max_retry_times = 12
# Initial retry delay of procedures, increases exponentially # Initial retry delay of procedures, increases exponentially
retry_delay = "500ms" retry_delay = "500ms"
# Failure detectors options.
[failure_detector]
threshold = 8.0
min_std_deviation = "100ms"
acceptable_heartbeat_pause = "3000ms"
first_heartbeat_estimate = "1000ms"
# # Datanode options. # # Datanode options.
# [datanode] # [datanode]
# # Datanode client options. # # Datanode client options.
# [datanode.client_options] # [datanode.client_options]
# timeout = "10s" # timeout_millis = 10000
# connect_timeout = "10s" # connect_timeout_millis = 10000
# tcp_nodelay = true # tcp_nodelay = true

View File

@@ -1,10 +1,12 @@
# Node running mode, "standalone" or "distributed". # Node running mode, "standalone" or "distributed".
mode = "standalone" mode = "standalone"
# Whether to use in-memory catalog, `false` by default.
enable_memory_catalog = false
# Whether to enable greptimedb telemetry, true by default. # Whether to enable greptimedb telemetry, true by default.
enable_telemetry = true enable_telemetry = true
# HTTP server options. # HTTP server options.
[http] [http_options]
# Server address, "127.0.0.1:4000" by default. # Server address, "127.0.0.1:4000" by default.
addr = "127.0.0.1:4000" addr = "127.0.0.1:4000"
# HTTP request timeout, 30s by default. # HTTP request timeout, 30s by default.
@@ -14,23 +16,21 @@ timeout = "30s"
body_limit = "64MB" body_limit = "64MB"
# gRPC server options. # gRPC server options.
[grpc] [grpc_options]
# Server address, "127.0.0.1:4001" by default. # Server address, "127.0.0.1:4001" by default.
addr = "127.0.0.1:4001" addr = "127.0.0.1:4001"
# The number of server worker threads, 8 by default. # The number of server worker threads, 8 by default.
runtime_size = 8 runtime_size = 8
# MySQL server options. # MySQL server options.
[mysql] [mysql_options]
# Whether to enable
enable = true
# Server address, "127.0.0.1:4002" by default. # Server address, "127.0.0.1:4002" by default.
addr = "127.0.0.1:4002" addr = "127.0.0.1:4002"
# The number of server worker threads, 2 by default. # The number of server worker threads, 2 by default.
runtime_size = 2 runtime_size = 2
# MySQL server TLS options. # MySQL server TLS options.
[mysql.tls] [mysql_options.tls]
# TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html # TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html
# - "disable" (default value) # - "disable" (default value)
# - "prefer" # - "prefer"
@@ -44,16 +44,14 @@ cert_path = ""
key_path = "" key_path = ""
# PostgresSQL server options. # PostgresSQL server options.
[postgres] [postgres_options]
# Whether to enable
enable = true
# Server address, "127.0.0.1:4003" by default. # Server address, "127.0.0.1:4003" by default.
addr = "127.0.0.1:4003" addr = "127.0.0.1:4003"
# The number of server worker threads, 2 by default. # The number of server worker threads, 2 by default.
runtime_size = 2 runtime_size = 2
# PostgresSQL server TLS options, see `[mysql_options.tls]` section. # PostgresSQL server TLS options, see `[mysql_options.tls]` section.
[postgres.tls] [postgres_options.tls]
# TLS mode. # TLS mode.
mode = "disable" mode = "disable"
# certificate file path. # certificate file path.
@@ -62,21 +60,19 @@ cert_path = ""
key_path = "" key_path = ""
# OpenTSDB protocol options. # OpenTSDB protocol options.
[opentsdb] [opentsdb_options]
# Whether to enable
enable = true
# OpenTSDB telnet API server address, "127.0.0.1:4242" by default. # OpenTSDB telnet API server address, "127.0.0.1:4242" by default.
addr = "127.0.0.1:4242" addr = "127.0.0.1:4242"
# The number of server worker threads, 2 by default. # The number of server worker threads, 2 by default.
runtime_size = 2 runtime_size = 2
# InfluxDB protocol options. # InfluxDB protocol options.
[influxdb] [influxdb_options]
# Whether to enable InfluxDB protocol in HTTP API, true by default. # Whether to enable InfluxDB protocol in HTTP API, true by default.
enable = true enable = true
# Prometheus remote storage options # Prometheus remote storage options
[prom_store] [prom_store_options]
# Whether to enable Prometheus remote write and read in HTTP API, true by default. # Whether to enable Prometheus remote write and read in HTTP API, true by default.
enable = true enable = true
@@ -95,20 +91,6 @@ read_batch_size = 128
# Whether to sync log file after every write. # Whether to sync log file after every write.
sync_write = false sync_write = false
# Metadata storage options.
[metadata_store]
# Kv file size in bytes.
file_size = "256MB"
# Kv purge threshold.
purge_threshold = "4GB"
# Procedure storage options.
[procedure]
# Procedure max retry time.
max_retry_times = 3
# Initial retry delay of procedures, increases exponentially
retry_delay = "500ms"
# Storage options. # Storage options.
[storage] [storage]
# The working home directory. # The working home directory.
@@ -117,40 +99,43 @@ data_home = "/tmp/greptimedb/"
type = "File" type = "File"
# TTL for all tables. Disabled by default. # TTL for all tables. Disabled by default.
# global_ttl = "7d" # global_ttl = "7d"
# Cache configuration for object storage such as 'S3' etc.
# cache_path = "/path/local_cache"
# The local file cache capacity in bytes.
# cache_capacity = "256MB"
# Mito engine options # Compaction options.
[[region_engine]] [storage.compaction]
[region_engine.mito] # Max task number that can concurrently run.
# Number of region workers max_inflight_tasks = 4
num_workers = 8 # Max files in level 0 to trigger compaction.
# Request channel size of each worker max_files_in_level0 = 8
worker_channel_size = 128 # Max task number for SST purge task after compaction.
# Max batch size for a worker to handle requests max_purge_tasks = 32
worker_request_batch_size = 64
# Number of meta action updated to trigger a new checkpoint for the manifest # Storage manifest options
manifest_checkpoint_distance = 10 [storage.manifest]
# Whether to compress manifest and checkpoint file by gzip (default false). # Region checkpoint actions margin.
compress_manifest = false # Create a checkpoint every <checkpoint_margin> actions.
# Max number of running background jobs checkpoint_margin = 10
max_background_jobs = 4 # Region manifest logs and checkpoints gc execution duration
gc_duration = '10m'
# Storage flush options
[storage.flush]
# Max inflight flush tasks.
max_flush_tasks = 8
# Default write buffer size for a region.
region_write_buffer_size = "32MB"
# Interval to check whether a region needs flush.
picker_schedule_interval = "5m"
# Interval to auto flush a region if it has not flushed yet. # Interval to auto flush a region if it has not flushed yet.
auto_flush_interval = "1h" auto_flush_interval = "1h"
# Global write buffer size for all regions. # Global write buffer size for all regions.
global_write_buffer_size = "1GB" global_write_buffer_size = "1GB"
# Global write buffer size threshold to reject write requests (default 2G).
global_write_buffer_reject_size = "2GB" # Procedure storage options.
# Cache size for SST metadata (default 128MB). Setting it to 0 to disable the cache. [procedure]
sst_meta_cache_size = "128MB" # Procedure max retry time.
# Cache size for vectors and arrow arrays (default 512MB). Setting it to 0 to disable the cache. max_retry_times = 3
vector_cache_size = "512MB" # Initial retry delay of procedures, increases exponentially
# Cache size for pages of SST row groups (default 512MB). Setting it to 0 to disable the cache. retry_delay = "500ms"
page_cache_size = "512MB"
# Buffer size for SST writing.
sst_write_buffer_size = "8MB"
# Log options # Log options
# [logging] # [logging]
@@ -158,9 +143,3 @@ sst_write_buffer_size = "8MB"
# dir = "/tmp/greptimedb/logs" # dir = "/tmp/greptimedb/logs"
# Specify the log level [info | debug | error | warn] # Specify the log level [info | debug | error | warn]
# level = "info" # level = "info"
# whether enable tracing, default is false
# enable_otlp_tracing = false
# tracing exporter endpoint with format `ip:port`, we use grpc oltp as exporter, default endpoint is `localhost:4317`
# otlp_endpoint = "localhost:4317"
# The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. Fractions < 0 are treated as 0
# tracing_sample_ratio = 1.0

View File

@@ -1,4 +1,4 @@
FROM ubuntu:20.04 as builder FROM ubuntu:22.04 as builder
ARG CARGO_PROFILE ARG CARGO_PROFILE
ARG FEATURES ARG FEATURES
@@ -7,11 +7,6 @@ ARG OUTPUT_DIR
ENV LANG en_US.utf8 ENV LANG en_US.utf8
WORKDIR /greptimedb WORKDIR /greptimedb
# Add PPA for Python 3.10.
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa -y
# Install dependencies. # Install dependencies.
RUN --mount=type=cache,target=/var/cache/apt \ RUN --mount=type=cache,target=/var/cache/apt \
apt-get update && apt-get install -y \ apt-get update && apt-get install -y \

View File

@@ -1,41 +0,0 @@
FROM --platform=linux/amd64 saschpe/android-ndk:34-jdk17.0.8_7-ndk25.2.9519653-cmake3.22.1
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Rename libunwind to libgcc
RUN cp ${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.7/lib/linux/aarch64/libunwind.a ${NDK_ROOT}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/14.0.7/lib/linux/aarch64/libgcc.a
# Install dependencies.
RUN apt-get update && apt-get install -y \
libssl-dev \
protobuf-compiler \
curl \
git \
build-essential \
pkg-config \
python3 \
python3-dev \
python3-pip \
&& pip3 install --upgrade pip \
&& pip3 install pyarrow
# Trust workdir
RUN git config --global --add safe.directory /greptimedb
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Add android toolchains
ARG RUST_TOOLCHAIN
RUN rustup toolchain install ${RUST_TOOLCHAIN}
RUN rustup target add aarch64-linux-android
# Install cargo-ndk
RUN cargo install cargo-ndk
ENV ANDROID_NDK_HOME $NDK_ROOT
# Builder entrypoint.
CMD ["cargo", "ndk", "--platform", "23", "-t", "aarch64-linux-android", "build", "--bin", "greptime", "--profile", "release", "--no-default-features"]

View File

@@ -1,13 +1,8 @@
FROM ubuntu:20.04 FROM ubuntu:22.04
ENV LANG en_US.utf8 ENV LANG en_US.utf8
WORKDIR /greptimedb WORKDIR /greptimedb
# Add PPA for Python 3.10.
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common && \
add-apt-repository ppa:deadsnakes/ppa -y
# Install dependencies. # Install dependencies.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libssl-dev \ libssl-dev \
@@ -19,13 +14,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
build-essential \ build-essential \
pkg-config \ pkg-config \
python3.10 \ python3.10 \
python3.10-dev python3.10-dev \
python3-pip
# Remove Python 3.8 and install pip.
RUN apt-get -y purge python3.8 && \
apt-get -y autoremove && \
ln -s /usr/bin/python3.10 /usr/bin/python3 && \
curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
RUN git config --global --add safe.directory /greptimedb RUN git config --global --add safe.directory /greptimedb

View File

@@ -1,47 +0,0 @@
# Use the legacy glibc 2.28.
FROM ubuntu:18.10
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Use old-releases.ubuntu.com to avoid 404s: https://help.ubuntu.com/community/EOLUpgrades.
RUN echo "deb http://old-releases.ubuntu.com/ubuntu/ cosmic main restricted universe multiverse\n\
deb http://old-releases.ubuntu.com/ubuntu/ cosmic-updates main restricted universe multiverse\n\
deb http://old-releases.ubuntu.com/ubuntu/ cosmic-security main restricted universe multiverse" > /etc/apt/sources.list
# Install dependencies.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libssl-dev \
tzdata \
curl \
ca-certificates \
git \
build-essential \
unzip \
pkg-config
# Install protoc.
ENV PROTOC_VERSION=25.1
RUN if [ "$(uname -m)" = "x86_64" ]; then \
PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-x86_64.zip; \
elif [ "$(uname -m)" = "aarch64" ]; then \
PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-aarch_64.zip; \
else \
echo "Unsupported architecture"; exit 1; \
fi && \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP} && \
unzip -o ${PROTOC_ZIP} -d /usr/local bin/protoc && \
unzip -o ${PROTOC_ZIP} -d /usr/local 'include/*' && \
rm -f ${PROTOC_ZIP}
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Install Rust toolchains.
ARG RUST_TOOLCHAIN
RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install nextest.
RUN cargo install cargo-nextest --locked

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

View File

@@ -1,61 +0,0 @@
# TSBS benchmark - v0.4.0
## Environment
### Local
| | |
| ------ | ---------------------------------- |
| CPU | AMD Ryzen 7 7735HS (8 core 3.2GHz) |
| Memory | 32GB |
| Disk | SOLIDIGM SSDPFKNU010TZ |
| OS | Ubuntu 22.04.2 LTS |
### Aliyun amd64
| | |
| ------- | -------------- |
| Machine | ecs.g7.4xlarge |
| CPU | 16 core |
| Memory | 64GB |
| Disk | 100G |
| OS | Ubuntu 22.04 |
### Aliyun arm64
| | |
| ------- | ----------------- |
| Machine | ecs.g8y.4xlarge |
| CPU | 16 core |
| Memory | 64GB |
| Disk | 100G |
| OS | Ubuntu 22.04 ARM |
## Write performance
| Environment | Ingest raterows/s |
| ------------------ | --------------------- |
| Local | 365280.60 |
| Aliyun g7.4xlarge | 341368.72 |
| Aliyun g8y.4xlarge | 320907.29 |
## Query performance
| Query type | Local (ms) | Aliyun g7.4xlarge (ms) | Aliyun g8y.4xlarge (ms) |
| --------------------- | ---------- | ---------------------- | ----------------------- |
| cpu-max-all-1 | 50.70 | 31.46 | 47.61 |
| cpu-max-all-8 | 262.16 | 129.26 | 152.43 |
| double-groupby-1 | 2512.71 | 1408.19 | 1586.10 |
| double-groupby-5 | 3896.15 | 2304.29 | 2585.29 |
| double-groupby-all | 5404.67 | 3337.61 | 3773.91 |
| groupby-orderby-limit | 3786.98 | 2065.72 | 2312.57 |
| high-cpu-1 | 71.96 | 37.29 | 54.01 |
| high-cpu-all | 9468.75 | 7595.69 | 8467.46 |
| lastpoint | 13379.43 | 11253.76 | 12949.40 |
| single-groupby-1-1-1 | 20.72 | 12.16 | 13.35 |
| single-groupby-1-1-12 | 28.53 | 15.67 | 21.62 |
| single-groupby-1-8-1 | 72.23 | 37.90 | 43.52 |
| single-groupby-5-1-1 | 26.75 | 15.59 | 17.48 |
| single-groupby-5-1-12 | 45.41 | 22.90 | 31.96 |
| single-groupby-5-8-1 | 107.96 | 59.76 | 69.58 |

View File

@@ -1,113 +0,0 @@
---
Feature Name: Inverted Index for SST File
Tracking Issue: TBD
Date: 2023-11-03
Author: "Zhong Zhenchi <zhongzc_arch@outlook.com>"
---
# Summary
This RFC proposes an optimization towards the storage engine by introducing an inverted indexing methodology aimed at optimizing label selection queries specifically pertaining to Metrics with tag columns as the target for optimization.
# Introduction
In the current system setup, in the Mito Engine, the first column of Primary Keys has a Min-Max index, which significantly optimizes the outcome. However, there are limitations when it comes to other columns, primarily tags. This RFC suggests the implementation of an inverted index to provide enhanced filtering benefits to bridge these limitations and improve overall system performance.
# Design Detail
## Inverted Index
The primary aim of the proposed inverted index is to optimize tag columns in the SST Parquet Files within the Mito Engine. The mapping and construction of an inverted index, from Tag Values to Row Groups, enables efficient logical structures that provide faster and more flexible queries.
When scanning SST Files, pushed-down filters applied to a respective Tag's inverted index, determine the final Row Groups to be indexed and scanned, further bolstering the speed and efficiency of data retrieval processes.
## Index Format
The Inverted Index for each SST file follows the format shown below:
```
inverted_index₀ inverted_index₁ ... inverted_indexₙ footer
```
The structure inside each Inverted Index is as followed:
```
bitmap₀ bitmap₁ bitmap₂ ... bitmapₙ null_bitmap fst
```
The format is encapsulated by a footer:
```
footer_payload footer_payload_size
```
The `footer_payload` is presented in protobuf encoding of `InvertedIndexFooter`.
The complete format is containerized in [Puffin](https://iceberg.apache.org/puffin-spec/) with the type defined as `greptime-inverted-index-v1`.
## Protobuf Details
The `InvertedIndexFooter` is defined in the following protobuf structure:
```protobuf
message InvertedIndexFooter {
repeated InvertedIndexMeta metas;
}
message InvertedIndexMeta {
string name;
uint64 row_count_in_group;
uint64 fst_offset;
uint64 fst_size;
uint64 null_bitmap_offset;
uint64 null_bitmap_size;
InvertedIndexStats stats;
}
message InvertedIndexStats {
uint64 null_count;
uint64 distinct_count;
bytes min_value;
bytes max_value;
}
```
## Bitmap
Bitmaps are used to represent indices of fixed-size groups. Rows are divided into groups of a fixed size, defined in the `InvertedIndexMeta` as `row_count_in_group`.
For example, when `row_count_in_group` is `4096`, it means each group has `4096` rows. If there are a total of `10000` rows, there will be `3` groups in total. The first two groups will have `4096` rows each, and the last group will have `1808` rows. If the indexed values are found in row `200` and `9000`, they will correspond to groups `0` and `2`, respectively. Therefore, the bitmap should show `0` and `2`.
Bitmap is implemented using [BitVec](https://docs.rs/bitvec/latest/bitvec/), selected due to its efficient representation of dense data arrays typical of indices of groups.
## Finite State Transducer (FST)
[FST](https://docs.rs/fst/latest/fst/) is a highly efficient data structure ideal for in-memory indexing. It represents ordered sets or maps where the keys are bytes. The choice of the FST effectively balances the need for performance, space efficiency, and the ability to perform complex analyses such as regular expression matching.
The conventional usage of FST and `u64` values has been adapted to facilitate indirect indexing to row groups. As the row groups are represented as Bitmaps, we utilize the `u64` values split into bitmap's offset (higher 32 bits) and size (lower 32 bits) to represent the location of these Bitmaps.
## API Design
Two APIs `InvertedIndexBuilder` for building indexes and `InvertedIndexSearcher` for querying indexes are designed:
```rust
type Bytes = Vec<u8>;
type GroupId = u64;
trait InvertedIndexBuilder {
fn add(&mut self, name: &str, value: Option<&Bytes>, group_id: GroupId) -> Result<()>;
fn finish(&mut self) -> Result<()>;
}
enum Predicate {
Gt(Bytes),
GtEq(Bytes),
Lt(Bytes),
LtEq(Bytes),
InList(Vec<Bytes>),
RegexMatch(String),
}
trait InvertedIndexSearcher {
fn search(&mut self, name: &str, predicates: &[Predicate]) -> Result<impl IntoIterator<GroupId>>;
}
```

View File

@@ -1,169 +0,0 @@
---
Feature Name: Region Migration Procedure
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/2700
Date: 2023-11-03
Author: "Xu Wenkang <wenymedia@gmail.com>"
---
# Summary
This RFC proposes a way that brings the ability of Meta Server to move regions between the Datanodes.
# Motivation
Typically, We need this ability in the following scenarios:
- Migrate hot-spot Regions to idle Datanode
- Move the failure Regions to an available Datanode
# Details
```mermaid
flowchart TD
style Start fill:#85CB90,color:#fff
style End fill:#85CB90,color:#fff
style SelectCandidate fill:#F38488,color:#fff
style OpenCandidate fill:#F38488,color:#fff
style UpdateMetadataDown fill:#F38488,color:#fff
style UpdateMetadataUp fill:#F38488,color:#fff
style UpdateMetadataRollback fill:#F38488,color:#fff
style DowngradeLeader fill:#F38488,color:#fff
style UpgradeCandidate fill:#F38488,color:#fff
Start[Start]
SelectCandidate[Select Candidate]
UpdateMetadataDown["`Update Metadata(Down)
1. Downgrade Leader
`"]
DowngradeLeader["`Downgrade Leader
1. Become Follower
2. Return **last_entry_id**
`"]
UpgradeCandidate["`Upgrade Candidate
1. Replay to **last_entry_id**
2. Become Leader
`"]
UpdateMetadataUp["`Update Metadata(Up)
1. Switch Leader
2.1. Remove Old Leader(Opt.)
2.2. Move Old Leader to Follower(Opt.)
`"]
UpdateMetadataRollback["`Update Metadata(Rollback)
1. Upgrade old Leader
`"]
End
AnyCandidate{Available?}
OpenCandidate["Open Candidate"]
CloseOldLeader["Close Old Leader"]
Start
--> SelectCandidate
--> AnyCandidate
--> |Yes| UpdateMetadataDown
--> I1["Invalid Frontend Cache"]
--> DowngradeLeader
--> UpgradeCandidate
--> UpdateMetadataUp
--> I2["Invalid Frontend Cache"]
--> End
UpgradeCandidate
--> UpdateMetadataRollback
--> I3["Invalid Frontend Cache"]
--> End
I2
--> CloseOldLeader
--> End
AnyCandidate
--> |No| OpenCandidate
--> UpdateMetadataDown
```
**Only the red nodes will persist state after it has succeeded**, and other nodes won't persist state. (excluding the Start and End nodes).
## Steps
**The persistent context:** It's shared in each step and available after recovering. It will only be updated/stored after the Red node has succeeded.
Values:
- `region_id`: The target leader region.
- `peer`: The target datanode.
- `close_old_leader`: Indicates whether close the region.
- `leader_may_unreachable`: It's used to support the failover procedure.
**The Volatile context:** It's shared in each step and available in executing (including retrying). It will be dropped if the procedure runner crashes.
### Select Candidate
The Persistent state: Selected Candidate Region.
### Update Metadata(Down)
**The Persistent context:**
- The (latest/updated) `version` of `TableRouteValue`, It will be used in the step of `Update Metadata(Up)`.
### Downgrade Leader
This step sends an instruction via heartbeat and performs:
1. Downgrades leader region.
2. Retrieves the `last_entry_id` (if available).
If the target leader region is not found:
- Sets `close_old_leader` to true.
- Sets `leader_may_unreachable` to true.
If the target Datanode is unreachable:
- Waits for region lease expired.
- Sets `close_old_leader` to true.
- Sets `leader_may_unreachable` to true.
**The Persistent context:**
None
**The Persistent state:**
- `last_entry_id`
*Passes to next step.
### Upgrade Candidate
This step sends an instruction via heartbeat and performs:
1. Replays the WAL to latest(`last_entry_id`).
2. Upgrades the candidate region.
If the target region is not found:
- Rollbacks.
- Notifies the failover detector if `leader_may_unreachable` == true.
- Exits procedure.
If the target Datanode is unreachable:
- Rollbacks.
- Notifies the failover detector if `leader_may_unreachable` == true.
- Exits procedure.
**The Persistent context:**
None
### Update Metadata(Up)
This step performs
1. Switches Leader.
2. Removes Old Leader(Opt.).
3. Moves Old Leader to follower(Opt.).
The `TableRouteValue` version should equal the `TableRouteValue`'s `version` in Persistent context. Otherwise, verifies whether `TableRouteValue` already updated.
**The Persistent context:**
None
### Close Old Leader(Opt.)
This step sends a close region instruction via heartbeat.
If the target leader region is not found:
- Ignore.
If the target Datanode is unreachable:
- Ignore.
### Open Candidate(Opt.)
This step sends an open region instruction via heartbeat and waits for conditions to be met (typically, the condition is that the `last_entry_id` of the Candidate Region is very close to that of the Leader Region or the latest).
If the target Datanode is unreachable:
- Exits procedure.

View File

@@ -1,24 +0,0 @@
# Copyright 2023 Greptime Team
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
headerPath = "Apache-2.0.txt"
includes = [
"*.rs",
"*.py",
]
[properties]
inceptionYear = 2023
copyrightOwner = "Greptime Team"

View File

@@ -1,2 +1,2 @@
[toolchain] [toolchain]
channel = "nightly-2023-10-21" channel = "nightly-2023-08-07"

View File

@@ -1,157 +0,0 @@
#!/bin/bash
# This script configures the environment to run 'greptime' with the required Python version
# This script should be compatible both in Linux and macOS
OS_TYPE="$(uname)"
readonly OS_TYPE
check_command_existence() {
command -v "$1" &> /dev/null
}
get_python_version() {
case "$OS_TYPE" in
Darwin)
otool -L $GREPTIME_BIN_PATH | grep -o 'Python.framework/Versions/3.[0-9]\+/Python' | grep -o '3.[0-9]\+'
;;
Linux)
ldd $GREPTIME_BIN_PATH | grep -o 'libpython3\.[0-9]\+' | grep -o '3\.[0-9]\+'
;;
*)
echo "Unsupported OS type: $OS_TYPE"
exit 1
;;
esac
}
setup_virtualenv() {
local req_py_version="$1"
local env_name="GreptimeTmpVenv$req_py_version"
virtualenv --python=python"$req_py_version" "$env_name"
source "$env_name/bin/activate"
}
setup_conda_env() {
local req_py_version="$1"
local conda_base
conda_base=$(conda info --base) || { echo "Error obtaining conda base directory"; exit 1; }
. "$conda_base/etc/profile.d/conda.sh"
if ! conda list --name "GreptimeTmpPyO3Env$req_py_version" &> /dev/null; then
conda create --yes --name "GreptimeTmpPyO3Env$req_py_version" python="$req_py_version"
fi
conda activate "GreptimeTmpPyO3Env$req_py_version"
}
GREPTIME_BIN_PATH="./greptime"
YES="false"
usage() {
echo "Usage:"
echo " $0 -f <greptime-bin-path> [-y] <args-pass-to-greptime>"
echo "Set $PY_ENV_MAN to 1 to use virtualenv, 2 to use conda"
exit 1
}
function parse_args() {
while getopts ":f:y" opt; do
case $opt in
f)
GREPTIME_BIN_PATH=$OPTARG
;;
y)
YES="yes"
;;
\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
;;
:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
shift $((OPTIND -1))
REST_ARGS=$*
if [ -z "$GREPTIME_BIN_PATH" ]; then
usage
fi
echo "Run greptime binary at '$GREPTIME_BIN_PATH' (yes=$YES)..."
echo "The args pass to greptime: '$REST_ARGS'"
}
# Set library path and pass all arguments to greptime to run it
execute_greptime() {
if [[ "$OS_TYPE" == "Darwin" ]]; then
DYLD_LIBRARY_PATH="${CONDA_PREFIX:-$PREFIX}/lib:${LD_LIBRARY_PATH:-}" $GREPTIME_BIN_PATH $@
elif [[ "$OS_TYPE" == "Linux" ]]; then
LD_LIBRARY_PATH="${CONDA_PREFIX:-$PREFIX}/lib:${LD_LIBRARY_PATH:-}" $GREPTIME_BIN_PATH $@
fi
}
main() {
parse_args $@
local req_py_version
req_py_version=$(get_python_version)
readonly req_py_version
if [[ -z "$req_py_version" ]]; then
if $GREPTIME_BIN_PATH --version &> /dev/null; then
$GREPTIME_BIN_PATH $REST_ARGS
else
echo "The 'greptime' binary is not valid or encountered an error."
$GREPTIME_BIN_PATH --version
exit 1
fi
return
fi
echo "The required version of Python shared library is $req_py_version"
# if YES exist, assign it to yn, else read from stdin
if [[ -z "$YES" ]]; then
echo "Now this script will try to install or find correct Python Version"
echo "Do you want to continue? (yes/no): "
read -r yn
else
yn="$YES"
fi
case $yn in
[Yy]* ) ;;
[Nn]* ) exit;;
* ) echo "Please answer yes or no.";;
esac
# if USE_ENV exist, assign it to option
# else read from stdin
if [[ -z "$PY_ENV_MAN" ]]; then
echo "Do you want to use virtualenv or conda? (virtualenv(1)/conda(2)): "
read -r option
else
option="$PY_ENV_MAN"
fi
case $option in
1)
setup_virtualenv "$req_py_version"
;;
2)
setup_conda_env "$req_py_version"
;;
*)
echo "Please input 1 or 2"; exit 1
;;
esac
execute_greptime $REST_ARGS
}
main "$@"

View File

@@ -5,16 +5,13 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
common-base.workspace = true common-base = { workspace = true }
common-decimal.workspace = true common-error = { workspace = true }
common-error.workspace = true common-time = { workspace = true }
common-macro.workspace = true datatypes = { workspace = true }
common-time.workspace = true
datatypes.workspace = true
greptime-proto.workspace = true greptime-proto.workspace = true
paste = "1.0"
prost.workspace = true prost.workspace = true
snafu.workspace = true snafu = { version = "0.7", features = ["backtraces"] }
tonic.workspace = true tonic.workspace = true
[build-dependencies] [build-dependencies]

View File

@@ -16,24 +16,17 @@ use std::any::Any;
use common_error::ext::ErrorExt; use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use datatypes::prelude::ConcreteDataType; use datatypes::prelude::ConcreteDataType;
use snafu::prelude::*; use snafu::prelude::*;
use snafu::Location; use snafu::Location;
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
#[derive(Snafu)] #[derive(Debug, Snafu)]
#[snafu(visibility(pub))] #[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error { pub enum Error {
#[snafu(display("Unknown proto column datatype: {}", datatype))] #[snafu(display("Unknown proto column datatype: {}", datatype))]
UnknownColumnDataType { UnknownColumnDataType { datatype: i32, location: Location },
datatype: i32,
location: Location,
#[snafu(source)]
error: prost::DecodeError,
},
#[snafu(display("Failed to create column datatype from {:?}", from))] #[snafu(display("Failed to create column datatype from {:?}", from))]
IntoColumnDataType { IntoColumnDataType {
@@ -41,14 +34,22 @@ pub enum Error {
location: Location, location: Location,
}, },
#[snafu(display("Failed to convert column default constraint, column: {}", column))] #[snafu(display(
"Failed to convert column default constraint, column: {}, source: {}",
column,
source
))]
ConvertColumnDefaultConstraint { ConvertColumnDefaultConstraint {
column: String, column: String,
location: Location, location: Location,
source: datatypes::error::Error, source: datatypes::error::Error,
}, },
#[snafu(display("Invalid column default constraint, column: {}", column))] #[snafu(display(
"Invalid column default constraint, column: {}, source: {}",
column,
source
))]
InvalidColumnDefaultConstraint { InvalidColumnDefaultConstraint {
column: String, column: String,
location: Location, location: Location,

File diff suppressed because it is too large Load Diff

View File

@@ -12,9 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::collections::HashMap; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY};
use snafu::ResultExt; use snafu::ResultExt;
use crate::error::{self, Result}; use crate::error::{self, Result};
@@ -22,10 +20,7 @@ use crate::helper::ColumnDataTypeWrapper;
use crate::v1::ColumnDef; use crate::v1::ColumnDef;
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> { pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
let data_type = ColumnDataTypeWrapper::try_new( let data_type = ColumnDataTypeWrapper::try_new(column_def.datatype)?;
column_def.data_type,
column_def.datatype_extension.clone(),
)?;
let constraint = if column_def.default_constraint.is_empty() { let constraint = if column_def.default_constraint.is_empty() {
None None
@@ -39,17 +34,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
) )
}; };
let mut metadata = HashMap::new();
if !column_def.comment.is_empty() {
metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone());
}
Ok(
ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
.with_default_constraint(constraint) .with_default_constraint(constraint)
.context(error::InvalidColumnDefaultConstraintSnafu { .context(error::InvalidColumnDefaultConstraintSnafu {
column: &column_def.name, column: &column_def.name,
})? })
.with_metadata(metadata),
)
} }

View File

@@ -4,6 +4,8 @@ version.workspace = true
edition.workspace = true edition.workspace = true
license.workspace = true license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features] [features]
default = [] default = []
testing = [] testing = []
@@ -12,7 +14,6 @@ testing = []
api.workspace = true api.workspace = true
async-trait.workspace = true async-trait.workspace = true
common-error.workspace = true common-error.workspace = true
common-macro.workspace = true
digest = "0.10" digest = "0.10"
hex = { version = "0.4" } hex = { version = "0.4" }
secrecy = { version = "0.8", features = ["serde", "alloc"] } secrecy = { version = "0.8", features = ["serde", "alloc"] }

View File

@@ -26,7 +26,7 @@ use crate::{UserInfoRef, UserProviderRef};
pub(crate) const DEFAULT_USERNAME: &str = "greptime"; pub(crate) const DEFAULT_USERNAME: &str = "greptime";
/// construct a [`UserInfo`](crate::user_info::UserInfo) impl with name /// construct a [`UserInfo`] impl with name
/// use default username `greptime` if None is provided /// use default username `greptime` if None is provided
pub fn userinfo_by_name(username: Option<String>) -> UserInfoRef { pub fn userinfo_by_name(username: Option<String>) -> UserInfoRef {
DefaultUserInfo::with_name(username.unwrap_or_else(|| DEFAULT_USERNAME.to_string())) DefaultUserInfo::with_name(username.unwrap_or_else(|| DEFAULT_USERNAME.to_string()))

View File

@@ -14,12 +14,10 @@
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
#[derive(Snafu)] #[derive(Debug, Snafu)]
#[snafu(visibility(pub))] #[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error { pub enum Error {
#[snafu(display("Invalid config value: {}, {}", value, msg))] #[snafu(display("Invalid config value: {}, {}", value, msg))]
InvalidConfig { value: String, msg: String }, InvalidConfig { value: String, msg: String },
@@ -30,14 +28,13 @@ pub enum Error {
#[snafu(display("Internal state error: {}", msg))] #[snafu(display("Internal state error: {}", msg))]
InternalState { msg: String }, InternalState { msg: String },
#[snafu(display("IO error"))] #[snafu(display("IO error, source: {}", source))]
Io { Io {
#[snafu(source)] source: std::io::Error,
error: std::io::Error,
location: Location, location: Location,
}, },
#[snafu(display("Auth failed"))] #[snafu(display("Auth failed, source: {}", source))]
AuthBackend { AuthBackend {
location: Location, location: Location,
source: BoxedError, source: BoxedError,

View File

@@ -11,7 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use secrecy::ExposeSecret; use secrecy::ExposeSecret;
use crate::error::{ use crate::error::{

View File

@@ -22,15 +22,15 @@ use crate::UserInfoRef;
pub trait UserProvider: Send + Sync { pub trait UserProvider: Send + Sync {
fn name(&self) -> &str; fn name(&self) -> &str;
/// Checks whether a user is valid and allowed to access the database. /// [`authenticate`] checks whether a user is valid and allowed to access the database.
async fn authenticate(&self, id: Identity<'_>, password: Password<'_>) -> Result<UserInfoRef>; async fn authenticate(&self, id: Identity<'_>, password: Password<'_>) -> Result<UserInfoRef>;
/// Checks whether a connection request /// [`authorize`] checks whether a connection request
/// from a certain user to a certain catalog/schema is legal. /// from a certain user to a certain catalog/schema is legal.
/// This method should be called after [authenticate()](UserProvider::authenticate()). /// This method should be called after [`authenticate`].
async fn authorize(&self, catalog: &str, schema: &str, user_info: &UserInfoRef) -> Result<()>; async fn authorize(&self, catalog: &str, schema: &str, user_info: &UserInfoRef) -> Result<()>;
/// Combination of [authenticate()](UserProvider::authenticate()) and [authorize()](UserProvider::authorize()). /// [`auth`] is a combination of [`authenticate`] and [`authorize`].
/// In most cases it's preferred for both convenience and performance. /// In most cases it's preferred for both convenience and performance.
async fn auth( async fn auth(
&self, &self,

View File

@@ -8,45 +8,45 @@ license.workspace = true
testing = [] testing = []
[dependencies] [dependencies]
api.workspace = true api = { workspace = true }
arc-swap = "1.0" arc-swap = "1.0"
arrow-schema.workspace = true arrow-schema.workspace = true
async-stream.workspace = true async-stream.workspace = true
async-trait = "0.1" async-trait = "0.1"
common-catalog.workspace = true common-catalog = { workspace = true }
common-error.workspace = true common-error = { workspace = true }
common-grpc.workspace = true common-grpc = { workspace = true }
common-macro.workspace = true common-meta = { workspace = true }
common-meta.workspace = true common-query = { workspace = true }
common-query.workspace = true common-recordbatch = { workspace = true }
common-recordbatch.workspace = true common-runtime = { workspace = true }
common-runtime.workspace = true common-telemetry = { workspace = true }
common-telemetry.workspace = true common-time = { workspace = true }
common-time.workspace = true
dashmap = "5.4" dashmap = "5.4"
datafusion.workspace = true datafusion.workspace = true
datatypes.workspace = true datatypes = { workspace = true }
futures = "0.3" futures = "0.3"
futures-util.workspace = true futures-util.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
meta-client.workspace = true meta-client = { workspace = true }
moka = { workspace = true, features = ["future"] } metrics.workspace = true
moka = { version = "0.11", features = ["future"] }
parking_lot = "0.12" parking_lot = "0.12"
partition.workspace = true
prometheus.workspace = true
regex.workspace = true regex.workspace = true
serde.workspace = true serde.workspace = true
serde_json = "1.0" serde_json = "1.0"
session.workspace = true session = { workspace = true }
snafu.workspace = true snafu = { version = "0.7", features = ["backtraces"] }
store-api.workspace = true store-api = { workspace = true }
table.workspace = true table = { workspace = true }
tokio.workspace = true tokio.workspace = true
[dev-dependencies] [dev-dependencies]
catalog = { workspace = true, features = ["testing"] } catalog = { workspace = true, features = ["testing"] }
chrono.workspace = true chrono.workspace = true
common-test-util.workspace = true common-test-util = { workspace = true }
log-store.workspace = true log-store = { workspace = true }
object-store.workspace = true mito = { workspace = true, features = ["test"] }
object-store = { workspace = true }
storage = { workspace = true }
tokio.workspace = true tokio.workspace = true

View File

@@ -17,48 +17,54 @@ use std::fmt::Debug;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use datafusion::error::DataFusionError; use datafusion::error::DataFusionError;
use datatypes::prelude::ConcreteDataType; use datatypes::prelude::ConcreteDataType;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
use table::metadata::TableId;
use tokio::task::JoinError; use tokio::task::JoinError;
#[derive(Snafu)] use crate::DeregisterTableRequest;
#[derive(Debug, Snafu)]
#[snafu(visibility(pub))] #[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error { pub enum Error {
#[snafu(display("Failed to list catalogs"))] #[snafu(display("Failed to list catalogs, source: {}", source))]
ListCatalogs { ListCatalogs {
location: Location, location: Location,
source: BoxedError, source: BoxedError,
}, },
#[snafu(display("Failed to list {}'s schemas", catalog))] #[snafu(display("Failed to list {}'s schemas, source: {}", catalog, source))]
ListSchemas { ListSchemas {
location: Location, location: Location,
catalog: String, catalog: String,
source: BoxedError, source: BoxedError,
}, },
#[snafu(display("Failed to re-compile script due to internal error"))] #[snafu(display(
"Failed to re-compile script due to internal error, source: {}",
source
))]
CompileScriptInternal { CompileScriptInternal {
location: Location, location: Location,
source: BoxedError, source: BoxedError,
}, },
#[snafu(display("Failed to open system catalog table"))] #[snafu(display("Failed to open system catalog table, source: {}", source))]
OpenSystemCatalog { OpenSystemCatalog {
location: Location, location: Location,
source: table::error::Error, source: table::error::Error,
}, },
#[snafu(display("Failed to create system catalog table"))] #[snafu(display("Failed to create system catalog table, source: {}", source))]
CreateSystemCatalog { CreateSystemCatalog {
location: Location, location: Location,
source: table::error::Error, source: table::error::Error,
}, },
#[snafu(display("Failed to create table, table info: {}", table_info))] #[snafu(display(
"Failed to create table, table info: {}, source: {}",
table_info,
source
))]
CreateTable { CreateTable {
table_info: String, table_info: String,
location: Location, location: Location,
@@ -92,14 +98,13 @@ pub enum Error {
#[snafu(display("Catalog value is not present"))] #[snafu(display("Catalog value is not present"))]
EmptyValue { location: Location }, EmptyValue { location: Location },
#[snafu(display("Failed to deserialize value"))] #[snafu(display("Failed to deserialize value, source: {}", source))]
ValueDeserialize { ValueDeserialize {
#[snafu(source)] source: serde_json::error::Error,
error: serde_json::error::Error,
location: Location, location: Location,
}, },
#[snafu(display("Table engine not found: {}", engine_name))] #[snafu(display("Table engine not found: {}, source: {}", engine_name, source))]
TableEngineNotFound { TableEngineNotFound {
engine_name: String, engine_name: String,
location: Location, location: Location,
@@ -137,18 +142,15 @@ pub enum Error {
#[snafu(display("Operation {} not supported", op))] #[snafu(display("Operation {} not supported", op))]
NotSupported { op: String, location: Location }, NotSupported { op: String, location: Location },
#[snafu(display("Failed to open table {table_id}"))] #[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
OpenTable { OpenTable {
table_id: TableId, table_info: String,
location: Location, location: Location,
source: table::error::Error, source: table::error::Error,
}, },
#[snafu(display("Failed to open table in parallel"))] #[snafu(display("Failed to open table in parallel, source: {}", source))]
ParallelOpenTable { ParallelOpenTable { source: JoinError },
#[snafu(source)]
error: JoinError,
},
#[snafu(display("Table not found while opening table, table info: {}", table_info))] #[snafu(display("Table not found while opening table, table info: {}", table_info))]
TableNotFound { TableNotFound {
@@ -162,52 +164,72 @@ pub enum Error {
source: common_recordbatch::error::Error, source: common_recordbatch::error::Error,
}, },
#[snafu(display("Failed to create recordbatch"))] #[snafu(display("Failed to create recordbatch, source: {}", source))]
CreateRecordBatch { CreateRecordBatch {
location: Location, location: Location,
source: common_recordbatch::error::Error, source: common_recordbatch::error::Error,
}, },
#[snafu(display("Failed to insert table creation record to system catalog"))] #[snafu(display(
"Failed to insert table creation record to system catalog, source: {}",
source
))]
InsertCatalogRecord { InsertCatalogRecord {
location: Location, location: Location,
source: table::error::Error, source: table::error::Error,
}, },
#[snafu(display("Failed to scan system catalog table"))] #[snafu(display(
"Failed to deregister table, request: {:?}, source: {}",
request,
source
))]
DeregisterTable {
request: DeregisterTableRequest,
location: Location,
source: table::error::Error,
},
#[snafu(display("Illegal catalog manager state: {}", msg))]
IllegalManagerState { location: Location, msg: String },
#[snafu(display("Failed to scan system catalog table, source: {}", source))]
SystemCatalogTableScan { SystemCatalogTableScan {
location: Location, location: Location,
source: table::error::Error, source: table::error::Error,
}, },
#[snafu(display("Internal error"))] #[snafu(display("{source}"))]
Internal { Internal {
location: Location, location: Location,
source: BoxedError, source: BoxedError,
}, },
#[snafu(display("Failed to upgrade weak catalog manager reference"))] #[snafu(display(
"Failed to upgrade weak catalog manager reference. location: {}",
location
))]
UpgradeWeakCatalogManagerRef { location: Location }, UpgradeWeakCatalogManagerRef { location: Location },
#[snafu(display("Failed to execute system catalog table scan"))] #[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
SystemCatalogTableScanExec { SystemCatalogTableScanExec {
location: Location, location: Location,
source: common_query::error::Error, source: common_query::error::Error,
}, },
#[snafu(display("Cannot parse catalog value"))] #[snafu(display("Cannot parse catalog value, source: {}", source))]
InvalidCatalogValue { InvalidCatalogValue {
location: Location, location: Location,
source: common_catalog::error::Error, source: common_catalog::error::Error,
}, },
#[snafu(display("Failed to perform metasrv operation"))] #[snafu(display("Failed to perform metasrv operation, source: {}", source))]
MetaSrv { MetaSrv {
location: Location, location: Location,
source: meta_client::error::Error, source: meta_client::error::Error,
}, },
#[snafu(display("Invalid table info in catalog"))] #[snafu(display("Invalid table info in catalog, source: {}", source))]
InvalidTableInfoInCatalog { InvalidTableInfoInCatalog {
location: Location, location: Location,
source: datatypes::error::Error, source: datatypes::error::Error,
@@ -216,14 +238,17 @@ pub enum Error {
#[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))] #[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
QueryAccessDenied { catalog: String, schema: String }, QueryAccessDenied { catalog: String, schema: String },
#[snafu(display("DataFusion error"))] #[snafu(display("Invalid system table definition: {err_msg}"))]
InvalidSystemTableDef { err_msg: String, location: Location },
#[snafu(display("{}: {}", msg, source))]
Datafusion { Datafusion {
#[snafu(source)] msg: String,
error: DataFusionError, source: DataFusionError,
location: Location, location: Location,
}, },
#[snafu(display("Table schema mismatch"))] #[snafu(display("Table schema mismatch, source: {}", source))]
TableSchemaMismatch { TableSchemaMismatch {
location: Location, location: Location,
source: table::error::Error, source: table::error::Error,
@@ -232,7 +257,7 @@ pub enum Error {
#[snafu(display("A generic error has occurred, msg: {}", msg))] #[snafu(display("A generic error has occurred, msg: {}", msg))]
Generic { msg: String, location: Location }, Generic { msg: String, location: Location },
#[snafu(display("Table metadata manager error"))] #[snafu(display("Table metadata manager error: {}", source))]
TableMetadataManager { TableMetadataManager {
source: common_meta::error::Error, source: common_meta::error::Error,
location: Location, location: Location,
@@ -247,8 +272,10 @@ impl ErrorExt for Error {
Error::InvalidKey { .. } Error::InvalidKey { .. }
| Error::SchemaNotFound { .. } | Error::SchemaNotFound { .. }
| Error::TableNotFound { .. } | Error::TableNotFound { .. }
| Error::IllegalManagerState { .. }
| Error::CatalogNotFound { .. } | Error::CatalogNotFound { .. }
| Error::InvalidEntryType { .. } | Error::InvalidEntryType { .. }
| Error::InvalidSystemTableDef { .. }
| Error::ParallelOpenTable { .. } => StatusCode::Unexpected, | Error::ParallelOpenTable { .. } => StatusCode::Unexpected,
Error::SystemCatalog { .. } Error::SystemCatalog { .. }
@@ -279,6 +306,7 @@ impl ErrorExt for Error {
| Error::InsertCatalogRecord { source, .. } | Error::InsertCatalogRecord { source, .. }
| Error::OpenTable { source, .. } | Error::OpenTable { source, .. }
| Error::CreateTable { source, .. } | Error::CreateTable { source, .. }
| Error::DeregisterTable { source, .. }
| Error::TableSchemaMismatch { source, .. } => source.status_code(), | Error::TableSchemaMismatch { source, .. } => source.status_code(),
Error::MetaSrv { source, .. } => source.status_code(), Error::MetaSrv { source, .. } => source.status_code(),

View File

@@ -158,7 +158,7 @@ impl InformationSchemaColumnsBuilder {
for schema_name in catalog_manager.schema_names(&catalog_name).await? { for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager if !catalog_manager
.schema_exists(&catalog_name, &schema_name) .schema_exist(&catalog_name, &schema_name)
.await? .await?
{ {
continue; continue;
@@ -202,7 +202,7 @@ impl InformationSchemaColumnsBuilder {
&schema_name, &schema_name,
&table_name, &table_name,
&column.name, &column.name,
&column.data_type.name(), column.data_type.name(),
semantic_type, semantic_type,
); );
} }

View File

@@ -154,7 +154,7 @@ impl InformationSchemaTablesBuilder {
for schema_name in catalog_manager.schema_names(&catalog_name).await? { for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager if !catalog_manager
.schema_exists(&catalog_name, &schema_name) .schema_exist(&catalog_name, &schema_name)
.await? .await?
{ {
continue; continue;

View File

@@ -1,281 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::BTreeSet;
use std::sync::{Arc, Weak};
use common_catalog::consts::{DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, NUMBERS_TABLE_ID};
use common_error::ext::BoxedError;
use common_meta::cache_invalidator::{CacheInvalidator, CacheInvalidatorRef, Context};
use common_meta::error::Result as MetaResult;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::schema_name::SchemaNameKey;
use common_meta::key::table_name::TableNameKey;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::KvBackendRef;
use common_meta::table_name::TableName;
use futures_util::TryStreamExt;
use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use snafu::prelude::*;
use table::dist_table::DistTable;
use table::metadata::TableId;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::TableRef;
use crate::error::{
self as catalog_err, ListCatalogsSnafu, ListSchemasSnafu, Result as CatalogResult,
TableMetadataManagerSnafu,
};
use crate::information_schema::{InformationSchemaProvider, COLUMNS, TABLES};
use crate::CatalogManager;
/// Access all existing catalog, schema and tables.
///
/// The result comes from two source, all the user tables are presented in
/// a kv-backend which persists the metadata of a table. And system tables
/// comes from `SystemCatalog`, which is static and read-only.
#[derive(Clone)]
pub struct KvBackendCatalogManager {
// TODO(LFC): Maybe use a real implementation for Standalone mode.
// Now we use `NoopKvCacheInvalidator` for Standalone mode. In Standalone mode, the KV backend
// is implemented by RaftEngine. Maybe we need a cache for it?
cache_invalidator: CacheInvalidatorRef,
partition_manager: PartitionRuleManagerRef,
table_metadata_manager: TableMetadataManagerRef,
/// A sub-CatalogManager that handles system tables
system_catalog: SystemCatalog,
}
#[async_trait::async_trait]
impl CacheInvalidator for KvBackendCatalogManager {
async fn invalidate_table_name(&self, ctx: &Context, table_name: TableName) -> MetaResult<()> {
self.cache_invalidator
.invalidate_table_name(ctx, table_name)
.await
}
async fn invalidate_table_id(&self, ctx: &Context, table_id: TableId) -> MetaResult<()> {
self.cache_invalidator
.invalidate_table_id(ctx, table_id)
.await
}
}
impl KvBackendCatalogManager {
pub fn new(backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef) -> Arc<Self> {
Arc::new_cyclic(|me| Self {
partition_manager: Arc::new(PartitionRuleManager::new(backend.clone())),
table_metadata_manager: Arc::new(TableMetadataManager::new(backend)),
cache_invalidator,
system_catalog: SystemCatalog {
catalog_manager: me.clone(),
},
})
}
pub fn partition_manager(&self) -> PartitionRuleManagerRef {
self.partition_manager.clone()
}
pub fn table_metadata_manager_ref(&self) -> &TableMetadataManagerRef {
&self.table_metadata_manager
}
}
#[async_trait::async_trait]
impl CatalogManager for KvBackendCatalogManager {
async fn catalog_names(&self) -> CatalogResult<Vec<String>> {
let stream = self
.table_metadata_manager
.catalog_manager()
.catalog_names()
.await;
let keys = stream
.try_collect::<Vec<_>>()
.await
.map_err(BoxedError::new)
.context(ListCatalogsSnafu)?;
Ok(keys)
}
async fn schema_names(&self, catalog: &str) -> CatalogResult<Vec<String>> {
let stream = self
.table_metadata_manager
.schema_manager()
.schema_names(catalog)
.await;
let mut keys = stream
.try_collect::<BTreeSet<_>>()
.await
.map_err(BoxedError::new)
.context(ListSchemasSnafu { catalog })?
.into_iter()
.collect::<Vec<_>>();
keys.extend_from_slice(&self.system_catalog.schema_names());
Ok(keys)
}
async fn table_names(&self, catalog: &str, schema: &str) -> CatalogResult<Vec<String>> {
let mut tables = self
.table_metadata_manager
.table_name_manager()
.tables(catalog, schema)
.await
.context(TableMetadataManagerSnafu)?
.into_iter()
.map(|(k, _)| k)
.collect::<Vec<String>>();
tables.extend_from_slice(&self.system_catalog.table_names(schema));
Ok(tables)
}
async fn catalog_exists(&self, catalog: &str) -> CatalogResult<bool> {
self.table_metadata_manager
.catalog_manager()
.exists(CatalogNameKey::new(catalog))
.await
.context(TableMetadataManagerSnafu)
}
async fn schema_exists(&self, catalog: &str, schema: &str) -> CatalogResult<bool> {
if self.system_catalog.schema_exist(schema) {
return Ok(true);
}
self.table_metadata_manager
.schema_manager()
.exists(SchemaNameKey::new(catalog, schema))
.await
.context(TableMetadataManagerSnafu)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> CatalogResult<bool> {
if self.system_catalog.table_exist(schema, table) {
return Ok(true);
}
let key = TableNameKey::new(catalog, schema, table);
self.table_metadata_manager
.table_name_manager()
.get(key)
.await
.context(TableMetadataManagerSnafu)
.map(|x| x.is_some())
}
async fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> CatalogResult<Option<TableRef>> {
if let Some(table) = self.system_catalog.table(catalog, schema, table_name) {
return Ok(Some(table));
}
let key = TableNameKey::new(catalog, schema, table_name);
let Some(table_name_value) = self
.table_metadata_manager
.table_name_manager()
.get(key)
.await
.context(TableMetadataManagerSnafu)?
else {
return Ok(None);
};
let table_id = table_name_value.table_id();
let Some(table_info_value) = self
.table_metadata_manager
.table_info_manager()
.get(table_id)
.await
.context(TableMetadataManagerSnafu)?
.map(|v| v.into_inner())
else {
return Ok(None);
};
let table_info = Arc::new(
table_info_value
.table_info
.try_into()
.context(catalog_err::InvalidTableInfoInCatalogSnafu)?,
);
Ok(Some(DistTable::table(table_info)))
}
fn as_any(&self) -> &dyn Any {
self
}
}
// TODO: This struct can hold a static map of all system tables when
// the upper layer (e.g., procedure) can inform the catalog manager
// a new catalog is created.
/// Existing system tables:
/// - public.numbers
/// - information_schema.tables
/// - information_schema.columns
#[derive(Clone)]
struct SystemCatalog {
catalog_manager: Weak<KvBackendCatalogManager>,
}
impl SystemCatalog {
fn schema_names(&self) -> Vec<String> {
vec![INFORMATION_SCHEMA_NAME.to_string()]
}
fn table_names(&self, schema: &str) -> Vec<String> {
if schema == INFORMATION_SCHEMA_NAME {
vec![TABLES.to_string(), COLUMNS.to_string()]
} else if schema == DEFAULT_SCHEMA_NAME {
vec![NUMBERS_TABLE_NAME.to_string()]
} else {
vec![]
}
}
fn schema_exist(&self, schema: &str) -> bool {
schema == INFORMATION_SCHEMA_NAME
}
fn table_exist(&self, schema: &str, table: &str) -> bool {
if schema == INFORMATION_SCHEMA_NAME {
table == TABLES || table == COLUMNS
} else if schema == DEFAULT_SCHEMA_NAME {
table == NUMBERS_TABLE_NAME
} else {
false
}
}
fn table(&self, catalog: &str, schema: &str, table_name: &str) -> Option<TableRef> {
if schema == INFORMATION_SCHEMA_NAME {
let information_schema_provider =
InformationSchemaProvider::new(catalog.to_string(), self.catalog_manager.clone());
information_schema_provider.table(table_name)
} else if schema == DEFAULT_SCHEMA_NAME && table_name == NUMBERS_TABLE_NAME {
Some(NumbersTable::table(NUMBERS_TABLE_ID))
} else {
None
}
}
}

View File

@@ -17,38 +17,79 @@
#![feature(try_blocks)] #![feature(try_blocks)]
use std::any::Any; use std::any::Any;
use std::collections::HashMap;
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::sync::Arc; use std::sync::Arc;
use futures::future::BoxFuture; use api::v1::meta::{RegionStat, TableIdent, TableName};
use table::metadata::TableId; use common_telemetry::{info, warn};
use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::{TableId, TableType};
use table::requests::CreateTableRequest; use table::requests::CreateTableRequest;
use table::TableRef; use table::TableRef;
use crate::error::Result; use crate::error::{CreateTableSnafu, Result};
pub mod error; pub mod error;
pub mod information_schema; pub mod information_schema;
pub mod kvbackend; pub mod local;
pub mod memory;
mod metrics; mod metrics;
pub mod remote;
pub mod system;
pub mod table_source; pub mod table_source;
pub mod tables;
#[async_trait::async_trait] #[async_trait::async_trait]
pub trait CatalogManager: Send + Sync { pub trait CatalogManager: Send + Sync {
fn as_any(&self) -> &dyn Any; fn as_any(&self) -> &dyn Any;
/// Starts a catalog manager.
async fn start(&self) -> Result<()>;
/// Registers a catalog to catalog manager, returns whether the catalog exist before.
async fn register_catalog(self: Arc<Self>, name: String) -> Result<bool>;
/// Register a schema with catalog name and schema name. Retuens whether the
/// schema registered.
///
/// # Errors
///
/// This method will/should fail if catalog not exist
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;
/// Deregisters a database within given catalog/schema to catalog manager
async fn deregister_schema(&self, request: DeregisterSchemaRequest) -> Result<bool>;
/// Registers a table within given catalog/schema to catalog manager,
/// returns whether the table registered.
///
/// # Errors
///
/// This method will/should fail if catalog or schema not exist
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;
/// Deregisters a table within given catalog/schema to catalog manager
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()>;
/// Rename a table to [RenameTableRequest::new_table_name], returns whether the table is renamed.
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool>;
/// Register a system table, should be called before starting the manager.
async fn register_system_table(&self, request: RegisterSystemTableRequest)
-> error::Result<()>;
async fn catalog_names(&self) -> Result<Vec<String>>; async fn catalog_names(&self) -> Result<Vec<String>>;
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>>; async fn schema_names(&self, catalog: &str) -> Result<Vec<String>>;
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>>; async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>>;
async fn catalog_exists(&self, catalog: &str) -> Result<bool>; async fn catalog_exist(&self, catalog: &str) -> Result<bool>;
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool>; async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool>;
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool>; async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool>;
/// Returns the table by catalog, schema and table name. /// Returns the table by catalog, schema and table name.
async fn table( async fn table(
@@ -62,8 +103,7 @@ pub trait CatalogManager: Send + Sync {
pub type CatalogManagerRef = Arc<dyn CatalogManager>; pub type CatalogManagerRef = Arc<dyn CatalogManager>;
/// Hook called after system table opening. /// Hook called after system table opening.
pub type OpenSystemTableHook = pub type OpenSystemTableHook = Arc<dyn Fn(TableRef) -> Result<()> + Send + Sync>;
Box<dyn Fn(TableRef) -> BoxFuture<'static, Result<()>> + Send + Sync>;
/// Register system table request: /// Register system table request:
/// - When system table is already created and registered, the hook will be called /// - When system table is already created and registered, the hook will be called
@@ -122,3 +162,120 @@ pub struct RegisterSchemaRequest {
pub catalog: String, pub catalog: String,
pub schema: String, pub schema: String,
} }
pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
manager: &'a M,
engine: TableEngineRef,
sys_table_requests: &'a mut Vec<RegisterSystemTableRequest>,
) -> Result<()> {
for req in sys_table_requests.drain(..) {
let catalog_name = &req.create_table_request.catalog_name;
let schema_name = &req.create_table_request.schema_name;
let table_name = &req.create_table_request.table_name;
let table_id = req.create_table_request.id;
let table = manager.table(catalog_name, schema_name, table_name).await?;
let table = if let Some(table) = table {
table
} else {
let table = engine
.create_table(&EngineContext::default(), req.create_table_request.clone())
.await
.with_context(|_| CreateTableSnafu {
table_info: common_catalog::format_full_table_name(
catalog_name,
schema_name,
table_name,
),
})?;
let _ = manager
.register_table(RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: table_name.clone(),
table_id,
table: table.clone(),
})
.await?;
info!("Created and registered system table: {table_name}");
table
};
if let Some(hook) = req.open_hook {
(hook)(table)?;
}
}
Ok(())
}
/// The stat of regions in the datanode node.
/// The number of regions can be got from len of vec.
///
/// Ignores any errors occurred during iterating regions. The intention of this method is to
/// collect region stats that will be carried in Datanode's heartbeat to Metasrv, so it's a
/// "try our best" job.
pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> (u64, Vec<RegionStat>) {
let mut region_number: u64 = 0;
let mut region_stats = Vec::new();
let Ok(catalog_names) = catalog_manager.catalog_names().await else {
return (region_number, region_stats);
};
for catalog_name in catalog_names {
let Ok(schema_names) = catalog_manager.schema_names(&catalog_name).await else {
continue;
};
for schema_name in schema_names {
let Ok(table_names) = catalog_manager
.table_names(&catalog_name, &schema_name)
.await
else {
continue;
};
for table_name in table_names {
let Ok(Some(table)) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await
else {
continue;
};
if table.table_type() != TableType::Base {
continue;
}
let table_info = table.table_info();
let region_numbers = &table_info.meta.region_numbers;
region_number += region_numbers.len() as u64;
let engine = &table_info.meta.engine;
let table_id = table_info.ident.table_id;
match table.region_stats() {
Ok(stats) => {
let stats = stats.into_iter().map(|stat| RegionStat {
region_id: stat.region_id,
table_ident: Some(TableIdent {
table_id,
table_name: Some(TableName {
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
}),
engine: engine.clone(),
}),
approximate_bytes: stat.disk_usage_bytes as i64,
attrs: HashMap::from([("engine_name".to_owned(), engine.clone())]),
..Default::default()
});
region_stats.extend(stats);
}
Err(e) => {
warn!("Failed to get region status, err: {:?}", e);
}
};
}
}
}
(region_number, region_stats)
}

View File

@@ -13,5 +13,7 @@
// limitations under the License. // limitations under the License.
pub mod manager; pub mod manager;
pub mod memory;
pub use manager::{new_memory_catalog_manager, MemoryCatalogManager}; pub use manager::LocalCatalogManager;
pub use memory::{new_memory_catalog_manager, MemoryCatalogManager};

View File

@@ -0,0 +1,633 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
MITO_ENGINE, NUMBERS_TABLE_ID, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID,
SYSTEM_CATALOG_TABLE_NAME,
};
use common_catalog::format_full_table_name;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::{error, info};
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
use metrics::increment_gauge;
use snafu::{ensure, OptionExt, ResultExt};
use table::engine::manager::TableEngineManagerRef;
use table::engine::EngineContext;
use table::metadata::TableId;
use table::requests::OpenTableRequest;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::table::TableIdProvider;
use table::TableRef;
use crate::error::{
self, CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
SystemCatalogTypeMismatchSnafu, TableEngineNotFoundSnafu, TableExistsSnafu, TableNotExistSnafu,
TableNotFoundSnafu, UnimplementedSnafu,
};
use crate::local::memory::MemoryCatalogManager;
use crate::system::{
decode_system_catalog, Entry, SystemCatalogTable, TableEntry, ENTRY_TYPE_INDEX, KEY_INDEX,
VALUE_INDEX,
};
use crate::tables::SystemCatalog;
use crate::{
handle_system_table_request, CatalogManager, DeregisterSchemaRequest, DeregisterTableRequest,
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
};
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
pub struct LocalCatalogManager {
system: Arc<SystemCatalog>,
catalogs: Arc<MemoryCatalogManager>,
engine_manager: TableEngineManagerRef,
next_table_id: AtomicU32,
init_lock: Mutex<bool>,
register_lock: Mutex<()>,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
}
impl LocalCatalogManager {
/// Create a new [CatalogManager] with given user catalogs and mito engine
pub async fn try_new(engine_manager: TableEngineManagerRef) -> Result<Self> {
let engine = engine_manager
.engine(MITO_ENGINE)
.context(TableEngineNotFoundSnafu {
engine_name: MITO_ENGINE,
})?;
let table = SystemCatalogTable::new(engine.clone()).await?;
let memory_catalog_manager = crate::local::memory::new_memory_catalog_manager()?;
let system_catalog = Arc::new(SystemCatalog::new(table));
Ok(Self {
system: system_catalog,
catalogs: memory_catalog_manager,
engine_manager,
next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
init_lock: Mutex::new(false),
register_lock: Mutex::new(()),
system_table_requests: Mutex::new(Vec::default()),
})
}
/// Scan all entries from system catalog table
pub async fn init(&self) -> Result<()> {
self.init_system_catalog().await?;
let system_records = self.system.information_schema.system.records().await?;
let entries = self.collect_system_catalog_entries(system_records).await?;
let max_table_id = self.handle_system_catalog_entries(entries).await?;
info!(
"All system catalog entries processed, max table id: {}",
max_table_id
);
self.next_table_id
.store((max_table_id + 1).max(MIN_USER_TABLE_ID), Ordering::Relaxed);
*self.init_lock.lock().await = true;
// Processing system table hooks
let mut sys_table_requests = self.system_table_requests.lock().await;
let engine = self
.engine_manager
.engine(MITO_ENGINE)
.context(TableEngineNotFoundSnafu {
engine_name: MITO_ENGINE,
})?;
handle_system_table_request(self, engine, &mut sys_table_requests).await?;
Ok(())
}
async fn init_system_catalog(&self) -> Result<()> {
// register default catalog and default schema
self.catalogs
.register_catalog_sync(DEFAULT_CATALOG_NAME.to_string())?;
self.catalogs.register_schema_sync(RegisterSchemaRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
})?;
// register SystemCatalogTable
self.catalogs
.register_catalog_sync(SYSTEM_CATALOG_NAME.to_string())?;
self.catalogs.register_schema_sync(RegisterSchemaRequest {
catalog: SYSTEM_CATALOG_NAME.to_string(),
schema: INFORMATION_SCHEMA_NAME.to_string(),
})?;
let register_table_req = RegisterTableRequest {
catalog: SYSTEM_CATALOG_NAME.to_string(),
schema: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
table_id: SYSTEM_CATALOG_TABLE_ID,
table: self.system.information_schema.system.as_table_ref(),
};
self.catalogs.register_table(register_table_req).await?;
// Add numbers table for test
let register_number_table_req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: NUMBERS_TABLE_NAME.to_string(),
table_id: NUMBERS_TABLE_ID,
table: NumbersTable::table(NUMBERS_TABLE_ID),
};
self.catalogs
.register_table(register_number_table_req)
.await?;
Ok(())
}
/// Collect stream of system catalog entries to `Vec<Entry>`
async fn collect_system_catalog_entries(
&self,
stream: SendableRecordBatchStream,
) -> Result<Vec<Entry>> {
let record_batch = common_recordbatch::util::collect(stream)
.await
.context(ReadSystemCatalogSnafu)?;
let rbs = record_batch
.into_iter()
.map(Self::record_batch_to_entry)
.collect::<Result<Vec<_>>>()?;
Ok(rbs.into_iter().flat_map(Vec::into_iter).collect::<_>())
}
/// Convert `RecordBatch` to a vector of `Entry`.
fn record_batch_to_entry(rb: RecordBatch) -> Result<Vec<Entry>> {
ensure!(
rb.num_columns() >= 6,
SystemCatalogSnafu {
msg: format!("Length mismatch: {}", rb.num_columns())
}
);
let entry_type = rb
.column(ENTRY_TYPE_INDEX)
.as_any()
.downcast_ref::<UInt8Vector>()
.with_context(|| SystemCatalogTypeMismatchSnafu {
data_type: rb.column(ENTRY_TYPE_INDEX).data_type(),
})?;
let key = rb
.column(KEY_INDEX)
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| SystemCatalogTypeMismatchSnafu {
data_type: rb.column(KEY_INDEX).data_type(),
})?;
let value = rb
.column(VALUE_INDEX)
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| SystemCatalogTypeMismatchSnafu {
data_type: rb.column(VALUE_INDEX).data_type(),
})?;
let mut res = Vec::with_capacity(rb.num_rows());
for ((t, k), v) in entry_type
.iter_data()
.zip(key.iter_data())
.zip(value.iter_data())
{
let entry = decode_system_catalog(t, k, v)?;
res.push(entry);
}
Ok(res)
}
/// Processes records from system catalog table and returns the max table id persisted
/// in system catalog table.
async fn handle_system_catalog_entries(&self, entries: Vec<Entry>) -> Result<TableId> {
let entries = Self::sort_entries(entries);
let mut max_table_id = 0;
for entry in entries {
match entry {
Entry::Catalog(c) => {
self.catalogs
.register_catalog_sync(c.catalog_name.clone())?;
info!("Register catalog: {}", c.catalog_name);
}
Entry::Schema(s) => {
let req = RegisterSchemaRequest {
catalog: s.catalog_name.clone(),
schema: s.schema_name.clone(),
};
let _ = self.catalogs.register_schema_sync(req)?;
info!("Registered schema: {:?}", s);
}
Entry::Table(t) => {
max_table_id = max_table_id.max(t.table_id);
if t.is_deleted {
continue;
}
self.open_and_register_table(&t).await?;
info!("Registered table: {:?}", t);
}
}
}
Ok(max_table_id)
}
/// Sort catalog entries to ensure catalog entries comes first, then schema entries,
/// and table entries is the last.
fn sort_entries(mut entries: Vec<Entry>) -> Vec<Entry> {
entries.sort();
entries
}
async fn open_and_register_table(&self, t: &TableEntry) -> Result<()> {
self.check_catalog_schema_exist(&t.catalog_name, &t.schema_name)
.await?;
let context = EngineContext {};
let open_request = OpenTableRequest {
catalog_name: t.catalog_name.clone(),
schema_name: t.schema_name.clone(),
table_name: t.table_name.clone(),
table_id: t.table_id,
region_numbers: vec![0],
};
let engine = self
.engine_manager
.engine(&t.engine)
.context(TableEngineNotFoundSnafu {
engine_name: &t.engine,
})?;
let table_ref = engine
.open_table(&context, open_request)
.await
.with_context(|_| OpenTableSnafu {
table_info: format!(
"{}.{}.{}, id: {}",
&t.catalog_name, &t.schema_name, &t.table_name, t.table_id
),
})?
.with_context(|| TableNotFoundSnafu {
table_info: format!(
"{}.{}.{}, id: {}",
&t.catalog_name, &t.schema_name, &t.table_name, t.table_id
),
})?;
let register_request = RegisterTableRequest {
catalog: t.catalog_name.clone(),
schema: t.schema_name.clone(),
table_name: t.table_name.clone(),
table_id: t.table_id,
table: table_ref,
};
let _ = self.catalogs.register_table(register_request).await?;
Ok(())
}
async fn check_state(&self) -> Result<()> {
let started = self.init_lock.lock().await;
ensure!(
*started,
IllegalManagerStateSnafu {
msg: "Catalog manager not started",
}
);
Ok(())
}
async fn check_catalog_schema_exist(
&self,
catalog_name: &str,
schema_name: &str,
) -> Result<()> {
if !self.catalogs.catalog_exist(catalog_name).await? {
return CatalogNotFoundSnafu { catalog_name }.fail()?;
}
if !self
.catalogs
.schema_exist(catalog_name, schema_name)
.await?
{
return SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
}
.fail()?;
}
Ok(())
}
}
#[async_trait::async_trait]
impl TableIdProvider for LocalCatalogManager {
async fn next_table_id(&self) -> table::Result<TableId> {
Ok(self.next_table_id.fetch_add(1, Ordering::Relaxed))
}
}
#[async_trait::async_trait]
impl CatalogManager for LocalCatalogManager {
/// Start [LocalCatalogManager] to load all information from system catalog table.
/// Make sure table engine is initialized before starting [MemoryCatalogManager].
async fn start(&self) -> Result<()> {
self.init().await
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
self.check_state().await?;
let catalog_name = request.catalog.clone();
let schema_name = request.schema.clone();
self.check_catalog_schema_exist(&catalog_name, &schema_name)
.await?;
{
let _lock = self.register_lock.lock().await;
if let Some(existing) = self
.catalogs
.table(&request.catalog, &request.schema, &request.table_name)
.await?
{
if existing.table_info().ident.table_id != request.table_id {
error!(
"Unexpected table register request: {:?}, existing: {:?}",
request,
existing.table_info()
);
return TableExistsSnafu {
table: format_full_table_name(
&catalog_name,
&schema_name,
&request.table_name,
),
}
.fail();
}
// Try to register table with same table id, just ignore.
Ok(false)
} else {
// table does not exist
let engine = request.table.table_info().meta.engine.to_string();
let table_name = request.table_name.clone();
let table_id = request.table_id;
let _ = self.catalogs.register_table(request).await?;
let _ = self
.system
.register_table(
catalog_name.clone(),
schema_name.clone(),
table_name,
table_id,
engine,
)
.await?;
increment_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&catalog_name, &schema_name)],
);
Ok(true)
}
}
}
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
self.check_state().await?;
let catalog_name = &request.catalog;
let schema_name = &request.schema;
self.check_catalog_schema_exist(catalog_name, schema_name)
.await?;
ensure!(
self.catalogs
.table(catalog_name, schema_name, &request.new_table_name)
.await?
.is_none(),
TableExistsSnafu {
table: &request.new_table_name
}
);
let _lock = self.register_lock.lock().await;
let old_table = self
.catalogs
.table(catalog_name, schema_name, &request.table_name)
.await?
.context(TableNotExistSnafu {
table: &request.table_name,
})?;
let engine = old_table.table_info().meta.engine.to_string();
// rename table in system catalog
let _ = self
.system
.register_table(
catalog_name.clone(),
schema_name.clone(),
request.new_table_name.clone(),
request.table_id,
engine,
)
.await?;
self.catalogs.rename_table(request).await
}
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> {
self.check_state().await?;
{
let _ = self.register_lock.lock().await;
let DeregisterTableRequest {
catalog,
schema,
table_name,
} = &request;
let table_id = self
.catalogs
.table(catalog, schema, table_name)
.await?
.with_context(|| error::TableNotExistSnafu {
table: format_full_table_name(catalog, schema, table_name),
})?
.table_info()
.ident
.table_id;
self.system.deregister_table(&request, table_id).await?;
self.catalogs.deregister_table(request).await
}
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
self.check_state().await?;
let catalog_name = &request.catalog;
let schema_name = &request.schema;
if !self.catalogs.catalog_exist(catalog_name).await? {
return CatalogNotFoundSnafu { catalog_name }.fail()?;
}
{
let _lock = self.register_lock.lock().await;
ensure!(
!self
.catalogs
.schema_exist(catalog_name, schema_name)
.await?,
SchemaExistsSnafu {
schema: schema_name,
}
);
let _ = self
.system
.register_schema(request.catalog.clone(), schema_name.clone())
.await?;
self.catalogs.register_schema_sync(request)
}
}
async fn deregister_schema(&self, _request: DeregisterSchemaRequest) -> Result<bool> {
UnimplementedSnafu {
operation: "deregister schema",
}
.fail()
}
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
let catalog_name = request.create_table_request.catalog_name.clone();
let schema_name = request.create_table_request.schema_name.clone();
let mut sys_table_requests = self.system_table_requests.lock().await;
sys_table_requests.push(request);
increment_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&catalog_name, &schema_name)],
);
Ok(())
}
async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> {
self.catalogs.schema_exist(catalog, schema).await
}
async fn table(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
self.catalogs
.table(catalog_name, schema_name, table_name)
.await
}
async fn catalog_exist(&self, catalog: &str) -> Result<bool> {
if catalog.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) {
Ok(true)
} else {
self.catalogs.catalog_exist(catalog).await
}
}
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
self.catalogs.table_exist(catalog, schema, table).await
}
async fn catalog_names(&self) -> Result<Vec<String>> {
self.catalogs.catalog_names().await
}
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
self.catalogs.schema_names(catalog_name).await
}
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
self.catalogs.table_names(catalog_name, schema_name).await
}
async fn register_catalog(self: Arc<Self>, name: String) -> Result<bool> {
self.catalogs.clone().register_catalog(name).await
}
fn as_any(&self) -> &dyn Any {
self
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use mito::engine::MITO_ENGINE;
use super::*;
use crate::system::{CatalogEntry, SchemaEntry};
#[test]
fn test_sort_entry() {
let vec = vec![
Entry::Table(TableEntry {
catalog_name: "C1".to_string(),
schema_name: "S1".to_string(),
table_name: "T1".to_string(),
table_id: 1,
engine: MITO_ENGINE.to_string(),
is_deleted: false,
}),
Entry::Catalog(CatalogEntry {
catalog_name: "C2".to_string(),
}),
Entry::Schema(SchemaEntry {
catalog_name: "C1".to_string(),
schema_name: "S1".to_string(),
}),
Entry::Schema(SchemaEntry {
catalog_name: "C2".to_string(),
schema_name: "S2".to_string(),
}),
Entry::Catalog(CatalogEntry {
catalog_name: "".to_string(),
}),
Entry::Table(TableEntry {
catalog_name: "C1".to_string(),
schema_name: "S1".to_string(),
table_name: "T2".to_string(),
table_id: 2,
engine: MITO_ENGINE.to_string(),
is_deleted: false,
}),
];
let res = LocalCatalogManager::sort_entries(vec);
assert_matches!(res[0], Entry::Catalog(..));
assert_matches!(res[1], Entry::Catalog(..));
assert_matches!(res[2], Entry::Schema(..));
assert_matches!(res[3], Entry::Schema(..));
assert_matches!(res[4], Entry::Table(..));
assert_matches!(res[5], Entry::Table(..));
}
}

View File

@@ -0,0 +1,630 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock, Weak};
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
};
use metrics::{decrement_gauge, increment_gauge};
use snafu::OptionExt;
use table::metadata::TableId;
use table::table::TableIdProvider;
use table::TableRef;
use crate::error::{
CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, TableNotFoundSnafu,
};
use crate::information_schema::InformationSchemaProvider;
use crate::{
CatalogManager, DeregisterSchemaRequest, DeregisterTableRequest, RegisterSchemaRequest,
RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
};
type SchemaEntries = HashMap<String, HashMap<String, TableRef>>;
/// Simple in-memory list of catalogs
pub struct MemoryCatalogManager {
/// Collection of catalogs containing schemas and ultimately Tables
pub catalogs: RwLock<HashMap<String, SchemaEntries>>,
pub table_id: AtomicU32,
}
#[async_trait::async_trait]
impl TableIdProvider for MemoryCatalogManager {
async fn next_table_id(&self) -> table::error::Result<TableId> {
Ok(self.table_id.fetch_add(1, Ordering::Relaxed))
}
}
#[async_trait::async_trait]
impl CatalogManager for MemoryCatalogManager {
async fn start(&self) -> Result<()> {
self.table_id.store(MIN_USER_TABLE_ID, Ordering::Relaxed);
Ok(())
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
self.register_table_sync(request)
}
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let schema = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.get_mut(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
// check old and new table names
if !schema.contains_key(&request.table_name) {
return TableNotFoundSnafu {
table_info: request.table_name.to_string(),
}
.fail()?;
}
if schema.contains_key(&request.new_table_name) {
return TableExistsSnafu {
table: &request.new_table_name,
}
.fail();
}
let table = schema.remove(&request.table_name).unwrap();
let _ = schema.insert(request.new_table_name, table);
Ok(true)
}
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> {
self.deregister_table_sync(request)
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
self.register_schema_sync(request)
}
async fn deregister_schema(&self, request: DeregisterSchemaRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let schemas = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?;
let table_count = schemas
.remove(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?
.len();
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
table_count as f64,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT,
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
Ok(true)
}
async fn register_system_table(&self, _request: RegisterSystemTableRequest) -> Result<()> {
// TODO(ruihang): support register system table request
Ok(())
}
async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> {
self.schema_exist_sync(catalog, schema)
}
async fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let result = try {
self.catalogs
.read()
.unwrap()
.get(catalog)?
.get(schema)?
.get(table_name)
.cloned()?
};
Ok(result)
}
async fn catalog_exist(&self, catalog: &str) -> Result<bool> {
self.catalog_exist_sync(catalog)
}
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
let catalogs = self.catalogs.read().unwrap();
Ok(catalogs
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.get(schema)
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
.contains_key(table))
}
async fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
}
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog_name)
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
.keys()
.cloned()
.collect())
}
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog_name)
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
.get(schema_name)
.with_context(|| SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?
.keys()
.cloned()
.collect())
}
async fn register_catalog(self: Arc<Self>, name: String) -> Result<bool> {
self.register_catalog_sync(name)
}
fn as_any(&self) -> &dyn Any {
self
}
}
impl MemoryCatalogManager {
/// Creates a manager with some default setups
/// (e.g. default catalog/schema and information schema)
pub fn with_default_setup() -> Arc<Self> {
let manager = Arc::new(Self {
table_id: AtomicU32::new(MIN_USER_TABLE_ID),
catalogs: Default::default(),
});
// Safety: default catalog/schema is registered in order so no CatalogNotFound error will occur
manager
.register_catalog_sync(DEFAULT_CATALOG_NAME.to_string())
.unwrap();
manager
.register_schema_sync(RegisterSchemaRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
})
.unwrap();
manager
}
fn schema_exist_sync(&self, catalog: &str, schema: &str) -> Result<bool> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.contains_key(schema))
}
fn catalog_exist_sync(&self, catalog: &str) -> Result<bool> {
Ok(self.catalogs.read().unwrap().get(catalog).is_some())
}
/// Registers a catalog if it does not exist and returns false if the schema exists.
pub fn register_catalog_sync(self: &Arc<Self>, name: String) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
match catalogs.entry(name.clone()) {
Entry::Vacant(e) => {
let catalog = self.create_catalog_entry(name);
e.insert(catalog);
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT, 1.0);
Ok(true)
}
Entry::Occupied(_) => Ok(false),
}
}
pub fn deregister_table_sync(&self, request: DeregisterTableRequest) -> Result<()> {
let mut catalogs = self.catalogs.write().unwrap();
let schema = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.get_mut(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
let result = schema.remove(&request.table_name);
if result.is_some() {
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
}
Ok(())
}
/// Registers a schema if it does not exist.
/// It returns an error if the catalog does not exist,
/// and returns false if the schema exists.
pub fn register_schema_sync(&self, request: RegisterSchemaRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let catalog = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?;
match catalog.entry(request.schema) {
Entry::Vacant(e) => {
e.insert(HashMap::new());
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT, 1.0);
Ok(true)
}
Entry::Occupied(_) => Ok(false),
}
}
/// Registers a schema and returns an error if the catalog or schema does not exist.
pub fn register_table_sync(&self, request: RegisterTableRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let schema = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.get_mut(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
if schema.contains_key(&request.table_name) {
return TableExistsSnafu {
table: &request.table_name,
}
.fail();
}
schema.insert(request.table_name, request.table);
increment_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
Ok(true)
}
fn create_catalog_entry(self: &Arc<Self>, catalog: String) -> SchemaEntries {
let information_schema = InformationSchemaProvider::build(
catalog,
Arc::downgrade(self) as Weak<dyn CatalogManager>,
);
let mut catalog = HashMap::new();
catalog.insert(INFORMATION_SCHEMA_NAME.to_string(), information_schema);
catalog
}
#[cfg(any(test, feature = "testing"))]
pub fn new_with_table(table: TableRef) -> Arc<Self> {
let manager = Self::with_default_setup();
let catalog = &table.table_info().catalog_name;
let schema = &table.table_info().schema_name;
if !manager.catalog_exist_sync(catalog).unwrap() {
manager.register_catalog_sync(catalog.to_string()).unwrap();
}
if !manager.schema_exist_sync(catalog, schema).unwrap() {
manager
.register_schema_sync(RegisterSchemaRequest {
catalog: catalog.to_string(),
schema: schema.to_string(),
})
.unwrap();
}
let request = RegisterTableRequest {
catalog: catalog.to_string(),
schema: schema.to_string(),
table_name: table.table_info().name.clone(),
table_id: table.table_info().ident.table_id,
table,
};
let _ = manager.register_table_sync(request).unwrap();
manager
}
}
/// Create a memory catalog list contains a numbers table for test
pub fn new_memory_catalog_manager() -> Result<Arc<MemoryCatalogManager>> {
Ok(MemoryCatalogManager::with_default_setup())
}
#[cfg(test)]
mod tests {
use common_catalog::consts::*;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use super::*;
#[tokio::test]
async fn test_new_memory_catalog_list() {
let catalog_list = new_memory_catalog_manager().unwrap();
let register_request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: NUMBERS_TABLE_NAME.to_string(),
table_id: NUMBERS_TABLE_ID,
table: NumbersTable::table(NUMBERS_TABLE_ID),
};
let _ = catalog_list.register_table(register_request).await.unwrap();
let table = catalog_list
.table(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
NUMBERS_TABLE_NAME,
)
.await
.unwrap();
let _ = table.unwrap();
assert!(catalog_list
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
.await
.unwrap()
.is_none());
}
#[tokio::test]
async fn test_mem_manager_rename_table() {
let catalog = MemoryCatalogManager::with_default_setup();
let table_name = "test_table";
assert!(!catalog
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap());
// register test table
let table_id = 2333;
let register_request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
table_id,
table: NumbersTable::table(table_id),
};
assert!(catalog.register_table(register_request).await.unwrap());
assert!(catalog
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap());
// rename test table
let new_table_name = "test_table_renamed";
let rename_request = RenameTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
new_table_name: new_table_name.to_string(),
table_id,
};
let _ = catalog.rename_table(rename_request).await.unwrap();
// test old table name not exist
assert!(!catalog
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap());
// test new table name exists
assert!(catalog
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap());
let registered_table = catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);
let dup_register_request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: new_table_name.to_string(),
table_id: table_id + 1,
table: NumbersTable::table(table_id + 1),
};
let result = catalog.register_table(dup_register_request).await;
let err = result.err().unwrap();
assert_eq!(StatusCode::TableAlreadyExists, err.status_code());
}
#[tokio::test]
async fn test_catalog_rename_table() {
let catalog = MemoryCatalogManager::with_default_setup();
let table_name = "num";
let table_id = 2333;
let table = NumbersTable::table(table_id);
// register table
let register_table_req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
table_id,
table,
};
assert!(catalog.register_table(register_table_req).await.unwrap());
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_some());
// rename table
let new_table_name = "numbers_new";
let rename_table_req = RenameTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
new_table_name: new_table_name.to_string(),
table_id,
};
assert!(catalog.rename_table(rename_table_req).await.unwrap());
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_none());
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.is_some());
let registered_table = catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);
}
#[test]
pub fn test_register_catalog_sync() {
let list = MemoryCatalogManager::with_default_setup();
assert!(list
.register_catalog_sync("test_catalog".to_string())
.unwrap());
assert!(!list
.register_catalog_sync("test_catalog".to_string())
.unwrap());
}
#[tokio::test]
pub async fn test_catalog_deregister_table() {
let catalog = MemoryCatalogManager::with_default_setup();
let table_name = "foo_table";
let register_table_req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
table_id: 2333,
table: NumbersTable::table(2333),
};
let _ = catalog.register_table(register_table_req).await.unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_some());
let deregister_table_req = DeregisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
};
catalog
.deregister_table(deregister_table_req)
.await
.unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_none());
}
#[tokio::test]
async fn test_catalog_deregister_schema() {
let catalog = MemoryCatalogManager::with_default_setup();
// Registers a catalog, a schema, and a table.
let catalog_name = "foo_catalog".to_string();
let schema_name = "foo_schema".to_string();
let table_name = "foo_table".to_string();
let schema = RegisterSchemaRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
};
let table = RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name,
table_id: 0,
table: NumbersTable::table(0),
};
catalog
.clone()
.register_catalog(catalog_name.clone())
.await
.unwrap();
catalog.register_schema(schema).await.unwrap();
catalog.register_table(table).await.unwrap();
let request = DeregisterSchemaRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
};
assert!(catalog.deregister_schema(request).await.unwrap());
assert!(!catalog
.schema_exist(&catalog_name, &schema_name)
.await
.unwrap());
}
}

View File

@@ -1,365 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock, Weak};
use common_catalog::build_db_string;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME};
use snafu::OptionExt;
use table::TableRef;
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
use crate::information_schema::InformationSchemaProvider;
use crate::{CatalogManager, DeregisterTableRequest, RegisterSchemaRequest, RegisterTableRequest};
type SchemaEntries = HashMap<String, HashMap<String, TableRef>>;
/// Simple in-memory list of catalogs
#[derive(Clone)]
pub struct MemoryCatalogManager {
/// Collection of catalogs containing schemas and ultimately Tables
catalogs: Arc<RwLock<HashMap<String, SchemaEntries>>>,
}
#[async_trait::async_trait]
impl CatalogManager for MemoryCatalogManager {
async fn schema_exists(&self, catalog: &str, schema: &str) -> Result<bool> {
self.schema_exist_sync(catalog, schema)
}
async fn table(
&self,
catalog: &str,
schema: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
let result = try {
self.catalogs
.read()
.unwrap()
.get(catalog)?
.get(schema)?
.get(table_name)
.cloned()?
};
Ok(result)
}
async fn catalog_exists(&self, catalog: &str) -> Result<bool> {
self.catalog_exist_sync(catalog)
}
async fn table_exists(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
let catalogs = self.catalogs.read().unwrap();
Ok(catalogs
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.get(schema)
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
.contains_key(table))
}
async fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.read().unwrap().keys().cloned().collect())
}
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog_name)
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
.keys()
.cloned()
.collect())
}
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog_name)
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
.get(schema_name)
.with_context(|| SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?
.keys()
.cloned()
.collect())
}
fn as_any(&self) -> &dyn Any {
self
}
}
impl MemoryCatalogManager {
pub fn new() -> Arc<Self> {
Arc::new(Self {
catalogs: Default::default(),
})
}
/// Creates a manager with some default setups
/// (e.g. default catalog/schema and information schema)
pub fn with_default_setup() -> Arc<Self> {
let manager = Arc::new(Self {
catalogs: Default::default(),
});
// Safety: default catalog/schema is registered in order so no CatalogNotFound error will occur
manager.register_catalog_sync(DEFAULT_CATALOG_NAME).unwrap();
manager
.register_schema_sync(RegisterSchemaRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
})
.unwrap();
manager
}
fn schema_exist_sync(&self, catalog: &str, schema: &str) -> Result<bool> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.contains_key(schema))
}
fn catalog_exist_sync(&self, catalog: &str) -> Result<bool> {
Ok(self.catalogs.read().unwrap().get(catalog).is_some())
}
/// Registers a catalog if it does not exist and returns false if the schema exists.
pub fn register_catalog_sync(&self, name: &str) -> Result<bool> {
let name = name.to_string();
let mut catalogs = self.catalogs.write().unwrap();
match catalogs.entry(name.clone()) {
Entry::Vacant(e) => {
let arc_self = Arc::new(self.clone());
let catalog = arc_self.create_catalog_entry(name);
e.insert(catalog);
crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT.inc();
Ok(true)
}
Entry::Occupied(_) => Ok(false),
}
}
pub fn deregister_table_sync(&self, request: DeregisterTableRequest) -> Result<()> {
let mut catalogs = self.catalogs.write().unwrap();
let schema = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.get_mut(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
let result = schema.remove(&request.table_name);
if result.is_some() {
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT
.with_label_values(&[build_db_string(&request.catalog, &request.schema).as_str()])
.dec();
}
Ok(())
}
/// Registers a schema if it does not exist.
/// It returns an error if the catalog does not exist,
/// and returns false if the schema exists.
pub fn register_schema_sync(&self, request: RegisterSchemaRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let catalog = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?;
match catalog.entry(request.schema) {
Entry::Vacant(e) => {
e.insert(HashMap::new());
crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT.inc();
Ok(true)
}
Entry::Occupied(_) => Ok(false),
}
}
/// Registers a schema and returns an error if the catalog or schema does not exist.
pub fn register_table_sync(&self, request: RegisterTableRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let schema = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.get_mut(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
if schema.contains_key(&request.table_name) {
return TableExistsSnafu {
table: &request.table_name,
}
.fail();
}
schema.insert(request.table_name, request.table);
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT
.with_label_values(&[build_db_string(&request.catalog, &request.schema).as_str()])
.inc();
Ok(true)
}
fn create_catalog_entry(self: &Arc<Self>, catalog: String) -> SchemaEntries {
let information_schema = InformationSchemaProvider::build(
catalog,
Arc::downgrade(self) as Weak<dyn CatalogManager>,
);
let mut catalog = HashMap::new();
catalog.insert(INFORMATION_SCHEMA_NAME.to_string(), information_schema);
catalog
}
#[cfg(any(test, feature = "testing"))]
pub fn new_with_table(table: TableRef) -> Arc<Self> {
let manager = Self::with_default_setup();
let catalog = &table.table_info().catalog_name;
let schema = &table.table_info().schema_name;
if !manager.catalog_exist_sync(catalog).unwrap() {
manager.register_catalog_sync(catalog).unwrap();
}
if !manager.schema_exist_sync(catalog, schema).unwrap() {
manager
.register_schema_sync(RegisterSchemaRequest {
catalog: catalog.to_string(),
schema: schema.to_string(),
})
.unwrap();
}
let request = RegisterTableRequest {
catalog: catalog.to_string(),
schema: schema.to_string(),
table_name: table.table_info().name.clone(),
table_id: table.table_info().ident.table_id,
table,
};
let _ = manager.register_table_sync(request).unwrap();
manager
}
}
/// Create a memory catalog list contains a numbers table for test
pub fn new_memory_catalog_manager() -> Result<Arc<MemoryCatalogManager>> {
Ok(MemoryCatalogManager::with_default_setup())
}
#[cfg(test)]
mod tests {
use common_catalog::consts::*;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use super::*;
#[tokio::test]
async fn test_new_memory_catalog_list() {
let catalog_list = new_memory_catalog_manager().unwrap();
let register_request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: NUMBERS_TABLE_NAME.to_string(),
table_id: NUMBERS_TABLE_ID,
table: NumbersTable::table(NUMBERS_TABLE_ID),
};
catalog_list.register_table_sync(register_request).unwrap();
let table = catalog_list
.table(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
NUMBERS_TABLE_NAME,
)
.await
.unwrap();
let _ = table.unwrap();
assert!(catalog_list
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
.await
.unwrap()
.is_none());
}
#[test]
pub fn test_register_catalog_sync() {
let list = MemoryCatalogManager::with_default_setup();
assert!(list.register_catalog_sync("test_catalog").unwrap());
assert!(!list.register_catalog_sync("test_catalog").unwrap());
}
#[tokio::test]
pub async fn test_catalog_deregister_table() {
let catalog = MemoryCatalogManager::with_default_setup();
let table_name = "foo_table";
let register_table_req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
table_id: 2333,
table: NumbersTable::table(2333),
};
catalog.register_table_sync(register_table_req).unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_some());
let deregister_table_req = DeregisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
};
catalog.deregister_table_sync(deregister_table_req).unwrap();
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_none());
}
}

View File

@@ -12,24 +12,18 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use common_catalog::build_db_string;
pub(crate) const METRIC_DB_LABEL: &str = "db"; pub(crate) const METRIC_DB_LABEL: &str = "db";
use lazy_static::lazy_static; pub(crate) const METRIC_CATALOG_MANAGER_CATALOG_COUNT: &str = "catalog.catalog_count";
use prometheus::*; pub(crate) const METRIC_CATALOG_MANAGER_SCHEMA_COUNT: &str = "catalog.schema_count";
pub(crate) const METRIC_CATALOG_MANAGER_TABLE_COUNT: &str = "catalog.table_count";
lazy_static! { pub(crate) const METRIC_CATALOG_KV_REMOTE_GET: &str = "catalog.kv.get.remote";
pub static ref METRIC_CATALOG_MANAGER_CATALOG_COUNT: IntGauge = pub(crate) const METRIC_CATALOG_KV_GET: &str = "catalog.kv.get";
register_int_gauge!("catalog_catalog_count", "catalog catalog count").unwrap();
pub static ref METRIC_CATALOG_MANAGER_SCHEMA_COUNT: IntGauge = #[inline]
register_int_gauge!("catalog_schema_count", "catalog schema count").unwrap(); pub(crate) fn db_label(catalog: &str, schema: &str) -> (&'static str, String) {
pub static ref METRIC_CATALOG_MANAGER_TABLE_COUNT: IntGaugeVec = register_int_gauge_vec!( (METRIC_DB_LABEL, build_db_string(catalog, schema))
"catalog_table_count",
"catalog table count",
&[METRIC_DB_LABEL]
)
.unwrap();
pub static ref METRIC_CATALOG_KV_REMOTE_GET: Histogram =
register_histogram!("catalog_kv_get_remote", "catalog kv get remote").unwrap();
pub static ref METRIC_CATALOG_KV_GET: Histogram =
register_histogram!("catalog_kv_get", "catalog kv get").unwrap();
} }

View File

@@ -12,11 +12,21 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::sync::Arc;
pub use client::{CachedMetaKvBackend, MetaKvBackend}; pub use client::{CachedMetaKvBackend, MetaKvBackend};
pub use manager::RemoteCatalogManager;
mod client; mod client;
mod manager; mod manager;
#[cfg(feature = "testing")] #[cfg(feature = "testing")]
pub mod mock; pub mod mock;
pub use manager::KvBackendCatalogManager; pub mod region_alive_keeper;
#[async_trait::async_trait]
pub trait KvCacheInvalidator: Send + Sync {
async fn invalidate_key(&self, key: &[u8]);
}
pub type KvCacheInvalidatorRef = Arc<dyn KvCacheInvalidator>;

View File

@@ -18,21 +18,22 @@ use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use common_error::ext::BoxedError; use common_error::ext::BoxedError;
use common_meta::cache_invalidator::KvCacheInvalidator;
use common_meta::error::Error::{CacheNotGet, GetKvCache}; use common_meta::error::Error::{CacheNotGet, GetKvCache};
use common_meta::error::{CacheNotGetSnafu, Error, ExternalSnafu, Result}; use common_meta::error::{CacheNotGetSnafu, Error, MetaSrvSnafu, Result};
use common_meta::kv_backend::{KvBackend, KvBackendRef, TxnService}; use common_meta::kv_backend::{KvBackend, KvBackendRef, TxnService};
use common_meta::rpc::store::{ use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, PutRequest, PutResponse, RangeRequest, RangeResponse, DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
}; };
use common_meta::rpc::KeyValue; use common_meta::rpc::KeyValue;
use common_telemetry::debug; use common_telemetry::timer;
use meta_client::client::MetaClient; use meta_client::client::MetaClient;
use moka::future::{Cache, CacheBuilder}; use moka::future::{Cache, CacheBuilder};
use snafu::{OptionExt, ResultExt}; use snafu::{OptionExt, ResultExt};
use super::KvCacheInvalidator;
use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET}; use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
const CACHE_MAX_CAPACITY: u64 = 10000; const CACHE_MAX_CAPACITY: u64 = 10000;
@@ -151,11 +152,25 @@ impl KvBackend for CachedMetaKvBackend {
} }
} }
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
let from_key = &req.from_key.clone();
let to_key = &req.to_key.clone();
let ret = self.kv_backend.move_value(req).await;
if ret.is_ok() {
self.invalidate_key(from_key).await;
self.invalidate_key(to_key).await;
}
ret
}
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> { async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
let _timer = METRIC_CATALOG_KV_GET.start_timer(); let _timer = timer!(METRIC_CATALOG_KV_GET);
let init = async { let init = async {
let _timer = METRIC_CATALOG_KV_REMOTE_GET.start_timer(); let _timer = timer!(METRIC_CATALOG_KV_REMOTE_GET);
self.kv_backend.get(key).await.map(|val| { self.kv_backend.get(key).await.map(|val| {
val.with_context(|| CacheNotGetSnafu { val.with_context(|| CacheNotGetSnafu {
key: String::from_utf8_lossy(key), key: String::from_utf8_lossy(key),
@@ -182,8 +197,7 @@ impl KvBackend for CachedMetaKvBackend {
#[async_trait::async_trait] #[async_trait::async_trait]
impl KvCacheInvalidator for CachedMetaKvBackend { impl KvCacheInvalidator for CachedMetaKvBackend {
async fn invalidate_key(&self, key: &[u8]) { async fn invalidate_key(&self, key: &[u8]) {
self.cache.invalidate(key).await; self.cache.invalidate(key).await
debug!("invalidated cache key: {}", String::from_utf8_lossy(key));
} }
} }
@@ -237,7 +251,7 @@ impl KvBackend for MetaKvBackend {
.range(req) .range(req)
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu) .context(MetaSrvSnafu)
} }
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> { async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
@@ -246,7 +260,7 @@ impl KvBackend for MetaKvBackend {
.range(RangeRequest::new().with_key(key)) .range(RangeRequest::new().with_key(key))
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu)?; .context(MetaSrvSnafu)?;
Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue { Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue {
key: kv.take_key(), key: kv.take_key(),
value: kv.take_value(), value: kv.take_value(),
@@ -258,7 +272,7 @@ impl KvBackend for MetaKvBackend {
.batch_put(req) .batch_put(req)
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu) .context(MetaSrvSnafu)
} }
async fn put(&self, req: PutRequest) -> Result<PutResponse> { async fn put(&self, req: PutRequest) -> Result<PutResponse> {
@@ -266,7 +280,7 @@ impl KvBackend for MetaKvBackend {
.put(req) .put(req)
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu) .context(MetaSrvSnafu)
} }
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> { async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> {
@@ -274,7 +288,7 @@ impl KvBackend for MetaKvBackend {
.delete_range(req) .delete_range(req)
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu) .context(MetaSrvSnafu)
} }
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> { async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
@@ -282,7 +296,7 @@ impl KvBackend for MetaKvBackend {
.batch_delete(req) .batch_delete(req)
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu) .context(MetaSrvSnafu)
} }
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> { async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
@@ -290,7 +304,7 @@ impl KvBackend for MetaKvBackend {
.batch_get(req) .batch_get(req)
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu) .context(MetaSrvSnafu)
} }
async fn compare_and_put( async fn compare_and_put(
@@ -301,7 +315,15 @@ impl KvBackend for MetaKvBackend {
.compare_and_put(request) .compare_and_put(request)
.await .await
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(ExternalSnafu) .context(MetaSrvSnafu)
}
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> {
self.client
.move_value(req)
.await
.map_err(BoxedError::new)
.context(MetaSrvSnafu)
} }
fn as_any(&self) -> &dyn Any { fn as_any(&self) -> &dyn Any {

View File

@@ -0,0 +1,436 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::sync::Arc;
use async_trait::async_trait;
use common_catalog::consts::MITO_ENGINE;
use common_meta::ident::TableIdent;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::datanode_table::DatanodeTableValue;
use common_meta::key::schema_name::SchemaNameKey;
use common_meta::key::TableMetadataManagerRef;
use common_telemetry::{error, info, warn};
use futures_util::TryStreamExt;
use metrics::increment_gauge;
use snafu::{ensure, OptionExt, ResultExt};
use table::engine::manager::TableEngineManagerRef;
use table::engine::EngineContext;
use table::requests::OpenTableRequest;
use table::TableRef;
use tokio::sync::Mutex;
use crate::error::{
OpenTableSnafu, ParallelOpenTableSnafu, Result, TableEngineNotFoundSnafu, TableExistsSnafu,
TableMetadataManagerSnafu, TableNotFoundSnafu, UnimplementedSnafu,
};
use crate::local::MemoryCatalogManager;
use crate::remote::region_alive_keeper::RegionAliveKeepers;
use crate::{
handle_system_table_request, CatalogManager, DeregisterSchemaRequest, DeregisterTableRequest,
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
};
/// Catalog manager based on metasrv.
pub struct RemoteCatalogManager {
node_id: u64,
engine_manager: TableEngineManagerRef,
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
region_alive_keepers: Arc<RegionAliveKeepers>,
memory_catalog_manager: Arc<MemoryCatalogManager>,
table_metadata_manager: TableMetadataManagerRef,
}
impl RemoteCatalogManager {
pub fn new(
engine_manager: TableEngineManagerRef,
node_id: u64,
region_alive_keepers: Arc<RegionAliveKeepers>,
table_metadata_manager: TableMetadataManagerRef,
) -> Self {
Self {
engine_manager,
node_id,
system_table_requests: Default::default(),
region_alive_keepers,
memory_catalog_manager: MemoryCatalogManager::with_default_setup(),
table_metadata_manager,
}
}
async fn initiate_catalogs(&self) -> Result<()> {
let tables = self
.table_metadata_manager
.datanode_table_manager()
.tables(self.node_id)
.try_collect::<Vec<_>>()
.await
.context(TableMetadataManagerSnafu)?;
let joins = tables
.into_iter()
.map(|datanode_table_value| {
let engine_manager = self.engine_manager.clone();
let memory_catalog_manager = self.memory_catalog_manager.clone();
let table_metadata_manager = self.table_metadata_manager.clone();
let region_alive_keepers = self.region_alive_keepers.clone();
common_runtime::spawn_bg(async move {
let table_id = datanode_table_value.table_id;
if let Err(e) = open_and_register_table(
engine_manager,
datanode_table_value,
memory_catalog_manager,
table_metadata_manager,
region_alive_keepers,
)
.await
{
// Note that we don't return error here if table opened failed. This is because
// we don't want those broken tables to impede the startup of Datanode.
// However, this could be changed in the future.
error!(e; "Failed to open or register table, id = {table_id}")
}
})
})
.collect::<Vec<_>>();
let _ = futures::future::try_join_all(joins)
.await
.context(ParallelOpenTableSnafu)?;
Ok(())
}
}
async fn open_and_register_table(
engine_manager: TableEngineManagerRef,
datanode_table_value: DatanodeTableValue,
memory_catalog_manager: Arc<MemoryCatalogManager>,
table_metadata_manager: TableMetadataManagerRef,
region_alive_keepers: Arc<RegionAliveKeepers>,
) -> Result<()> {
let context = EngineContext {};
let table_id = datanode_table_value.table_id;
let region_numbers = datanode_table_value.regions;
let table_info_value = table_metadata_manager
.table_info_manager()
.get(table_id)
.await
.context(TableMetadataManagerSnafu)?
.context(TableNotFoundSnafu {
table_info: format!("table id: {table_id}"),
})?;
let table_info = &table_info_value.table_info;
let catalog_name = table_info.catalog_name.clone();
let schema_name = table_info.schema_name.clone();
let table_name = table_info.name.clone();
let request = OpenTableRequest {
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
table_id,
region_numbers: region_numbers.clone(),
};
let engine =
engine_manager
.engine(&table_info.meta.engine)
.context(TableEngineNotFoundSnafu {
engine_name: &table_info.meta.engine,
})?;
let table_ident = TableIdent {
catalog: catalog_name,
schema: schema_name,
table: table_name,
table_id,
engine: table_info.meta.engine.clone(),
};
let table = engine
.open_table(&context, request)
.await
.with_context(|_| OpenTableSnafu {
table_info: table_ident.to_string(),
})?
.with_context(|| TableNotFoundSnafu {
table_info: table_ident.to_string(),
})?;
info!("Successfully opened table, {table_ident}");
if !memory_catalog_manager
.catalog_exist(&table_ident.catalog)
.await?
{
memory_catalog_manager.register_catalog_sync(table_ident.catalog.clone())?;
}
if !memory_catalog_manager
.schema_exist(&table_ident.catalog, &table_ident.schema)
.await?
{
memory_catalog_manager.register_schema_sync(RegisterSchemaRequest {
catalog: table_ident.catalog.clone(),
schema: table_ident.schema.clone(),
})?;
}
let request = RegisterTableRequest {
catalog: table_ident.catalog.clone(),
schema: table_ident.schema.clone(),
table_name: table_ident.table.clone(),
table_id,
table,
};
let registered =
register_table(&memory_catalog_manager, &region_alive_keepers, request).await?;
ensure!(
registered,
TableExistsSnafu {
table: table_ident.to_string(),
}
);
info!("Successfully registered table, {table_ident}");
Ok(())
}
async fn register_table(
memory_catalog_manager: &Arc<MemoryCatalogManager>,
region_alive_keepers: &Arc<RegionAliveKeepers>,
request: RegisterTableRequest,
) -> Result<bool> {
let table = request.table.clone();
let registered = memory_catalog_manager.register_table_sync(request)?;
if registered {
let table_info = table.table_info();
let table_ident = TableIdent {
catalog: table_info.catalog_name.clone(),
schema: table_info.schema_name.clone(),
table: table_info.name.clone(),
table_id: table_info.table_id(),
engine: table_info.meta.engine.clone(),
};
region_alive_keepers
.register_table(table_ident, table, memory_catalog_manager.clone())
.await?;
}
Ok(registered)
}
#[async_trait]
impl CatalogManager for RemoteCatalogManager {
async fn start(&self) -> Result<()> {
self.initiate_catalogs().await?;
let mut system_table_requests = self.system_table_requests.lock().await;
let engine = self
.engine_manager
.engine(MITO_ENGINE)
.context(TableEngineNotFoundSnafu {
engine_name: MITO_ENGINE,
})?;
handle_system_table_request(self, engine, &mut system_table_requests).await?;
info!("All system table opened");
Ok(())
}
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
register_table(
&self.memory_catalog_manager,
&self.region_alive_keepers,
request,
)
.await
}
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> {
let Some(table) = self
.memory_catalog_manager
.table(&request.catalog, &request.schema, &request.table_name)
.await?
else {
return Ok(());
};
let table_info = table.table_info();
let table_ident = TableIdent {
catalog: request.catalog.clone(),
schema: request.schema.clone(),
table: request.table_name.clone(),
table_id: table_info.ident.table_id,
engine: table_info.meta.engine.clone(),
};
if let Some(keeper) = self
.region_alive_keepers
.deregister_table(&table_ident)
.await
{
warn!(
"Table {} is deregistered from region alive keepers",
keeper.table_ident(),
);
}
self.memory_catalog_manager.deregister_table(request).await
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
self.memory_catalog_manager.register_schema_sync(request)
}
async fn deregister_schema(&self, _request: DeregisterSchemaRequest) -> Result<bool> {
UnimplementedSnafu {
operation: "deregister schema",
}
.fail()
}
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
self.memory_catalog_manager.rename_table(request).await
}
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
let catalog_name = request.create_table_request.catalog_name.clone();
let schema_name = request.create_table_request.schema_name.clone();
let mut requests = self.system_table_requests.lock().await;
requests.push(request);
increment_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&catalog_name, &schema_name)],
);
Ok(())
}
async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> {
if !self.catalog_exist(catalog).await? {
return Ok(false);
}
if self
.memory_catalog_manager
.schema_exist(catalog, schema)
.await?
{
return Ok(true);
}
let remote_schema_exists = self
.table_metadata_manager
.schema_manager()
.exist(SchemaNameKey::new(catalog, schema))
.await
.context(TableMetadataManagerSnafu)?;
// Create schema locally if remote schema exists. Since local schema is managed by memory
// catalog manager, creating a local schema is relatively cheap (just a HashMap).
// Besides, if this method ("schema_exist) is called, it's very likely that someone wants to
// create a table in this schema. We should create the schema now.
if remote_schema_exists
&& self
.memory_catalog_manager
.register_schema(RegisterSchemaRequest {
catalog: catalog.to_string(),
schema: schema.to_string(),
})
.await?
{
info!("register schema '{catalog}/{schema}' on demand");
}
Ok(remote_schema_exists)
}
async fn table(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<Option<TableRef>> {
self.memory_catalog_manager
.table(catalog_name, schema_name, table_name)
.await
}
async fn catalog_exist(&self, catalog: &str) -> Result<bool> {
if self.memory_catalog_manager.catalog_exist(catalog).await? {
return Ok(true);
}
let key = CatalogNameKey::new(catalog);
let remote_catalog_exists = self
.table_metadata_manager
.catalog_manager()
.exist(key)
.await
.context(TableMetadataManagerSnafu)?;
// Create catalog locally if remote catalog exists. Since local catalog is managed by memory
// catalog manager, creating a local catalog is relatively cheap (just a HashMap).
// Besides, if this method ("catalog_exist) is called, it's very likely that someone wants to
// create a table in this catalog. We should create the catalog now.
if remote_catalog_exists
&& self
.memory_catalog_manager
.clone()
.register_catalog(catalog.to_string())
.await?
{
info!("register catalog '{catalog}' on demand");
}
Ok(remote_catalog_exists)
}
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
if !self.catalog_exist(catalog).await? {
return Ok(false);
}
if !self.schema_exist(catalog, schema).await? {
return Ok(false);
}
self.memory_catalog_manager
.table_exist(catalog, schema, table)
.await
}
async fn catalog_names(&self) -> Result<Vec<String>> {
self.memory_catalog_manager.catalog_names().await
}
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> {
self.memory_catalog_manager.schema_names(catalog_name).await
}
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
self.memory_catalog_manager
.table_names(catalog_name, schema_name)
.await
}
async fn register_catalog(self: Arc<Self>, name: String) -> Result<bool> {
self.memory_catalog_manager.register_catalog_sync(name)
}
fn as_any(&self) -> &dyn Any {
self
}
}

View File

@@ -55,14 +55,14 @@ impl TableEngine for MockTableEngine {
let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _]; let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
let record_batch = RecordBatch::new(schema, data).unwrap(); let record_batch = RecordBatch::new(schema, data).unwrap();
let table = MemTable::new_with_catalog( let table: TableRef = Arc::new(MemTable::new_with_catalog(
&request.table_name, &request.table_name,
record_batch, record_batch,
table_id, table_id,
request.catalog_name, request.catalog_name,
request.schema_name, request.schema_name,
vec![0], vec![0],
); )) as Arc<_>;
let mut tables = self.tables.write().unwrap(); let mut tables = self.tables.write().unwrap();
let _ = tables.insert(table_id, table.clone() as TableRef); let _ = tables.insert(table_id, table.clone() as TableRef);

View File

@@ -0,0 +1,867 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::future::Future;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use async_trait::async_trait;
use common_meta::error::InvalidProtoMsgSnafu;
use common_meta::heartbeat::handler::{
HandleControl, HeartbeatResponseHandler, HeartbeatResponseHandlerContext,
};
use common_meta::ident::TableIdent;
use common_meta::RegionIdent;
use common_telemetry::{debug, error, info, warn};
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionNumber;
use table::engine::manager::TableEngineManagerRef;
use table::engine::{CloseTableResult, EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::CloseTableRequest;
use table::TableRef;
use tokio::sync::{mpsc, oneshot, Mutex};
use tokio::task::JoinHandle;
use tokio::time::{Duration, Instant};
use crate::error::{Result, TableEngineNotFoundSnafu};
use crate::local::MemoryCatalogManager;
use crate::DeregisterTableRequest;
/// [RegionAliveKeepers] manages all [RegionAliveKeeper] in a scope of tables.
pub struct RegionAliveKeepers {
table_engine_manager: TableEngineManagerRef,
keepers: Arc<Mutex<HashMap<TableId, Arc<RegionAliveKeeper>>>>,
heartbeat_interval_millis: u64,
started: AtomicBool,
/// The epoch when [RegionAliveKeepers] is created. It's used to get a monotonically non-decreasing
/// elapsed time when submitting heartbeats to Metasrv (because [Instant] is monotonically
/// non-decreasing). The heartbeat request will carry the duration since this epoch, and the
/// duration acts like an "invariant point" for region's keep alive lease.
epoch: Instant,
}
impl RegionAliveKeepers {
pub fn new(
table_engine_manager: TableEngineManagerRef,
heartbeat_interval_millis: u64,
) -> Self {
Self {
table_engine_manager,
keepers: Arc::new(Mutex::new(HashMap::new())),
heartbeat_interval_millis,
started: AtomicBool::new(false),
epoch: Instant::now(),
}
}
pub async fn find_keeper(&self, table_id: TableId) -> Option<Arc<RegionAliveKeeper>> {
self.keepers.lock().await.get(&table_id).cloned()
}
pub async fn register_table(
&self,
table_ident: TableIdent,
table: TableRef,
catalog_manager: Arc<MemoryCatalogManager>,
) -> Result<()> {
let table_id = table_ident.table_id;
let keeper = self.find_keeper(table_id).await;
if keeper.is_some() {
return Ok(());
}
let table_engine = self
.table_engine_manager
.engine(&table_ident.engine)
.context(TableEngineNotFoundSnafu {
engine_name: &table_ident.engine,
})?;
let keeper = Arc::new(RegionAliveKeeper::new(
table_engine,
catalog_manager,
table_ident.clone(),
self.heartbeat_interval_millis,
));
for r in table.table_info().meta.region_numbers.iter() {
keeper.register_region(*r).await;
}
let mut keepers = self.keepers.lock().await;
let _ = keepers.insert(table_id, keeper.clone());
if self.started.load(Ordering::Relaxed) {
keeper.start().await;
info!("RegionAliveKeeper for table {table_ident} is started!");
} else {
info!("RegionAliveKeeper for table {table_ident} is registered but not started yet!");
}
Ok(())
}
pub async fn deregister_table(
&self,
table_ident: &TableIdent,
) -> Option<Arc<RegionAliveKeeper>> {
let table_id = table_ident.table_id;
self.keepers.lock().await.remove(&table_id).map(|x| {
info!("Deregister RegionAliveKeeper for table {table_ident}");
x
})
}
pub async fn register_region(&self, region_ident: &RegionIdent) {
let table_id = region_ident.table_ident.table_id;
let Some(keeper) = self.find_keeper(table_id).await else {
// Alive keeper could be affected by lagging msg, just warn and ignore.
warn!("Alive keeper for region {region_ident} is not found!");
return;
};
keeper.register_region(region_ident.region_number).await
}
pub async fn deregister_region(&self, region_ident: &RegionIdent) {
let table_id = region_ident.table_ident.table_id;
let Some(keeper) = self.find_keeper(table_id).await else {
// Alive keeper could be affected by lagging msg, just warn and ignore.
warn!("Alive keeper for region {region_ident} is not found!");
return;
};
let _ = keeper.deregister_region(region_ident.region_number).await;
}
pub async fn start(&self) {
let keepers = self.keepers.lock().await;
for keeper in keepers.values() {
keeper.start().await;
}
self.started.store(true, Ordering::Relaxed);
info!(
"RegionAliveKeepers for tables {:?} are started!",
keepers.keys().map(|x| x.to_string()).collect::<Vec<_>>(),
);
}
pub fn epoch(&self) -> Instant {
self.epoch
}
}
#[async_trait]
impl HeartbeatResponseHandler for RegionAliveKeepers {
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
!ctx.response.region_leases.is_empty()
}
async fn handle(
&self,
ctx: &mut HeartbeatResponseHandlerContext,
) -> common_meta::error::Result<HandleControl> {
let leases = ctx.response.region_leases.drain(..).collect::<Vec<_>>();
for lease in leases {
let table_ident: TableIdent = match lease
.table_ident
.context(InvalidProtoMsgSnafu {
err_msg: "'table_ident' is missing in RegionLease",
})
.and_then(|x| x.try_into())
{
Ok(x) => x,
Err(e) => {
error!(e; "");
continue;
}
};
let table_id = table_ident.table_id;
let Some(keeper) = self.keepers.lock().await.get(&table_id).cloned() else {
// Alive keeper could be affected by lagging msg, just warn and ignore.
warn!("Alive keeper for table {table_ident} is not found!");
continue;
};
let start_instant = self.epoch + Duration::from_millis(lease.duration_since_epoch);
let deadline = start_instant + Duration::from_secs(lease.lease_seconds);
keeper.keep_lived(lease.regions, deadline).await;
}
Ok(HandleControl::Continue)
}
}
/// [RegionAliveKeeper] starts a countdown for each region in a table. When deadline is reached,
/// the region will be closed.
/// The deadline is controlled by Metasrv. It works like "lease" for regions: a Datanode submits its
/// opened regions to Metasrv, in heartbeats. If Metasrv decides some region could be resided in this
/// Datanode, it will "extend" the region's "lease", with a deadline for [RegionAliveKeeper] to
/// countdown.
pub struct RegionAliveKeeper {
catalog_manager: Arc<MemoryCatalogManager>,
table_engine: TableEngineRef,
table_ident: TableIdent,
countdown_task_handles: Arc<Mutex<HashMap<RegionNumber, Arc<CountdownTaskHandle>>>>,
heartbeat_interval_millis: u64,
started: AtomicBool,
}
impl RegionAliveKeeper {
fn new(
table_engine: TableEngineRef,
catalog_manager: Arc<MemoryCatalogManager>,
table_ident: TableIdent,
heartbeat_interval_millis: u64,
) -> Self {
Self {
catalog_manager,
table_engine,
table_ident,
countdown_task_handles: Arc::new(Mutex::new(HashMap::new())),
heartbeat_interval_millis,
started: AtomicBool::new(false),
}
}
async fn find_handle(&self, region: &RegionNumber) -> Option<Arc<CountdownTaskHandle>> {
self.countdown_task_handles
.lock()
.await
.get(region)
.cloned()
}
async fn register_region(&self, region: RegionNumber) {
if self.find_handle(&region).await.is_some() {
return;
}
let countdown_task_handles = Arc::downgrade(&self.countdown_task_handles);
let on_task_finished = async move {
if let Some(x) = countdown_task_handles.upgrade() {
let _ = x.lock().await.remove(&region);
} // Else the countdown task handles map could be dropped because the keeper is dropped.
};
let catalog_manager = self.catalog_manager.clone();
let ident = self.table_ident.clone();
let handle = Arc::new(CountdownTaskHandle::new(
self.table_engine.clone(),
self.table_ident.clone(),
region,
move |result: Option<CloseTableResult>| {
if matches!(result, Some(CloseTableResult::Released(_))) {
let result = catalog_manager.deregister_table_sync(DeregisterTableRequest {
catalog: ident.catalog.to_string(),
schema: ident.schema.to_string(),
table_name: ident.table.to_string(),
});
info!(
"Deregister table: {} after countdown task finished, result: {result:?}",
ident.table_id
);
} else {
debug!("Countdown task returns: {result:?}");
}
on_task_finished
},
));
let mut handles = self.countdown_task_handles.lock().await;
let _ = handles.insert(region, handle.clone());
if self.started.load(Ordering::Relaxed) {
handle.start(self.heartbeat_interval_millis).await;
info!(
"Region alive countdown for region {region} in table {} is started!",
self.table_ident
);
} else {
info!(
"Region alive countdown for region {region} in table {} is registered but not started yet!",
self.table_ident
);
}
}
async fn deregister_region(&self, region: RegionNumber) -> Option<Arc<CountdownTaskHandle>> {
self.countdown_task_handles
.lock()
.await
.remove(&region)
.map(|x| {
info!(
"Deregister alive countdown for region {region} in table {}",
self.table_ident
);
x
})
}
async fn start(&self) {
let handles = self.countdown_task_handles.lock().await;
for handle in handles.values() {
handle.start(self.heartbeat_interval_millis).await;
}
self.started.store(true, Ordering::Relaxed);
info!(
"Region alive countdowns for regions {:?} in table {} are started!",
handles.keys().copied().collect::<Vec<_>>(),
self.table_ident
);
}
async fn keep_lived(&self, designated_regions: Vec<RegionNumber>, deadline: Instant) {
for region in designated_regions {
if let Some(handle) = self.find_handle(&region).await {
handle.reset_deadline(deadline).await;
}
// Else the region alive keeper might be triggered by lagging messages, we can safely ignore it.
}
}
pub async fn deadline(&self, region: RegionNumber) -> Option<Instant> {
let mut deadline = None;
if let Some(handle) = self.find_handle(&region).await {
let (s, r) = oneshot::channel();
if handle.tx.send(CountdownCommand::Deadline(s)).await.is_ok() {
deadline = r.await.ok()
}
}
deadline
}
pub fn table_ident(&self) -> &TableIdent {
&self.table_ident
}
}
#[derive(Debug)]
enum CountdownCommand {
Start(u64),
Reset(Instant),
Deadline(oneshot::Sender<Instant>),
}
struct CountdownTaskHandle {
tx: mpsc::Sender<CountdownCommand>,
handler: JoinHandle<()>,
table_ident: TableIdent,
region: RegionNumber,
}
impl CountdownTaskHandle {
/// Creates a new [CountdownTaskHandle] and starts the countdown task.
/// # Params
/// - `on_task_finished`: a callback to be invoked when the task is finished. Note that it will not
/// be invoked if the task is cancelled (by dropping the handle). This is because we want something
/// meaningful to be done when the task is finished, e.g. deregister the handle from the map.
/// While dropping the handle does not necessarily mean the task is finished.
fn new<Fut>(
table_engine: TableEngineRef,
table_ident: TableIdent,
region: RegionNumber,
on_task_finished: impl FnOnce(Option<CloseTableResult>) -> Fut + Send + 'static,
) -> Self
where
Fut: Future<Output = ()> + Send,
{
let (tx, rx) = mpsc::channel(1024);
let mut countdown_task = CountdownTask {
table_engine,
table_ident: table_ident.clone(),
region,
rx,
};
let handler = common_runtime::spawn_bg(async move {
let result = countdown_task.run().await;
on_task_finished(result).await;
});
Self {
tx,
handler,
table_ident,
region,
}
}
async fn start(&self, heartbeat_interval_millis: u64) {
if let Err(e) = self
.tx
.send(CountdownCommand::Start(heartbeat_interval_millis))
.await
{
warn!(
"Failed to start region alive keeper countdown: {e}. \
Maybe the task is stopped due to region been closed."
);
}
}
async fn reset_deadline(&self, deadline: Instant) {
if let Err(e) = self.tx.send(CountdownCommand::Reset(deadline)).await {
warn!(
"Failed to reset region alive keeper deadline: {e}. \
Maybe the task is stopped due to region been closed."
);
}
}
}
impl Drop for CountdownTaskHandle {
fn drop(&mut self) {
debug!(
"Aborting region alive countdown task for region {} in table {}",
self.region, self.table_ident,
);
self.handler.abort();
}
}
struct CountdownTask {
table_engine: TableEngineRef,
table_ident: TableIdent,
region: RegionNumber,
rx: mpsc::Receiver<CountdownCommand>,
}
impl CountdownTask {
// returns true if
async fn run(&mut self) -> Option<CloseTableResult> {
// 30 years. See `Instant::far_future`.
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30);
// Make sure the alive countdown is not gonna happen before heartbeat task is started (the
// "start countdown" command will be sent from heartbeat task).
let countdown = tokio::time::sleep_until(far_future);
tokio::pin!(countdown);
let region = &self.region;
let table_ident = &self.table_ident;
loop {
tokio::select! {
command = self.rx.recv() => {
match command {
Some(CountdownCommand::Start(heartbeat_interval_millis)) => {
// Set first deadline in 4 heartbeats (roughly after 20 seconds from now if heartbeat
// interval is set to default 5 seconds), to make Datanode and Metasrv more tolerable to
// network or other jitters during startup.
let first_deadline = Instant::now() + Duration::from_millis(heartbeat_interval_millis) * 4;
countdown.set(tokio::time::sleep_until(first_deadline));
},
Some(CountdownCommand::Reset(deadline)) => {
if countdown.deadline() < deadline {
debug!(
"Reset deadline of region {region} of table {table_ident} to approximately {} seconds later",
(deadline - Instant::now()).as_secs_f32(),
);
countdown.set(tokio::time::sleep_until(deadline));
}
// Else the countdown could be either:
// - not started yet;
// - during startup protection;
// - received a lagging heartbeat message.
// All can be safely ignored.
},
None => {
info!(
"The handle of countdown task for region {region} of table {table_ident} \
is dropped, RegionAliveKeeper out."
);
break;
},
Some(CountdownCommand::Deadline(tx)) => {
let _ = tx.send(countdown.deadline());
}
}
}
() = &mut countdown => {
let result = self.close_region().await;
warn!(
"Region {region} of table {table_ident} is closed, result: {result:?}. \
RegionAliveKeeper out.",
);
return Some(result);
}
}
}
None
}
async fn close_region(&self) -> CloseTableResult {
let ctx = EngineContext::default();
let region = self.region;
let table_ident = &self.table_ident;
loop {
let request = CloseTableRequest {
catalog_name: table_ident.catalog.clone(),
schema_name: table_ident.schema.clone(),
table_name: table_ident.table.clone(),
table_id: table_ident.table_id,
region_numbers: vec![region],
flush: true,
};
match self.table_engine.close_table(&ctx, request).await {
Ok(result) => return result,
// If region is failed to close, immediately retry. Maybe we should panic instead?
Err(e) => error!(e;
"Failed to close region {region} of table {table_ident}. \
For the integrity of data, retry closing and retry without wait.",
),
}
}
}
}
#[cfg(test)]
mod test {
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use api::v1::meta::{HeartbeatResponse, RegionLease};
use common_meta::heartbeat::mailbox::HeartbeatMailbox;
use datatypes::schema::RawSchema;
use table::engine::manager::MemoryTableEngineManager;
use table::engine::TableEngine;
use table::requests::{CreateTableRequest, TableOptions};
use table::test_util::EmptyTable;
use super::*;
use crate::remote::mock::MockTableEngine;
async fn prepare_keepers() -> (TableIdent, RegionAliveKeepers) {
let table_engine = Arc::new(MockTableEngine::default());
let table_engine_manager = Arc::new(MemoryTableEngineManager::new(table_engine));
let keepers = RegionAliveKeepers::new(table_engine_manager, 5000);
let catalog = "my_catalog";
let schema = "my_schema";
let table = "my_table";
let table_ident = TableIdent {
catalog: catalog.to_string(),
schema: schema.to_string(),
table: table.to_string(),
table_id: 1,
engine: "MockTableEngine".to_string(),
};
let table = Arc::new(EmptyTable::new(CreateTableRequest {
id: 1,
catalog_name: catalog.to_string(),
schema_name: schema.to_string(),
table_name: table.to_string(),
desc: None,
schema: RawSchema {
column_schemas: vec![],
timestamp_index: None,
version: 0,
},
region_numbers: vec![1, 2, 3],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: TableOptions::default(),
engine: "MockTableEngine".to_string(),
}));
let catalog_manager = MemoryCatalogManager::new_with_table(table.clone());
keepers
.register_table(table_ident.clone(), table, catalog_manager)
.await
.unwrap();
assert!(keepers
.keepers
.lock()
.await
.contains_key(&table_ident.table_id));
(table_ident, keepers)
}
#[tokio::test(flavor = "multi_thread")]
async fn test_handle_heartbeat_response() {
let (table_ident, keepers) = prepare_keepers().await;
keepers.start().await;
let startup_protection_until = Instant::now() + Duration::from_secs(21);
let duration_since_epoch = (Instant::now() - keepers.epoch).as_millis() as _;
let lease_seconds = 100;
let response = HeartbeatResponse {
region_leases: vec![RegionLease {
table_ident: Some(table_ident.clone().into()),
regions: vec![1, 3], // Not extending region 2's lease time.
duration_since_epoch,
lease_seconds,
}],
..Default::default()
};
let keep_alive_until = keepers.epoch
+ Duration::from_millis(duration_since_epoch)
+ Duration::from_secs(lease_seconds);
let (tx, _) = mpsc::channel(8);
let mailbox = Arc::new(HeartbeatMailbox::new(tx));
let mut ctx = HeartbeatResponseHandlerContext::new(mailbox, response);
assert!(keepers.handle(&mut ctx).await.unwrap() == HandleControl::Continue);
// sleep to wait for background task spawned in `handle`
tokio::time::sleep(Duration::from_secs(1)).await;
async fn test(
keeper: &Arc<RegionAliveKeeper>,
region_number: RegionNumber,
startup_protection_until: Instant,
keep_alive_until: Instant,
is_kept_live: bool,
) {
let deadline = keeper.deadline(region_number).await.unwrap();
if is_kept_live {
assert!(deadline > startup_protection_until && deadline == keep_alive_until);
} else {
assert!(deadline <= startup_protection_until);
}
}
let keeper = &keepers
.keepers
.lock()
.await
.get(&table_ident.table_id)
.cloned()
.unwrap();
// Test region 1 and 3 is kept lived. Their deadlines are updated to desired instant.
test(keeper, 1, startup_protection_until, keep_alive_until, true).await;
test(keeper, 3, startup_protection_until, keep_alive_until, true).await;
// Test region 2 is not kept lived. It's deadline is not updated: still during startup protection period.
test(keeper, 2, startup_protection_until, keep_alive_until, false).await;
}
#[tokio::test(flavor = "multi_thread")]
async fn test_region_alive_keepers() {
let (table_ident, keepers) = prepare_keepers().await;
keepers
.register_region(&RegionIdent {
cluster_id: 1,
datanode_id: 1,
table_ident: table_ident.clone(),
region_number: 4,
})
.await;
keepers.start().await;
for keeper in keepers.keepers.lock().await.values() {
let regions = {
let handles = keeper.countdown_task_handles.lock().await;
handles.keys().copied().collect::<Vec<_>>()
};
for region in regions {
// assert countdown tasks are started
let deadline = keeper.deadline(region).await.unwrap();
assert!(deadline <= Instant::now() + Duration::from_secs(20));
}
}
keepers
.deregister_region(&RegionIdent {
cluster_id: 1,
datanode_id: 1,
table_ident: table_ident.clone(),
region_number: 1,
})
.await;
let mut regions = keepers
.find_keeper(table_ident.table_id)
.await
.unwrap()
.countdown_task_handles
.lock()
.await
.keys()
.copied()
.collect::<Vec<_>>();
regions.sort();
assert_eq!(regions, vec![2, 3, 4]);
let keeper = keepers.deregister_table(&table_ident).await.unwrap();
assert!(Arc::try_unwrap(keeper).is_ok(), "keeper is not dropped");
assert!(keepers.keepers.lock().await.is_empty());
}
#[tokio::test(flavor = "multi_thread")]
async fn test_region_alive_keeper() {
let table_engine = Arc::new(MockTableEngine::default());
let table_ident = TableIdent {
catalog: "my_catalog".to_string(),
schema: "my_schema".to_string(),
table: "my_table".to_string(),
table_id: 1024,
engine: "mito".to_string(),
};
let catalog_manager = MemoryCatalogManager::with_default_setup();
let keeper = RegionAliveKeeper::new(table_engine, catalog_manager, table_ident, 1000);
let region = 1;
assert!(keeper.find_handle(&region).await.is_none());
keeper.register_region(region).await;
let _ = keeper.find_handle(&region).await.unwrap();
let ten_seconds_later = || Instant::now() + Duration::from_secs(10);
keeper.keep_lived(vec![1, 2, 3], ten_seconds_later()).await;
assert!(keeper.find_handle(&2).await.is_none());
assert!(keeper.find_handle(&3).await.is_none());
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 29);
// assert if keeper is not started, keep_lived is of no use
assert!(keeper.deadline(region).await.unwrap() > far_future);
keeper.start().await;
keeper.keep_lived(vec![1, 2, 3], ten_seconds_later()).await;
// assert keep_lived works if keeper is started
assert!(keeper.deadline(region).await.unwrap() <= ten_seconds_later());
let handle = keeper.deregister_region(region).await.unwrap();
assert!(Arc::try_unwrap(handle).is_ok(), "handle is not dropped");
assert!(keeper.find_handle(&region).await.is_none());
}
#[tokio::test(flavor = "multi_thread")]
async fn test_countdown_task_handle() {
let table_engine = Arc::new(MockTableEngine::default());
let table_ident = TableIdent {
catalog: "my_catalog".to_string(),
schema: "my_schema".to_string(),
table: "my_table".to_string(),
table_id: 1024,
engine: "mito".to_string(),
};
let finished = Arc::new(AtomicBool::new(false));
let finished_clone = finished.clone();
let handle = CountdownTaskHandle::new(
table_engine.clone(),
table_ident.clone(),
1,
|_| async move { finished_clone.store(true, Ordering::Relaxed) },
);
let tx = handle.tx.clone();
// assert countdown task is running
tx.send(CountdownCommand::Start(5000)).await.unwrap();
assert!(!finished.load(Ordering::Relaxed));
drop(handle);
tokio::time::sleep(Duration::from_secs(1)).await;
// assert countdown task is stopped
assert!(tx
.try_send(CountdownCommand::Reset(
Instant::now() + Duration::from_secs(10)
))
.is_err());
// assert `on_task_finished` is not called (because the task is aborted by the handle's drop)
assert!(!finished.load(Ordering::Relaxed));
let finished = Arc::new(AtomicBool::new(false));
let finished_clone = finished.clone();
let handle = CountdownTaskHandle::new(table_engine, table_ident, 1, |_| async move {
finished_clone.store(true, Ordering::Relaxed)
});
handle.tx.send(CountdownCommand::Start(100)).await.unwrap();
tokio::time::sleep(Duration::from_secs(1)).await;
// assert `on_task_finished` is called when task is finished normally
assert!(finished.load(Ordering::Relaxed));
}
#[tokio::test(flavor = "multi_thread")]
async fn test_countdown_task_run() {
let ctx = &EngineContext::default();
let catalog = "my_catalog";
let schema = "my_schema";
let table = "my_table";
let table_id = 1;
let request = CreateTableRequest {
id: table_id,
catalog_name: catalog.to_string(),
schema_name: schema.to_string(),
table_name: table.to_string(),
desc: None,
schema: RawSchema {
column_schemas: vec![],
timestamp_index: None,
version: 0,
},
region_numbers: vec![],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: TableOptions::default(),
engine: "mito".to_string(),
};
let table_engine = Arc::new(MockTableEngine::default());
let _ = table_engine.create_table(ctx, request).await.unwrap();
let table_ident = TableIdent {
catalog: catalog.to_string(),
schema: schema.to_string(),
table: table.to_string(),
table_id,
engine: "mito".to_string(),
};
let (tx, rx) = mpsc::channel(10);
let mut task = CountdownTask {
table_engine: table_engine.clone(),
table_ident,
region: 1,
rx,
};
let _handle = common_runtime::spawn_bg(async move {
task.run().await;
});
async fn deadline(tx: &mpsc::Sender<CountdownCommand>) -> Instant {
let (s, r) = oneshot::channel();
tx.send(CountdownCommand::Deadline(s)).await.unwrap();
r.await.unwrap()
}
// if countdown task is not started, its deadline is set to far future
assert!(deadline(&tx).await > Instant::now() + Duration::from_secs(86400 * 365 * 29));
// start countdown in 250ms * 4 = 1s
tx.send(CountdownCommand::Start(250)).await.unwrap();
// assert deadline is correctly set
assert!(deadline(&tx).await <= Instant::now() + Duration::from_secs(1));
// reset countdown in 1.5s
tx.send(CountdownCommand::Reset(
Instant::now() + Duration::from_millis(1500),
))
.await
.unwrap();
// assert the table is closed after deadline is reached
assert!(table_engine.table_exists(ctx, table_id));
// spare 500ms for the task to close the table
tokio::time::sleep(Duration::from_millis(2000)).await;
assert!(!table_engine.table_exists(ctx, table_id));
}
}

634
src/catalog/src/system.rs Normal file
View File

@@ -0,0 +1,634 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::sync::Arc;
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MITO_ENGINE,
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::{debug, warn};
use common_time::util;
use datatypes::prelude::{ConcreteDataType, ScalarVector, VectorRef};
use datatypes::schema::{ColumnSchema, RawSchema};
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::ScanRequest;
use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::{CreateTableRequest, InsertRequest, OpenTableRequest, TableOptions};
use table::TableRef;
use crate::error::{
self, CreateSystemCatalogSnafu, DeregisterTableSnafu, EmptyValueSnafu, Error,
InsertCatalogRecordSnafu, InvalidEntryTypeSnafu, InvalidKeySnafu, OpenSystemCatalogSnafu,
Result, ValueDeserializeSnafu,
};
use crate::DeregisterTableRequest;
pub const ENTRY_TYPE_INDEX: usize = 0;
pub const KEY_INDEX: usize = 1;
pub const VALUE_INDEX: usize = 3;
pub struct SystemCatalogTable(TableRef);
impl SystemCatalogTable {
pub async fn new(engine: TableEngineRef) -> Result<Self> {
let request = OpenTableRequest {
catalog_name: SYSTEM_CATALOG_NAME.to_string(),
schema_name: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
table_id: SYSTEM_CATALOG_TABLE_ID,
region_numbers: vec![0],
};
let schema = build_system_catalog_schema();
let ctx = EngineContext::default();
if let Some(table) = engine
.open_table(&ctx, request)
.await
.context(OpenSystemCatalogSnafu)?
{
Ok(Self(table))
} else {
// system catalog table is not yet created, try to create
let request = CreateTableRequest {
id: SYSTEM_CATALOG_TABLE_ID,
catalog_name: SYSTEM_CATALOG_NAME.to_string(),
schema_name: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
desc: Some("System catalog table".to_string()),
schema,
region_numbers: vec![0],
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
create_if_not_exists: true,
table_options: TableOptions::default(),
engine: engine.name().to_string(),
};
let table = engine
.create_table(&ctx, request)
.await
.context(CreateSystemCatalogSnafu)?;
Ok(Self(table))
}
}
pub async fn register_table(
&self,
catalog: String,
schema: String,
table_name: String,
table_id: TableId,
engine: String,
) -> Result<usize> {
let insert_request =
build_table_insert_request(catalog, schema, table_name, table_id, engine);
self.0
.insert(insert_request)
.await
.context(InsertCatalogRecordSnafu)
}
pub(crate) async fn deregister_table(
&self,
request: &DeregisterTableRequest,
table_id: TableId,
) -> Result<()> {
let deletion_request = build_table_deletion_request(request, table_id);
self.0
.insert(deletion_request)
.await
.map(|x| {
if x != 1 {
let table = common_catalog::format_full_table_name(
&request.catalog,
&request.schema,
&request.table_name
);
warn!("Failed to delete table record from information_schema, unexpected returned result: {x}, table: {table}");
}
})
.with_context(|_| DeregisterTableSnafu {
request: request.clone(),
})
}
pub async fn register_schema(&self, catalog: String, schema: String) -> Result<usize> {
let insert_request = build_schema_insert_request(catalog, schema);
self.0
.insert(insert_request)
.await
.context(InsertCatalogRecordSnafu)
}
/// Create a stream of all entries inside system catalog table
pub async fn records(&self) -> Result<SendableRecordBatchStream> {
let full_projection = None;
let scan_req = ScanRequest {
sequence: None,
projection: full_projection,
filters: vec![],
output_ordering: None,
limit: None,
};
let stream = self
.0
.scan_to_stream(scan_req)
.await
.context(error::SystemCatalogTableScanSnafu)?;
Ok(stream)
}
pub fn as_table_ref(&self) -> TableRef {
self.0.clone()
}
}
/// Build system catalog table schema.
/// A system catalog table consists of 6 columns, namely
/// - entry_type: type of entry in current row, can be any variant of [EntryType].
/// - key: a binary encoded key of entry, differs according to different entry type.
/// - timestamp: currently not used.
/// - value: JSON-encoded value of entry's metadata.
/// - gmt_created: create time of this metadata.
/// - gmt_modified: last updated time of this metadata.
fn build_system_catalog_schema() -> RawSchema {
let cols = vec![
ColumnSchema::new(
"entry_type".to_string(),
ConcreteDataType::uint8_datatype(),
false,
),
ColumnSchema::new(
"key".to_string(),
ConcreteDataType::binary_datatype(),
false,
),
ColumnSchema::new(
"timestamp".to_string(),
ConcreteDataType::timestamp_millisecond_datatype(),
false,
)
.with_time_index(true),
ColumnSchema::new(
"value".to_string(),
ConcreteDataType::binary_datatype(),
false,
),
ColumnSchema::new(
"gmt_created".to_string(),
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
ColumnSchema::new(
"gmt_modified".to_string(),
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
];
RawSchema::new(cols)
}
/// Formats key string for table entry in system catalog
#[inline]
pub fn format_table_entry_key(catalog: &str, schema: &str, table_id: TableId) -> String {
format!("{catalog}.{schema}.{table_id}")
}
pub fn build_table_insert_request(
catalog: String,
schema: String,
table_name: String,
table_id: TableId,
engine: String,
) -> InsertRequest {
let entry_key = format_table_entry_key(&catalog, &schema, table_id);
build_insert_request(
EntryType::Table,
entry_key.as_bytes(),
serde_json::to_string(&TableEntryValue {
table_name,
engine,
is_deleted: false,
})
.unwrap()
.as_bytes(),
)
}
pub(crate) fn build_table_deletion_request(
request: &DeregisterTableRequest,
table_id: TableId,
) -> InsertRequest {
let entry_key = format_table_entry_key(&request.catalog, &request.schema, table_id);
build_insert_request(
EntryType::Table,
entry_key.as_bytes(),
serde_json::to_string(&TableEntryValue {
table_name: "".to_string(),
engine: "".to_string(),
is_deleted: true,
})
.unwrap()
.as_bytes(),
)
}
fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<String, VectorRef> {
HashMap::from([
(
"entry_type".to_string(),
Arc::new(UInt8Vector::from_slice([entry_type as u8])) as VectorRef,
),
(
"key".to_string(),
Arc::new(BinaryVector::from_slice(&[key])) as VectorRef,
),
(
"timestamp".to_string(),
// Timestamp in key part is intentionally left to 0
Arc::new(TimestampMillisecondVector::from_slice([0])) as VectorRef,
),
])
}
pub fn build_schema_insert_request(catalog_name: String, schema_name: String) -> InsertRequest {
let full_schema_name = format!("{catalog_name}.{schema_name}");
build_insert_request(
EntryType::Schema,
full_schema_name.as_bytes(),
serde_json::to_string(&SchemaEntryValue {})
.unwrap()
.as_bytes(),
)
}
pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) -> InsertRequest {
let primary_key_columns = build_primary_key_columns(entry_type, key);
let mut columns_values = HashMap::with_capacity(6);
columns_values.extend(primary_key_columns);
let _ = columns_values.insert(
"value".to_string(),
Arc::new(BinaryVector::from_slice(&[value])) as _,
);
let now = util::current_time_millis();
let _ = columns_values.insert(
"gmt_created".to_string(),
Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
);
let _ = columns_values.insert(
"gmt_modified".to_string(),
Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
);
InsertRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
columns_values,
region_number: 0, // system catalog table has only one region
}
}
pub fn decode_system_catalog(
entry_type: Option<u8>,
key: Option<&[u8]>,
value: Option<&[u8]>,
) -> Result<Entry> {
debug!(
"Decode system catalog entry: {:?}, {:?}, {:?}",
entry_type, key, value
);
let entry_type = entry_type.context(InvalidKeySnafu { key: None })?;
let key = String::from_utf8_lossy(key.context(InvalidKeySnafu { key: None })?);
match EntryType::try_from(entry_type)? {
EntryType::Catalog => {
// As for catalog entry, the key is a string with format: `<catalog_name>`
// and the value is current not used.
let catalog_name = key.to_string();
Ok(Entry::Catalog(CatalogEntry { catalog_name }))
}
EntryType::Schema => {
// As for schema entry, the key is a string with format: `<catalog_name>.<schema_name>`
// and the value is current not used.
let schema_parts = key.split('.').collect::<Vec<_>>();
ensure!(
schema_parts.len() == 2,
InvalidKeySnafu {
key: Some(key.to_string())
}
);
Ok(Entry::Schema(SchemaEntry {
catalog_name: schema_parts[0].to_string(),
schema_name: schema_parts[1].to_string(),
}))
}
EntryType::Table => {
// As for table entry, the key is a string with format: `<catalog_name>.<schema_name>.<table_id>`
// and the value is a JSON string with format: `{"table_name": <table_name>}`
let table_parts = key.split('.').collect::<Vec<_>>();
ensure!(
table_parts.len() >= 3,
InvalidKeySnafu {
key: Some(key.to_string())
}
);
let value = value.context(EmptyValueSnafu)?;
debug!("Table meta value: {}", String::from_utf8_lossy(value));
let table_meta: TableEntryValue =
serde_json::from_slice(value).context(ValueDeserializeSnafu)?;
let table_id = table_parts[2].parse::<TableId>().unwrap();
Ok(Entry::Table(TableEntry {
catalog_name: table_parts[0].to_string(),
schema_name: table_parts[1].to_string(),
table_name: table_meta.table_name,
table_id,
engine: table_meta.engine,
is_deleted: table_meta.is_deleted,
}))
}
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum EntryType {
Catalog = 1,
Schema = 2,
Table = 3,
}
impl TryFrom<u8> for EntryType {
type Error = Error;
fn try_from(value: u8) -> std::result::Result<Self, Self::Error> {
match value {
b if b == Self::Catalog as u8 => Ok(Self::Catalog),
b if b == Self::Schema as u8 => Ok(Self::Schema),
b if b == Self::Table as u8 => Ok(Self::Table),
b => InvalidEntryTypeSnafu {
entry_type: Some(b),
}
.fail(),
}
}
}
#[derive(Debug, PartialEq, Eq, Ord, PartialOrd)]
pub enum Entry {
Catalog(CatalogEntry),
Schema(SchemaEntry),
Table(TableEntry),
}
#[derive(Debug, PartialEq, Eq, Ord, PartialOrd)]
pub struct CatalogEntry {
pub catalog_name: String,
}
#[derive(Debug, PartialEq, Eq, Ord, PartialOrd)]
pub struct SchemaEntry {
pub catalog_name: String,
pub schema_name: String,
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct SchemaEntryValue;
#[derive(Debug, PartialEq, Eq, Ord, PartialOrd)]
pub struct TableEntry {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
pub table_id: TableId,
pub engine: String,
pub is_deleted: bool,
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct TableEntryValue {
pub table_name: String,
#[serde(default = "mito_engine")]
pub engine: String,
#[serde(default = "not_deleted")]
pub is_deleted: bool,
}
fn mito_engine() -> String {
MITO_ENGINE.to_string()
}
fn not_deleted() -> bool {
false
}
#[cfg(test)]
mod tests {
use common_recordbatch::RecordBatches;
use common_test_util::temp_dir::{create_temp_dir, TempDir};
use datatypes::value::Value;
use log_store::NoopLogStore;
use mito::config::EngineConfig;
use mito::engine::{MitoEngine, MITO_ENGINE};
use object_store::ObjectStore;
use storage::compaction::noop::NoopCompactionScheduler;
use storage::config::EngineConfig as StorageEngineConfig;
use storage::EngineImpl;
use table::metadata::TableType;
use table::metadata::TableType::Base;
use super::*;
#[test]
pub fn test_decode_catalog_entry() {
let entry = decode_system_catalog(
Some(EntryType::Catalog as u8),
Some("some_catalog".as_bytes()),
None,
)
.unwrap();
if let Entry::Catalog(e) = entry {
assert_eq!("some_catalog", e.catalog_name);
} else {
panic!("Unexpected type: {entry:?}");
}
}
#[test]
pub fn test_decode_schema_entry() {
let entry = decode_system_catalog(
Some(EntryType::Schema as u8),
Some("some_catalog.some_schema".as_bytes()),
None,
)
.unwrap();
if let Entry::Schema(e) = entry {
assert_eq!("some_catalog", e.catalog_name);
assert_eq!("some_schema", e.schema_name);
} else {
panic!("Unexpected type: {entry:?}");
}
}
#[test]
pub fn test_decode_table() {
let entry = decode_system_catalog(
Some(EntryType::Table as u8),
Some("some_catalog.some_schema.42".as_bytes()),
Some("{\"table_name\":\"some_table\"}".as_bytes()),
)
.unwrap();
if let Entry::Table(e) = entry {
assert_eq!("some_catalog", e.catalog_name);
assert_eq!("some_schema", e.schema_name);
assert_eq!("some_table", e.table_name);
assert_eq!(42, e.table_id);
} else {
panic!("Unexpected type: {entry:?}");
}
}
#[test]
pub fn test_decode_mismatch() {
assert!(decode_system_catalog(
Some(EntryType::Table as u8),
Some("some_catalog.some_schema.42".as_bytes()),
None,
)
.is_err());
}
#[test]
pub fn test_entry_type() {
assert_eq!(EntryType::Catalog, EntryType::try_from(1).unwrap());
assert_eq!(EntryType::Schema, EntryType::try_from(2).unwrap());
assert_eq!(EntryType::Table, EntryType::try_from(3).unwrap());
assert!(EntryType::try_from(4).is_err());
}
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
let dir = create_temp_dir("system-table-test");
let store_dir = dir.path().to_string_lossy();
let mut builder = object_store::services::Fs::default();
let _ = builder.root(&store_dir);
let object_store = ObjectStore::new(builder).unwrap().finish();
let noop_compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
let table_engine = Arc::new(MitoEngine::new(
EngineConfig::default(),
EngineImpl::new(
StorageEngineConfig::default(),
Arc::new(NoopLogStore),
object_store.clone(),
noop_compaction_scheduler,
)
.unwrap(),
object_store,
));
(dir, table_engine)
}
#[tokio::test]
async fn test_system_table_type() {
let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
assert_eq!(Base, system_table.as_table_ref().table_type());
}
#[tokio::test]
async fn test_system_table_info() {
let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
let info = system_table.as_table_ref().table_info();
assert_eq!(TableType::Base, info.table_type);
assert_eq!(SYSTEM_CATALOG_TABLE_NAME, info.name);
assert_eq!(SYSTEM_CATALOG_TABLE_ID, info.ident.table_id);
assert_eq!(SYSTEM_CATALOG_NAME, info.catalog_name);
assert_eq!(INFORMATION_SCHEMA_NAME, info.schema_name);
}
#[tokio::test]
async fn test_system_catalog_table_records() {
let (_, table_engine) = prepare_table_engine().await;
let catalog_table = SystemCatalogTable::new(table_engine).await.unwrap();
let result = catalog_table
.register_table(
DEFAULT_CATALOG_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string(),
"my_table".to_string(),
1,
MITO_ENGINE.to_string(),
)
.await
.unwrap();
assert_eq!(result, 1);
let records = catalog_table.records().await.unwrap();
let mut batches = RecordBatches::try_collect(records).await.unwrap().take();
assert_eq!(batches.len(), 1);
let batch = batches.remove(0);
assert_eq!(batch.num_rows(), 1);
let row = batch.rows().next().unwrap();
let Value::UInt8(entry_type) = row[0] else {
unreachable!()
};
let Value::Binary(key) = row[1].clone() else {
unreachable!()
};
let Value::Binary(value) = row[3].clone() else {
unreachable!()
};
let entry = decode_system_catalog(Some(entry_type), Some(&*key), Some(&*value)).unwrap();
let expected = Entry::Table(TableEntry {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "my_table".to_string(),
table_id: 1,
engine: MITO_ENGINE.to_string(),
is_deleted: false,
});
assert_eq!(entry, expected);
catalog_table
.deregister_table(
&DeregisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "my_table".to_string(),
},
1,
)
.await
.unwrap();
let records = catalog_table.records().await.unwrap();
let batches = RecordBatches::try_collect(records).await.unwrap().take();
assert_eq!(batches.len(), 1);
}
}

View File

@@ -123,7 +123,7 @@ mod tests {
use session::context::QueryContext; use session::context::QueryContext;
use super::*; use super::*;
use crate::memory::MemoryCatalogManager; use crate::local::MemoryCatalogManager;
#[test] #[test]
fn test_validate_table_ref() { fn test_validate_table_ref() {

77
src/catalog/src/tables.rs Normal file
View File

@@ -0,0 +1,77 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The `tables` table in system catalog keeps a record of all tables created by user.
use std::sync::Arc;
use table::metadata::TableId;
use crate::system::SystemCatalogTable;
use crate::DeregisterTableRequest;
pub struct InformationSchema {
pub system: Arc<SystemCatalogTable>,
}
pub struct SystemCatalog {
pub information_schema: Arc<InformationSchema>,
}
impl SystemCatalog {
pub(crate) fn new(system: SystemCatalogTable) -> Self {
let schema = InformationSchema {
system: Arc::new(system),
};
Self {
information_schema: Arc::new(schema),
}
}
pub async fn register_table(
&self,
catalog: String,
schema: String,
table_name: String,
table_id: TableId,
engine: String,
) -> crate::error::Result<usize> {
self.information_schema
.system
.register_table(catalog, schema, table_name, table_id, engine)
.await
}
pub(crate) async fn deregister_table(
&self,
request: &DeregisterTableRequest,
table_id: TableId,
) -> crate::error::Result<()> {
self.information_schema
.system
.deregister_table(request, table_id)
.await
}
pub async fn register_schema(
&self,
catalog: String,
schema: String,
) -> crate::error::Result<usize> {
self.information_schema
.system
.register_schema(catalog, schema)
.await
}
}

View File

@@ -0,0 +1,175 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[cfg(test)]
mod tests {
use std::sync::Arc;
use catalog::local::LocalCatalogManager;
use catalog::{CatalogManager, RegisterTableRequest, RenameTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_telemetry::{error, info};
use common_test_util::temp_dir::TempDir;
use mito::config::EngineConfig;
use table::engine::manager::MemoryTableEngineManager;
use table::table::numbers::NumbersTable;
use tokio::sync::Mutex;
async fn create_local_catalog_manager(
) -> Result<(TempDir, LocalCatalogManager), catalog::error::Error> {
let (dir, object_store) =
mito::table::test_util::new_test_object_store("setup_mock_engine_and_table").await;
let mock_engine = Arc::new(mito::table::test_util::MockMitoEngine::new(
EngineConfig::default(),
mito::table::test_util::MockEngine::default(),
object_store,
));
let engine_manager = Arc::new(MemoryTableEngineManager::new(mock_engine.clone()));
let catalog_manager = LocalCatalogManager::try_new(engine_manager).await.unwrap();
catalog_manager.start().await?;
Ok((dir, catalog_manager))
}
#[tokio::test]
async fn test_rename_table() {
common_telemetry::init_default_ut_logging();
let (_dir, catalog_manager) = create_local_catalog_manager().await.unwrap();
// register table
let table_name = "test_table";
let table_id = 42;
let request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
table_id,
table: NumbersTable::table(table_id),
};
assert!(catalog_manager.register_table(request).await.unwrap());
// rename table
let new_table_name = "table_t";
let rename_table_req = RenameTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(),
new_table_name: new_table_name.to_string(),
table_id,
};
assert!(catalog_manager
.rename_table(rename_table_req)
.await
.unwrap());
let registered_table = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);
}
#[tokio::test]
async fn test_duplicate_register() {
let (_dir, catalog_manager) = create_local_catalog_manager().await.unwrap();
let request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(),
table_id: 42,
table: NumbersTable::table(42),
};
assert!(catalog_manager
.register_table(request.clone())
.await
.unwrap());
// register table with same table id will succeed with 0 as return val.
assert!(!catalog_manager.register_table(request).await.unwrap());
let err = catalog_manager
.register_table(RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(),
table_id: 43,
table: NumbersTable::table(43),
})
.await
.unwrap_err();
assert!(
err.to_string()
.contains("Table `greptime.public.test_table` already exists"),
"Actual error message: {err}",
);
}
#[test]
fn test_concurrent_register() {
common_telemetry::init_default_ut_logging();
let rt = Arc::new(tokio::runtime::Builder::new_multi_thread().build().unwrap());
let (_dir, catalog_manager) =
rt.block_on(async { create_local_catalog_manager().await.unwrap() });
let catalog_manager = Arc::new(catalog_manager);
let succeed = Arc::new(Mutex::new(None));
let mut handles = Vec::with_capacity(8);
for i in 0..8 {
let catalog = catalog_manager.clone();
let succeed = succeed.clone();
let handle = rt.spawn(async move {
let table_id = 42 + i;
let table = NumbersTable::table(table_id);
let table_info = table.table_info();
let req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(),
table_id,
table,
};
match catalog.register_table(req).await {
Ok(res) => {
if res {
let mut succeed = succeed.lock().await;
info!("Successfully registered table: {}", table_id);
*succeed = Some(table_info);
}
}
Err(_) => {
error!("Failed to register table {}", table_id);
}
}
});
handles.push(handle);
}
rt.block_on(async move {
for handle in handles {
handle.await.unwrap();
}
let guard = succeed.lock().await;
let table_info = guard.as_ref().unwrap();
let table_registered = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
.await
.unwrap()
.unwrap();
assert_eq!(
table_registered.table_info().ident.table_id,
table_info.ident.table_id
);
});
}
}

View File

@@ -0,0 +1,453 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(assert_matches)]
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::collections::HashSet;
use std::sync::Arc;
use std::time::Duration;
use catalog::remote::mock::MockTableEngine;
use catalog::remote::region_alive_keeper::RegionAliveKeepers;
use catalog::remote::{CachedMetaKvBackend, RemoteCatalogManager};
use catalog::{CatalogManager, RegisterSchemaRequest, RegisterTableRequest};
use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MITO_ENGINE,
};
use common_meta::helper::CatalogValue;
use common_meta::ident::TableIdent;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::KvBackend;
use common_meta::rpc::store::{CompareAndPutRequest, PutRequest};
use datatypes::schema::RawSchema;
use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef};
use table::engine::{EngineContext, TableEngineRef};
use table::requests::CreateTableRequest;
use table::test_util::EmptyTable;
use tokio::time::Instant;
struct TestingComponents {
catalog_manager: Arc<RemoteCatalogManager>,
table_engine_manager: TableEngineManagerRef,
region_alive_keepers: Arc<RegionAliveKeepers>,
}
impl TestingComponents {
fn table_engine(&self) -> TableEngineRef {
self.table_engine_manager.engine(MITO_ENGINE).unwrap()
}
}
#[tokio::test]
async fn test_cached_backend() {
let backend = CachedMetaKvBackend::wrap(Arc::new(MemoryKvBackend::default()));
let default_catalog_key = CatalogNameKey::new(DEFAULT_CATALOG_NAME).to_string();
let req = PutRequest::new()
.with_key(default_catalog_key.as_bytes())
.with_value(CatalogValue.as_bytes().unwrap());
backend.put(req).await.unwrap();
let ret = backend.get(b"__catalog_name/greptime").await.unwrap();
let _ = ret.unwrap();
let req = CompareAndPutRequest::new()
.with_key(b"__catalog_name/greptime".to_vec())
.with_expect(CatalogValue.as_bytes().unwrap())
.with_value(b"123".to_vec());
let _ = backend.compare_and_put(req).await.unwrap();
let ret = backend.get(b"__catalog_name/greptime").await.unwrap();
assert_eq!(b"123", ret.as_ref().unwrap().value.as_slice());
let req = PutRequest::new()
.with_key(b"__catalog_name/greptime".to_vec())
.with_value(b"1234".to_vec());
let _ = backend.put(req).await;
let ret = backend.get(b"__catalog_name/greptime").await.unwrap();
assert_eq!(b"1234", ret.unwrap().value.as_slice());
backend
.delete(b"__catalog_name/greptime", false)
.await
.unwrap();
let ret = backend.get(b"__catalog_name/greptime").await.unwrap();
assert!(ret.is_none());
}
async fn prepare_components(node_id: u64) -> TestingComponents {
let backend = Arc::new(MemoryKvBackend::default());
let req = PutRequest::new()
.with_key(b"__catalog_name/greptime".to_vec())
.with_value(b"".to_vec());
backend.put(req).await.unwrap();
let req = PutRequest::new()
.with_key(b"__schema_name/greptime-public".to_vec())
.with_value(b"".to_vec());
backend.put(req).await.unwrap();
let cached_backend = Arc::new(CachedMetaKvBackend::wrap(backend));
let table_engine = Arc::new(MockTableEngine::default());
let engine_manager = Arc::new(MemoryTableEngineManager::alias(
MITO_ENGINE.to_string(),
table_engine,
));
let region_alive_keepers = Arc::new(RegionAliveKeepers::new(engine_manager.clone(), 5000));
let catalog_manager = RemoteCatalogManager::new(
engine_manager.clone(),
node_id,
region_alive_keepers.clone(),
Arc::new(TableMetadataManager::new(cached_backend)),
);
catalog_manager.start().await.unwrap();
TestingComponents {
catalog_manager: Arc::new(catalog_manager),
table_engine_manager: engine_manager,
region_alive_keepers,
}
}
#[tokio::test]
async fn test_remote_catalog_default() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let TestingComponents {
catalog_manager, ..
} = prepare_components(node_id).await;
assert_eq!(
vec![DEFAULT_CATALOG_NAME.to_string()],
catalog_manager.catalog_names().await.unwrap()
);
let mut schema_names = catalog_manager
.schema_names(DEFAULT_CATALOG_NAME)
.await
.unwrap();
schema_names.sort_unstable();
assert_eq!(
vec![
INFORMATION_SCHEMA_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string()
],
schema_names
);
}
#[tokio::test]
async fn test_remote_catalog_register_nonexistent() {
common_telemetry::init_default_ut_logging();
let node_id = 42;
let components = prepare_components(node_id).await;
// register a new table with an nonexistent catalog
let catalog_name = "nonexistent_catalog".to_string();
let schema_name = "nonexistent_schema".to_string();
let table_name = "fail_table".to_string();
// this schema has no effect
let table_schema = RawSchema::new(vec![]);
let table = components
.table_engine()
.create_table(
&EngineContext {},
CreateTableRequest {
id: 1,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: table_schema,
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
engine: MITO_ENGINE.to_string(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name,
schema: schema_name,
table_name,
table_id: 1,
table,
};
let res = components.catalog_manager.register_table(reg_req).await;
// because nonexistent_catalog does not exist yet.
assert_matches!(
res.err().unwrap(),
catalog::error::Error::CatalogNotFound { .. }
);
}
#[tokio::test]
async fn test_register_table() {
let node_id = 42;
let components = prepare_components(node_id).await;
let mut schema_names = components
.catalog_manager
.schema_names(DEFAULT_CATALOG_NAME)
.await
.unwrap();
schema_names.sort_unstable();
assert_eq!(
vec![
INFORMATION_SCHEMA_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string(),
],
schema_names
);
// register a new table with an nonexistent catalog
let catalog_name = DEFAULT_CATALOG_NAME.to_string();
let schema_name = DEFAULT_SCHEMA_NAME.to_string();
let table_name = "test_table".to_string();
let table_id = 1;
// this schema has no effect
let table_schema = RawSchema::new(vec![]);
let table = components
.table_engine()
.create_table(
&EngineContext {},
CreateTableRequest {
id: table_id,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
desc: None,
schema: table_schema,
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
engine: MITO_ENGINE.to_string(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name,
schema: schema_name,
table_name: table_name.clone(),
table_id,
table,
};
assert!(components
.catalog_manager
.register_table(reg_req)
.await
.unwrap());
assert_eq!(
vec![table_name],
components
.catalog_manager
.table_names(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
.await
.unwrap()
);
}
#[tokio::test]
async fn test_register_catalog_schema_table() {
let node_id = 42;
let components = prepare_components(node_id).await;
let catalog_name = "test_catalog".to_string();
let schema_name = "nonexistent_schema".to_string();
// register catalog to catalog manager
assert!(components
.catalog_manager
.clone()
.register_catalog(catalog_name.clone())
.await
.is_ok());
assert_eq!(
HashSet::<String>::from_iter(vec![
DEFAULT_CATALOG_NAME.to_string(),
catalog_name.clone()
]),
HashSet::from_iter(components.catalog_manager.catalog_names().await.unwrap())
);
let table_to_register = components
.table_engine()
.create_table(
&EngineContext {},
CreateTableRequest {
id: 2,
catalog_name: catalog_name.clone(),
schema_name: schema_name.clone(),
table_name: "".to_string(),
desc: None,
schema: RawSchema::new(vec![]),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
engine: MITO_ENGINE.to_string(),
},
)
.await
.unwrap();
let reg_req = RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name: " fail_table".to_string(),
table_id: 2,
table: table_to_register,
};
// this register will fail since schema does not exist yet
assert_matches!(
components
.catalog_manager
.register_table(reg_req.clone())
.await
.unwrap_err(),
catalog::error::Error::SchemaNotFound { .. }
);
let register_schema_request = RegisterSchemaRequest {
catalog: catalog_name.to_string(),
schema: schema_name.to_string(),
};
assert!(components
.catalog_manager
.register_schema(register_schema_request)
.await
.expect("Register schema should not fail"));
assert!(components
.catalog_manager
.register_table(reg_req)
.await
.unwrap());
assert_eq!(
HashSet::from([schema_name.clone(), INFORMATION_SCHEMA_NAME.to_string()]),
components
.catalog_manager
.schema_names(&catalog_name)
.await
.unwrap()
.into_iter()
.collect()
)
}
#[tokio::test]
async fn test_register_table_before_and_after_region_alive_keeper_started() {
let components = prepare_components(42).await;
let catalog_manager = &components.catalog_manager;
let region_alive_keepers = &components.region_alive_keepers;
let table_before = TableIdent {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: "table_before".to_string(),
table_id: 1,
engine: MITO_ENGINE.to_string(),
};
let request = RegisterTableRequest {
catalog: table_before.catalog.clone(),
schema: table_before.schema.clone(),
table_name: table_before.table.clone(),
table_id: table_before.table_id,
table: Arc::new(EmptyTable::new(CreateTableRequest {
id: table_before.table_id,
catalog_name: table_before.catalog.clone(),
schema_name: table_before.schema.clone(),
table_name: table_before.table.clone(),
desc: None,
schema: RawSchema::new(vec![]),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
engine: MITO_ENGINE.to_string(),
})),
};
assert!(catalog_manager.register_table(request).await.unwrap());
let keeper = region_alive_keepers
.find_keeper(table_before.table_id)
.await
.unwrap();
let deadline = keeper.deadline(0).await.unwrap();
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 29);
// assert region alive countdown is not started
assert!(deadline > far_future);
region_alive_keepers.start().await;
let table_after = TableIdent {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table: "table_after".to_string(),
table_id: 2,
engine: MITO_ENGINE.to_string(),
};
let request = RegisterTableRequest {
catalog: table_after.catalog.clone(),
schema: table_after.schema.clone(),
table_name: table_after.table.clone(),
table_id: table_after.table_id,
table: Arc::new(EmptyTable::new(CreateTableRequest {
id: table_after.table_id,
catalog_name: table_after.catalog.clone(),
schema_name: table_after.schema.clone(),
table_name: table_after.table.clone(),
desc: None,
schema: RawSchema::new(vec![]),
region_numbers: vec![0],
primary_key_indices: vec![],
create_if_not_exists: false,
table_options: Default::default(),
engine: MITO_ENGINE.to_string(),
})),
};
assert!(catalog_manager.register_table(request).await.unwrap());
let keeper = region_alive_keepers
.find_keeper(table_after.table_id)
.await
.unwrap();
let deadline = keeper.deadline(0).await.unwrap();
// assert countdown is started for the table registered after [RegionAliveKeepers] started
assert!(deadline <= Instant::now() + Duration::from_secs(20));
let keeper = region_alive_keepers
.find_keeper(table_before.table_id)
.await
.unwrap();
let deadline = keeper.deadline(0).await.unwrap();
// assert countdown is started for the table registered before [RegionAliveKeepers] started, too
assert!(deadline <= Instant::now() + Duration::from_secs(20));
}
}

View File

@@ -8,45 +8,41 @@ license.workspace = true
testing = [] testing = []
[dependencies] [dependencies]
api.workspace = true api = { workspace = true }
arrow-flight.workspace = true arrow-flight.workspace = true
async-stream.workspace = true async-stream.workspace = true
async-trait.workspace = true common-base = { workspace = true }
common-base.workspace = true common-catalog = { workspace = true }
common-catalog.workspace = true common-error = { workspace = true }
common-error.workspace = true common-grpc = { workspace = true }
common-grpc.workspace = true common-meta = { workspace = true }
common-macro.workspace = true common-query = { workspace = true }
common-meta.workspace = true common-recordbatch = { workspace = true }
common-query.workspace = true common-telemetry = { workspace = true }
common-recordbatch.workspace = true common-time = { workspace = true }
common-telemetry.workspace = true
common-time.workspace = true
datafusion.workspace = true datafusion.workspace = true
datatypes.workspace = true datatypes = { workspace = true }
derive_builder.workspace = true derive_builder.workspace = true
enum_dispatch = "0.3" enum_dispatch = "0.3"
futures-util.workspace = true futures-util.workspace = true
lazy_static.workspace = true moka = { version = "0.9", features = ["future"] }
moka = { workspace = true, features = ["future"] }
parking_lot = "0.12" parking_lot = "0.12"
prometheus.workspace = true
prost.workspace = true prost.workspace = true
rand.workspace = true rand.workspace = true
session.workspace = true
snafu.workspace = true snafu.workspace = true
tokio-stream = { version = "0.1", features = ["net"] } tokio-stream = { version = "0.1", features = ["net"] }
tokio.workspace = true tokio.workspace = true
tonic.workspace = true tonic.workspace = true
[dev-dependencies] [dev-dependencies]
common-grpc-expr.workspace = true common-grpc-expr = { workspace = true }
datanode.workspace = true datanode = { workspace = true }
derive-new = "0.5" derive-new = "0.5"
substrait.workspace = true prost.workspace = true
substrait = { workspace = true }
tracing = "0.1" tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] }
[dev-dependencies.substrait_proto] [dev-dependencies.substrait_proto]
package = "substrait" package = "substrait"
version = "0.17" version = "0.7"

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, SemanticType, TableId}; use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, TableId};
use client::{Client, Database}; use client::{Client, Database};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE}; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
use prost::Message; use prost::Message;
@@ -41,30 +41,21 @@ async fn run() {
column_defs: vec![ column_defs: vec![
ColumnDef { ColumnDef {
name: "timestamp".to_string(), name: "timestamp".to_string(),
data_type: ColumnDataType::TimestampMillisecond as i32, datatype: ColumnDataType::TimestampMillisecond as i32,
is_nullable: false, is_nullable: false,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Timestamp as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "key".to_string(), name: "key".to_string(),
data_type: ColumnDataType::Uint64 as i32, datatype: ColumnDataType::Uint64 as i32,
is_nullable: false, is_nullable: false,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Tag as i32,
comment: String::new(),
..Default::default()
}, },
ColumnDef { ColumnDef {
name: "value".to_string(), name: "value".to_string(),
data_type: ColumnDataType::Uint64 as i32, datatype: ColumnDataType::Uint64 as i32,
is_nullable: false, is_nullable: false,
default_constraint: vec![], default_constraint: vec![],
semantic_type: SemanticType::Field as i32,
comment: String::new(),
..Default::default()
}, },
], ],
time_index: "timestamp".to_string(), time_index: "timestamp".to_string(),
@@ -72,6 +63,7 @@ async fn run() {
create_if_not_exists: false, create_if_not_exists: false,
table_options: Default::default(), table_options: Default::default(),
table_id: Some(TableId { id: 1024 }), table_id: Some(TableId { id: 1024 }),
region_numbers: vec![0],
engine: MITO_ENGINE.to_string(), engine: MITO_ENGINE.to_string(),
}; };
@@ -81,7 +73,7 @@ async fn run() {
let logical = mock_logical_plan(); let logical = mock_logical_plan();
event!(Level::INFO, "plan size: {:#?}", logical.len()); event!(Level::INFO, "plan size: {:#?}", logical.len());
let result = db.logical_plan(logical).await.unwrap(); let result = db.logical_plan(logical, None).await.unwrap();
event!(Level::INFO, "result: {:#?}", result); event!(Level::INFO, "result: {:#?}", result);
} }

View File

@@ -42,14 +42,14 @@ async fn run() {
.insert(vec![to_insert_request(weather_records_1())]) .insert(vec![to_insert_request(weather_records_1())])
.await .await
{ {
error!("Error: {e:?}"); error!("Error: {e}");
} }
if let Err(e) = stream_inserter if let Err(e) = stream_inserter
.insert(vec![to_insert_request(weather_records_2())]) .insert(vec![to_insert_request(weather_records_2())])
.await .await
{ {
error!("Error: {e:?}"); error!("Error: {e}");
} }
let result = stream_inserter.finish().await; let result = stream_inserter.finish().await;
@@ -59,7 +59,7 @@ async fn run() {
info!("Rows written: {rows}"); info!("Rows written: {rows}");
} }
Err(e) => { Err(e) => {
error!("Error: {e:?}"); error!("Error: {e}");
} }
}; };
} }
@@ -131,7 +131,7 @@ fn to_insert_request(records: Vec<WeatherRecord>) -> InsertRequest {
Column { Column {
column_name: "ts".to_owned(), column_name: "ts".to_owned(),
values: Some(column::Values { values: Some(column::Values {
timestamp_millisecond_values: timestamp_millis, ts_millisecond_values: timestamp_millis,
..Default::default() ..Default::default()
}), }),
semantic_type: SemanticType::Timestamp as i32, semantic_type: SemanticType::Timestamp as i32,
@@ -177,5 +177,6 @@ fn to_insert_request(records: Vec<WeatherRecord>) -> InsertRequest {
table_name: "weather_demo".to_owned(), table_name: "weather_demo".to_owned(),
columns, columns,
row_count: rows as u32, row_count: rows as u32,
..Default::default()
} }
} }

View File

@@ -138,46 +138,24 @@ impl Client {
Ok((addr, channel)) Ok((addr, channel))
} }
fn max_grpc_recv_message_size(&self) -> usize {
self.inner
.channel_manager
.config()
.max_recv_message_size
.as_bytes() as usize
}
fn max_grpc_send_message_size(&self) -> usize {
self.inner
.channel_manager
.config()
.max_send_message_size
.as_bytes() as usize
}
pub(crate) fn make_flight_client(&self) -> Result<FlightClient> { pub(crate) fn make_flight_client(&self) -> Result<FlightClient> {
let (addr, channel) = self.find_channel()?; let (addr, channel) = self.find_channel()?;
Ok(FlightClient { Ok(FlightClient {
addr, addr,
client: FlightServiceClient::new(channel) client: FlightServiceClient::new(channel),
.max_decoding_message_size(self.max_grpc_recv_message_size())
.max_encoding_message_size(self.max_grpc_send_message_size()),
}) })
} }
pub(crate) fn make_database_client(&self) -> Result<DatabaseClient> { pub(crate) fn make_database_client(&self) -> Result<DatabaseClient> {
let (_, channel) = self.find_channel()?; let (_, channel) = self.find_channel()?;
Ok(DatabaseClient { Ok(DatabaseClient {
inner: GreptimeDatabaseClient::new(channel) inner: GreptimeDatabaseClient::new(channel),
.max_decoding_message_size(self.max_grpc_recv_message_size())
.max_encoding_message_size(self.max_grpc_send_message_size()),
}) })
} }
pub(crate) fn raw_region_client(&self) -> Result<PbRegionClient<Channel>> { pub(crate) fn raw_region_client(&self) -> Result<PbRegionClient<Channel>> {
let (_, channel) = self.find_channel()?; let (_, channel) = self.find_channel()?;
Ok(PbRegionClient::new(channel) Ok(PbRegionClient::new(channel))
.max_decoding_message_size(self.max_grpc_recv_message_size())
.max_encoding_message_size(self.max_grpc_send_message_size()))
} }
pub fn make_prometheus_gateway_client(&self) -> Result<PrometheusGatewayClient<Channel>> { pub fn make_prometheus_gateway_client(&self) -> Result<PrometheusGatewayClient<Channel>> {

View File

@@ -13,15 +13,12 @@
// limitations under the License. // limitations under the License.
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager}; use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::datanode_manager::{Datanode, DatanodeManager};
use common_meta::peer::Peer; use common_meta::peer::Peer;
use moka::future::{Cache, CacheBuilder}; use moka::future::{Cache, CacheBuilder};
use crate::region::RegionRequester;
use crate::Client; use crate::Client;
pub struct DatanodeClients { pub struct DatanodeClients {
@@ -43,15 +40,6 @@ impl Debug for DatanodeClients {
} }
} }
#[async_trait::async_trait]
impl DatanodeManager for DatanodeClients {
async fn datanode(&self, datanode: &Peer) -> Arc<dyn Datanode> {
let client = self.get_client(datanode).await;
Arc::new(RegionRequester::new(client))
}
}
impl DatanodeClients { impl DatanodeClients {
pub fn new(config: ChannelConfig) -> Self { pub fn new(config: ChannelConfig) -> Self {
Self { Self {

View File

@@ -17,9 +17,9 @@ use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::greptime_request::Request; use api::v1::greptime_request::Request;
use api::v1::query_request::Query; use api::v1::query_request::Query;
use api::v1::{ use api::v1::{
AlterExpr, AuthHeader, CreateTableExpr, DdlRequest, DeleteRequests, DropTableExpr, AlterExpr, AuthHeader, CompactTableExpr, CreateTableExpr, DdlRequest, DeleteRequests,
GreptimeRequest, InsertRequests, PromRangeQuery, QueryRequest, RequestHeader, DropTableExpr, FlushTableExpr, GreptimeRequest, InsertRequests, PromRangeQuery, QueryRequest,
RowInsertRequests, TruncateTableExpr, RequestHeader, RowInsertRequests, TruncateTableExpr,
}; };
use arrow_flight::Ticket; use arrow_flight::Ticket;
use async_stream::stream; use async_stream::stream;
@@ -28,8 +28,7 @@ use common_grpc::flight::{FlightDecoder, FlightMessage};
use common_query::Output; use common_query::Output;
use common_recordbatch::error::ExternalSnafu; use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::RecordBatchStreamAdaptor; use common_recordbatch::RecordBatchStreamAdaptor;
use common_telemetry::logging; use common_telemetry::{logging, timer};
use common_telemetry::tracing_context::W3cTrace;
use futures_util::StreamExt; use futures_util::StreamExt;
use prost::Message; use prost::Message;
use snafu::{ensure, ResultExt}; use snafu::{ensure, ResultExt};
@@ -112,12 +111,12 @@ impl Database {
} }
pub async fn insert(&self, requests: InsertRequests) -> Result<u32> { pub async fn insert(&self, requests: InsertRequests) -> Result<u32> {
let _timer = metrics::METRIC_GRPC_INSERT.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_INSERT);
self.handle(Request::Inserts(requests)).await self.handle(Request::Inserts(requests)).await
} }
pub async fn row_insert(&self, requests: RowInsertRequests) -> Result<u32> { pub async fn row_insert(&self, requests: RowInsertRequests) -> Result<u32> {
let _timer = metrics::METRIC_GRPC_INSERT.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_INSERT);
self.handle(Request::RowInserts(requests)).await self.handle(Request::RowInserts(requests)).await
} }
@@ -142,48 +141,55 @@ impl Database {
} }
pub async fn delete(&self, request: DeleteRequests) -> Result<u32> { pub async fn delete(&self, request: DeleteRequests) -> Result<u32> {
let _timer = metrics::METRIC_GRPC_DELETE.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_DELETE);
self.handle(Request::Deletes(request)).await self.handle(Request::Deletes(request)).await
} }
async fn handle(&self, request: Request) -> Result<u32> { async fn handle(&self, request: Request) -> Result<u32> {
let mut client = self.client.make_database_client()?.inner; let mut client = self.client.make_database_client()?.inner;
let request = self.to_rpc_request(request); let request = self.to_rpc_request(request, None);
let response = client.handle(request).await?.into_inner(); let response = client.handle(request).await?.into_inner();
from_grpc_response(response) from_grpc_response(response)
} }
#[inline] #[inline]
fn to_rpc_request(&self, request: Request) -> GreptimeRequest { fn to_rpc_request(&self, request: Request, trace_id: Option<u64>) -> GreptimeRequest {
GreptimeRequest { GreptimeRequest {
header: Some(RequestHeader { header: Some(RequestHeader {
catalog: self.catalog.clone(), catalog: self.catalog.clone(),
schema: self.schema.clone(), schema: self.schema.clone(),
authorization: self.ctx.auth_header.clone(), authorization: self.ctx.auth_header.clone(),
dbname: self.dbname.clone(), dbname: self.dbname.clone(),
// TODO(Taylor-lagrange): add client grpc tracing trace_id,
tracing_context: W3cTrace::new(), span_id: None,
}), }),
request: Some(request), request: Some(request),
} }
} }
pub async fn sql<S>(&self, sql: S) -> Result<Output> pub async fn sql(&self, sql: &str) -> Result<Output> {
where let _timer = timer!(metrics::METRIC_GRPC_SQL);
S: AsRef<str>, self.do_get(
{ Request::Query(QueryRequest {
let _timer = metrics::METRIC_GRPC_SQL.start_timer(); query: Some(Query::Sql(sql.to_string())),
self.do_get(Request::Query(QueryRequest { }),
query: Some(Query::Sql(sql.as_ref().to_string())), None,
})) )
.await .await
} }
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> { pub async fn logical_plan(
let _timer = metrics::METRIC_GRPC_LOGICAL_PLAN.start_timer(); &self,
self.do_get(Request::Query(QueryRequest { logical_plan: Vec<u8>,
trace_id: Option<u64>,
) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_LOGICAL_PLAN);
self.do_get(
Request::Query(QueryRequest {
query: Some(Query::LogicalPlan(logical_plan)), query: Some(Query::LogicalPlan(logical_plan)),
})) }),
trace_id,
)
.await .await
} }
@@ -194,54 +200,91 @@ impl Database {
end: &str, end: &str,
step: &str, step: &str,
) -> Result<Output> { ) -> Result<Output> {
let _timer = metrics::METRIC_GRPC_PROMQL_RANGE_QUERY.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_PROMQL_RANGE_QUERY);
self.do_get(Request::Query(QueryRequest { self.do_get(
Request::Query(QueryRequest {
query: Some(Query::PromRangeQuery(PromRangeQuery { query: Some(Query::PromRangeQuery(PromRangeQuery {
query: promql.to_string(), query: promql.to_string(),
start: start.to_string(), start: start.to_string(),
end: end.to_string(), end: end.to_string(),
step: step.to_string(), step: step.to_string(),
})), })),
})) }),
None,
)
.await .await
} }
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> { pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
let _timer = metrics::METRIC_GRPC_CREATE_TABLE.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_CREATE_TABLE);
self.do_get(Request::Ddl(DdlRequest { self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CreateTable(expr)), expr: Some(DdlExpr::CreateTable(expr)),
})) }),
None,
)
.await .await
} }
pub async fn alter(&self, expr: AlterExpr) -> Result<Output> { pub async fn alter(&self, expr: AlterExpr) -> Result<Output> {
let _timer = metrics::METRIC_GRPC_ALTER.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_ALTER);
self.do_get(Request::Ddl(DdlRequest { self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::Alter(expr)), expr: Some(DdlExpr::Alter(expr)),
})) }),
None,
)
.await .await
} }
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> { pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> {
let _timer = metrics::METRIC_GRPC_DROP_TABLE.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_DROP_TABLE);
self.do_get(Request::Ddl(DdlRequest { self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::DropTable(expr)), expr: Some(DdlExpr::DropTable(expr)),
})) }),
None,
)
.await
}
pub async fn flush_table(&self, expr: FlushTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_FLUSH_TABLE);
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::FlushTable(expr)),
}),
None,
)
.await
}
pub async fn compact_table(&self, expr: CompactTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_COMPACT_TABLE);
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CompactTable(expr)),
}),
None,
)
.await .await
} }
pub async fn truncate_table(&self, expr: TruncateTableExpr) -> Result<Output> { pub async fn truncate_table(&self, expr: TruncateTableExpr) -> Result<Output> {
let _timer = metrics::METRIC_GRPC_TRUNCATE_TABLE.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_TRUNCATE_TABLE);
self.do_get(Request::Ddl(DdlRequest { self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::TruncateTable(expr)), expr: Some(DdlExpr::TruncateTable(expr)),
})) }),
None,
)
.await .await
} }
async fn do_get(&self, request: Request) -> Result<Output> { async fn do_get(&self, request: Request, trace_id: Option<u64>) -> Result<Output> {
// FIXME(paomian): should be added some labels for metrics // FIXME(paomian): should be added some labels for metrics
let _timer = metrics::METRIC_GRPC_DO_GET.start_timer(); let _timer = timer!(metrics::METRIC_GRPC_DO_GET);
let request = self.to_rpc_request(request); let request = self.to_rpc_request(request, trace_id);
let request = Ticket { let request = Ticket {
ticket: request.encode_to_vec().into(), ticket: request.encode_to_vec().into(),
}; };
@@ -259,7 +302,7 @@ impl Database {
source: BoxedError::new(ServerSnafu { code, msg }.build()), source: BoxedError::new(ServerSnafu { code, msg }.build()),
}; };
logging::error!( logging::error!(
"Failed to do Flight get, addr: {}, code: {}, source: {:?}", "Failed to do Flight get, addr: {}, code: {}, source: {}",
client.addr(), client.addr(),
tonic_code, tonic_code,
error error

View File

@@ -17,37 +17,29 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_error::{GREPTIME_ERROR_CODE, GREPTIME_ERROR_MSG}; use common_error::{GREPTIME_ERROR_CODE, GREPTIME_ERROR_MSG};
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu}; use snafu::{Location, Snafu};
use tonic::{Code, Status}; use tonic::{Code, Status};
#[derive(Snafu)] #[derive(Debug, Snafu)]
#[snafu(visibility(pub))] #[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error { pub enum Error {
#[snafu(display("Illegal Flight messages, reason: {}", reason))] #[snafu(display("Illegal Flight messages, reason: {}", reason))]
IllegalFlightMessages { reason: String, location: Location }, IllegalFlightMessages { reason: String, location: Location },
#[snafu(display("Failed to do Flight get, code: {}", tonic_code))] #[snafu(display("Failed to do Flight get, code: {}, source: {}", tonic_code, source))]
FlightGet { FlightGet {
addr: String, addr: String,
tonic_code: Code, tonic_code: Code,
source: BoxedError, source: BoxedError,
}, },
#[snafu(display("Failure occurs during handling request"))] #[snafu(display("Failed to convert FlightData, source: {}", source))]
HandleRequest {
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to convert FlightData"))]
ConvertFlightData { ConvertFlightData {
location: Location, location: Location,
source: common_grpc::Error, source: common_grpc::Error,
}, },
#[snafu(display("Column datatype error"))] #[snafu(display("Column datatype error, source: {}", source))]
ColumnDataType { ColumnDataType {
location: Location, location: Location,
source: api::error::Error, source: api::error::Error,
@@ -59,16 +51,17 @@ pub enum Error {
#[snafu(display("Missing required field in protobuf, field: {}", field))] #[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField { field: String, location: Location }, MissingField { field: String, location: Location },
#[snafu(display("Failed to create gRPC channel, peer address: {}", addr))] #[snafu(display(
"Failed to create gRPC channel, peer address: {}, source: {}",
addr,
source
))]
CreateChannel { CreateChannel {
addr: String, addr: String,
location: Location, location: Location,
source: common_grpc::error::Error, source: common_grpc::error::Error,
}, },
#[snafu(display("Failed to request RegionServer, code: {}", code))]
RegionServer { code: Code, source: BoxedError },
// Server error carried in Tonic Status's metadata. // Server error carried in Tonic Status's metadata.
#[snafu(display("{}", msg))] #[snafu(display("{}", msg))]
Server { code: StatusCode, msg: String }, Server { code: StatusCode, msg: String },
@@ -92,9 +85,7 @@ impl ErrorExt for Error {
| Error::ClientStreaming { .. } => StatusCode::Internal, | Error::ClientStreaming { .. } => StatusCode::Internal,
Error::Server { code, .. } => *code, Error::Server { code, .. } => *code,
Error::FlightGet { source, .. } Error::FlightGet { source, .. } => source.status_code(),
| Error::HandleRequest { source, .. }
| Error::RegionServer { source, .. } => source.status_code(),
Error::CreateChannel { source, .. } | Error::ConvertFlightData { source, .. } => { Error::CreateChannel { source, .. } | Error::ConvertFlightData { source, .. } => {
source.status_code() source.status_code()
} }

View File

@@ -26,8 +26,6 @@ use api::v1::greptime_response::Response;
use api::v1::{AffectedRows, GreptimeResponse}; use api::v1::{AffectedRows, GreptimeResponse};
pub use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; pub use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
pub use common_query::Output;
pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt; use snafu::OptionExt;
pub use self::client::Client; pub use self::client::Client;

View File

@@ -12,34 +12,17 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use lazy_static::lazy_static; //! client metrics
use prometheus::*; pub const METRIC_GRPC_CREATE_TABLE: &str = "grpc.create_table";
pub const METRIC_GRPC_PROMQL_RANGE_QUERY: &str = "grpc.promql.range_query";
lazy_static! { pub const METRIC_GRPC_INSERT: &str = "grpc.insert";
pub static ref METRIC_GRPC_CREATE_TABLE: Histogram = pub const METRIC_GRPC_DELETE: &str = "grpc.delete";
register_histogram!("grpc_create_table", "grpc create table").unwrap(); pub const METRIC_GRPC_SQL: &str = "grpc.sql";
pub static ref METRIC_GRPC_PROMQL_RANGE_QUERY: Histogram = pub const METRIC_GRPC_LOGICAL_PLAN: &str = "grpc.logical_plan";
register_histogram!("grpc_promql_range_query", "grpc promql range query").unwrap(); pub const METRIC_GRPC_ALTER: &str = "grpc.alter";
pub static ref METRIC_GRPC_INSERT: Histogram = pub const METRIC_GRPC_DROP_TABLE: &str = "grpc.drop_table";
register_histogram!("grpc_insert", "grpc insert").unwrap(); pub const METRIC_GRPC_FLUSH_TABLE: &str = "grpc.flush_table";
pub static ref METRIC_GRPC_DELETE: Histogram = pub const METRIC_GRPC_COMPACT_TABLE: &str = "grpc.compact_table";
register_histogram!("grpc_delete", "grpc delete").unwrap(); pub const METRIC_GRPC_TRUNCATE_TABLE: &str = "grpc.truncate_table";
pub static ref METRIC_GRPC_SQL: Histogram = pub const METRIC_GRPC_DO_GET: &str = "grpc.do_get";
register_histogram!("grpc_sql", "grpc sql").unwrap(); pub(crate) const METRIC_REGION_REQUEST_GRPC: &str = "grpc.region_request";
pub static ref METRIC_GRPC_LOGICAL_PLAN: Histogram =
register_histogram!("grpc_logical_plan", "grpc logical plan").unwrap();
pub static ref METRIC_GRPC_ALTER: Histogram =
register_histogram!("grpc_alter", "grpc alter").unwrap();
pub static ref METRIC_GRPC_DROP_TABLE: Histogram =
register_histogram!("grpc_drop_table", "grpc drop table").unwrap();
pub static ref METRIC_GRPC_TRUNCATE_TABLE: Histogram =
register_histogram!("grpc_truncate_table", "grpc truncate table").unwrap();
pub static ref METRIC_GRPC_DO_GET: Histogram =
register_histogram!("grpc_do_get", "grpc do get").unwrap();
pub static ref METRIC_REGION_REQUEST_GRPC: HistogramVec = register_histogram_vec!(
"grpc_region_request",
"grpc region request",
&["request_type"]
)
.unwrap();
}

View File

@@ -12,180 +12,64 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use api::v1::region::{QueryRequest, RegionRequest, RegionResponse}; use api::v1::region::{region_request, RegionRequest, RegionRequestHeader, RegionResponse};
use api::v1::ResponseHeader; use api::v1::ResponseHeader;
use arrow_flight::Ticket;
use async_stream::stream;
use async_trait::async_trait;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::status_code::StatusCode;
use common_grpc::flight::{FlightDecoder, FlightMessage}; use common_telemetry::timer;
use common_meta::datanode_manager::{AffectedRows, Datanode}; use snafu::OptionExt;
use common_meta::error::{self as meta_error, Result as MetaResult};
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream};
use common_telemetry::error;
use prost::Message;
use snafu::{location, Location, OptionExt, ResultExt};
use tokio_stream::StreamExt;
use crate::error::Error::RegionServer; use crate::error::{IllegalDatabaseResponseSnafu, Result, ServerSnafu};
use crate::error::{ use crate::{metrics, Client};
self, ConvertFlightDataSnafu, IllegalDatabaseResponseSnafu, IllegalFlightMessagesSnafu,
MissingFieldSnafu, Result, ServerSnafu, type AffectedRows = u64;
};
use crate::{metrics, Client, Error};
#[derive(Debug)] #[derive(Debug)]
pub struct RegionRequester { pub struct RegionRequester {
trace_id: Option<u64>,
span_id: Option<u64>,
client: Client, client: Client,
} }
#[async_trait]
impl Datanode for RegionRequester {
async fn handle(&self, request: RegionRequest) -> MetaResult<AffectedRows> {
self.handle_inner(request).await.map_err(|err| {
if matches!(err, RegionServer { .. }) {
meta_error::Error::RetryLater {
source: BoxedError::new(err),
}
} else {
meta_error::Error::External {
source: BoxedError::new(err),
location: location!(),
}
}
})
}
async fn handle_query(&self, request: QueryRequest) -> MetaResult<SendableRecordBatchStream> {
let ticket = Ticket {
ticket: request.encode_to_vec().into(),
};
self.do_get_inner(ticket)
.await
.map_err(BoxedError::new)
.context(meta_error::ExternalSnafu)
}
}
impl RegionRequester { impl RegionRequester {
pub fn new(client: Client) -> Self { pub fn new(client: Client) -> Self {
Self { client } // TODO(LFC): Pass in trace_id and span_id from some context when we have it.
Self {
trace_id: None,
span_id: None,
client,
}
} }
pub async fn do_get_inner(&self, ticket: Ticket) -> Result<SendableRecordBatchStream> { pub async fn handle(self, request: region_request::Body) -> Result<AffectedRows> {
let mut flight_client = self.client.make_flight_client()?; let request_type = request.as_ref().to_string();
let response = flight_client
.mut_inner() let request = RegionRequest {
.do_get(ticket) header: Some(RegionRequestHeader {
.await trace_id: self.trace_id,
.map_err(|e| { span_id: self.span_id,
let tonic_code = e.code(); }),
let e: error::Error = e.into(); body: Some(request),
let code = e.status_code();
let msg = e.to_string();
let error = Error::FlightGet {
tonic_code,
addr: flight_client.addr().to_string(),
source: BoxedError::new(ServerSnafu { code, msg }.build()),
}; };
error!(
e; "Failed to do Flight get, addr: {}, code: {}", let _timer = timer!(
flight_client.addr(), metrics::METRIC_REGION_REQUEST_GRPC,
tonic_code &[("request_type", request_type)]
); );
error
})?;
let flight_data_stream = response.into_inner();
let mut decoder = FlightDecoder::default();
let mut flight_message_stream = flight_data_stream.map(move |flight_data| {
flight_data
.map_err(Error::from)
.and_then(|data| decoder.try_decode(data).context(ConvertFlightDataSnafu))
});
let Some(first_flight_message) = flight_message_stream.next().await else {
return IllegalFlightMessagesSnafu {
reason: "Expect the response not to be empty",
}
.fail();
};
let FlightMessage::Schema(schema) = first_flight_message? else {
return IllegalFlightMessagesSnafu {
reason: "Expect schema to be the first flight message",
}
.fail();
};
let stream = Box::pin(stream!({
while let Some(flight_message) = flight_message_stream.next().await {
let flight_message = flight_message
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let FlightMessage::Recordbatch(record_batch) = flight_message else {
yield IllegalFlightMessagesSnafu {
reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"
}
.fail()
.map_err(BoxedError::new)
.context(ExternalSnafu);
break;
};
yield Ok(record_batch);
}
}));
let record_batch_stream = RecordBatchStreamAdaptor {
schema,
stream,
output_ordering: None,
};
Ok(Box::pin(record_batch_stream))
}
async fn handle_inner(&self, request: RegionRequest) -> Result<AffectedRows> {
let request_type = request
.body
.as_ref()
.with_context(|| MissingFieldSnafu { field: "body" })?
.as_ref()
.to_string();
let _timer = metrics::METRIC_REGION_REQUEST_GRPC
.with_label_values(&[request_type.as_str()])
.start_timer();
let mut client = self.client.raw_region_client()?; let mut client = self.client.raw_region_client()?;
let RegionResponse { let RegionResponse {
header, header,
affected_rows, affected_rows,
} = client } = client.handle(request).await?.into_inner();
.handle(request)
.await
.map_err(|e| {
let code = e.code();
let err: error::Error = e.into();
// Uses `Error::RegionServer` instead of `Error::Server`
error::Error::RegionServer {
code,
source: BoxedError::new(err),
}
})?
.into_inner();
check_response_header(header)?; check_response_header(header)?;
Ok(affected_rows) Ok(affected_rows)
} }
pub async fn handle(&self, request: RegionRequest) -> Result<AffectedRows> {
self.handle_inner(request).await
}
} }
pub fn check_response_header(header: Option<ResponseHeader>) -> Result<()> { fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
let status = header let status = header
.and_then(|header| header.status) .and_then(|header| header.status)
.context(IllegalDatabaseResponseSnafu { .context(IllegalDatabaseResponseSnafu {

View File

@@ -10,69 +10,61 @@ name = "greptime"
path = "src/bin/greptime.rs" path = "src/bin/greptime.rs"
[features] [features]
default = ["metrics-process"]
tokio-console = ["common-telemetry/tokio-console"] tokio-console = ["common-telemetry/tokio-console"]
metrics-process = ["servers/metrics-process"]
[dependencies] [dependencies]
anymap = "1.0.0-beta.2" anymap = "1.0.0-beta.2"
async-trait.workspace = true async-trait.workspace = true
auth.workspace = true auth.workspace = true
catalog.workspace = true catalog = { workspace = true }
chrono.workspace = true chrono.workspace = true
clap = { version = "4.4", features = ["derive"] } clap = { version = "3.1", features = ["derive"] }
client.workspace = true client = { workspace = true }
common-base.workspace = true common-base = { workspace = true }
common-catalog.workspace = true common-error = { workspace = true }
common-config.workspace = true common-meta = { workspace = true }
common-error.workspace = true common-query = { workspace = true }
common-macro.workspace = true common-recordbatch = { workspace = true }
common-meta.workspace = true
common-procedure.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-telemetry = { workspace = true, features = [ common-telemetry = { workspace = true, features = [
"deadlock_detection", "deadlock_detection",
] } ] }
config = "0.13" config = "0.13"
datanode.workspace = true datanode = { workspace = true }
datatypes.workspace = true datatypes = { workspace = true }
either = "1.8" either = "1.8"
etcd-client.workspace = true etcd-client.workspace = true
file-engine.workspace = true frontend = { workspace = true }
frontend.workspace = true
futures.workspace = true futures.workspace = true
lazy_static.workspace = true meta-client = { workspace = true }
meta-client.workspace = true meta-srv = { workspace = true }
meta-srv.workspace = true metrics.workspace = true
mito2.workspace = true
nu-ansi-term = "0.46" nu-ansi-term = "0.46"
partition.workspace = true partition = { workspace = true }
plugins.workspace = true
prometheus.workspace = true
prost.workspace = true prost.workspace = true
query.workspace = true query = { workspace = true }
rand.workspace = true rand.workspace = true
regex.workspace = true
rustyline = "10.1" rustyline = "10.1"
serde.workspace = true serde.workspace = true
serde_json.workspace = true servers = { workspace = true }
servers.workspace = true session = { workspace = true }
session.workspace = true
snafu.workspace = true snafu.workspace = true
substrait.workspace = true substrait = { workspace = true }
table.workspace = true table = { workspace = true }
tokio.workspace = true tokio.workspace = true
toml.workspace = true
[target.'cfg(not(windows))'.dependencies] [target.'cfg(not(windows))'.dependencies]
tikv-jemallocator = "0.5" tikv-jemallocator = "0.5"
[dev-dependencies] [dev-dependencies]
common-test-util.workspace = true common-test-util = { workspace = true }
serde.workspace = true serde.workspace = true
temp-env = "0.3" temp-env = "0.3"
toml.workspace = true
[target.'cfg(not(windows))'.dev-dependencies] [target.'cfg(not(windows))'.dev-dependencies]
rexpect = "0.5" rexpect = "0.5"
[build-dependencies] [build-dependencies]
common-version.workspace = true common-version = { workspace = true }

View File

@@ -21,11 +21,7 @@ use cmd::error::Result;
use cmd::options::{Options, TopLevelOptions}; use cmd::options::{Options, TopLevelOptions};
use cmd::{cli, datanode, frontend, metasrv, standalone}; use cmd::{cli, datanode, frontend, metasrv, standalone};
use common_telemetry::logging::{error, info, TracingOptions}; use common_telemetry::logging::{error, info, TracingOptions};
use metrics::gauge;
lazy_static::lazy_static! {
static ref APP_VERSION: prometheus::IntGaugeVec =
prometheus::register_int_gauge_vec!("app_version", "app version", &["short_version", "version"]).unwrap();
}
#[derive(Parser)] #[derive(Parser)]
#[clap(name = "greptimedb", version = print_version())] #[clap(name = "greptimedb", version = print_version())]
@@ -120,7 +116,7 @@ impl SubCommand {
Ok(Application::Metasrv(app)) Ok(Application::Metasrv(app))
} }
(SubCommand::Standalone(cmd), Options::Standalone(opts)) => { (SubCommand::Standalone(cmd), Options::Standalone(opts)) => {
let app = cmd.build(*opts).await?; let app = cmd.build(opts.fe_opts, opts.dn_opts).await?;
Ok(Application::Standalone(app)) Ok(Application::Standalone(app))
} }
(SubCommand::Cli(cmd), Options::Cli(_)) => { (SubCommand::Cli(cmd), Options::Cli(_)) => {
@@ -208,13 +204,11 @@ async fn main() -> Result<()> {
}; };
common_telemetry::set_panic_hook(); common_telemetry::set_panic_hook();
let _guard = common_telemetry::init_default_metrics_recorder();
common_telemetry::init_global_logging(app_name, logging_opts, tracing_opts, opts.node_id()); let _guard = common_telemetry::init_global_logging(app_name, logging_opts, tracing_opts);
// Report app version as gauge. // Report app version as gauge.
APP_VERSION gauge!("app_version", 1.0, "short_version" => short_version(), "version" => full_version());
.with_label_values(&[short_version(), full_version()])
.inc();
// Log version and argument flags. // Log version and argument flags.
info!( info!(

View File

@@ -14,7 +14,6 @@
mod bench; mod bench;
mod cmd; mod cmd;
mod export;
mod helper; mod helper;
mod repl; mod repl;
// TODO(weny): Removes it // TODO(weny): Removes it
@@ -28,7 +27,6 @@ use common_telemetry::logging::LoggingOptions;
pub use repl::Repl; pub use repl::Repl;
use upgrade::UpgradeCommand; use upgrade::UpgradeCommand;
use self::export::ExportCommand;
use crate::error::Result; use crate::error::Result;
use crate::options::{Options, TopLevelOptions}; use crate::options::{Options, TopLevelOptions};
@@ -80,19 +78,17 @@ impl Command {
#[derive(Parser)] #[derive(Parser)]
enum SubCommand { enum SubCommand {
// Attach(AttachCommand), Attach(AttachCommand),
Upgrade(UpgradeCommand), Upgrade(UpgradeCommand),
Bench(BenchTableMetadataCommand), Bench(BenchTableMetadataCommand),
Export(ExportCommand),
} }
impl SubCommand { impl SubCommand {
async fn build(self) -> Result<Instance> { async fn build(self) -> Result<Instance> {
match self { match self {
// SubCommand::Attach(cmd) => cmd.build().await, SubCommand::Attach(cmd) => cmd.build().await,
SubCommand::Upgrade(cmd) => cmd.build().await, SubCommand::Upgrade(cmd) => cmd.build().await,
SubCommand::Bench(cmd) => cmd.build().await, SubCommand::Bench(cmd) => cmd.build().await,
SubCommand::Export(cmd) => cmd.build().await,
} }
} }
} }
@@ -108,9 +104,51 @@ pub(crate) struct AttachCommand {
} }
impl AttachCommand { impl AttachCommand {
#[allow(dead_code)]
async fn build(self) -> Result<Instance> { async fn build(self) -> Result<Instance> {
let repl = Repl::try_new(&self).await?; let repl = Repl::try_new(&self).await?;
Ok(Instance::Repl(repl)) Ok(Instance::Repl(repl))
} }
} }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_load_options() {
let cmd = Command {
cmd: SubCommand::Attach(AttachCommand {
grpc_addr: String::from(""),
meta_addr: None,
disable_helper: false,
}),
};
let opts = cmd.load_options(TopLevelOptions::default()).unwrap();
let logging_opts = opts.logging_options();
assert_eq!("/tmp/greptimedb/logs", logging_opts.dir);
assert!(logging_opts.level.is_none());
assert!(!logging_opts.enable_jaeger_tracing);
}
#[test]
fn test_top_level_options() {
let cmd = Command {
cmd: SubCommand::Attach(AttachCommand {
grpc_addr: String::from(""),
meta_addr: None,
disable_helper: false,
}),
};
let opts = cmd
.load_options(TopLevelOptions {
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_level: Some("debug".to_string()),
})
.unwrap();
let logging_opts = opts.logging_options();
assert_eq!("/tmp/greptimedb/test/logs", logging_opts.dir);
assert_eq!("debug", logging_opts.level.as_ref().unwrap());
}
}

View File

@@ -20,13 +20,14 @@ use std::time::Duration;
use async_trait::async_trait; use async_trait::async_trait;
use clap::Parser; use clap::Parser;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef}; use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::peer::Peer; use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute}; use common_meta::rpc::router::{Region, RegionRoute};
use common_meta::table_name::TableName; use common_meta::table_name::TableName;
use common_telemetry::info; use common_telemetry::info;
use datatypes::data_type::ConcreteDataType; use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema}; use datatypes::schema::{ColumnSchema, RawSchema};
use meta_srv::service::store::etcd::EtcdStore;
use meta_srv::service::store::kv::KvBackendAdapter;
use rand::Rng; use rand::Rng;
use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType}; use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType};
@@ -63,7 +64,9 @@ impl BenchTableMetadataCommand {
pub async fn build(&self) -> Result<Instance> { pub async fn build(&self) -> Result<Instance> {
let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr]).await.unwrap(); let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr]).await.unwrap();
let table_metadata_manager = Arc::new(TableMetadataManager::new(etcd_store)); let table_metadata_manager = Arc::new(TableMetadataManager::new(KvBackendAdapter::wrap(
etcd_store,
)));
let tool = BenchTableMetadata { let tool = BenchTableMetadata {
table_metadata_manager, table_metadata_manager,
@@ -117,6 +120,7 @@ fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
created_on: chrono::DateTime::default(), created_on: chrono::DateTime::default(),
primary_key_indices: vec![], primary_key_indices: vec![],
next_column_id: columns as u32 + 1, next_column_id: columns as u32 + 1,
engine_options: Default::default(),
value_indices: vec![], value_indices: vec![],
options: Default::default(), options: Default::default(),
region_numbers: (1..=100).collect(), region_numbers: (1..=100).collect(),
@@ -154,7 +158,6 @@ fn create_region_routes() -> Vec<RegionRoute> {
addr: String::new(), addr: String::new(),
}), }),
follower_peers: vec![], follower_peers: vec![],
leader_status: None,
}); });
} }

View File

@@ -1,420 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::path::Path;
use std::sync::Arc;
use async_trait::async_trait;
use clap::{Parser, ValueEnum};
use client::api::v1::auth_header::AuthScheme;
use client::api::v1::Basic;
use client::{Client, Database, DEFAULT_SCHEMA_NAME};
use common_query::Output;
use common_recordbatch::util::collect;
use common_telemetry::{debug, error, info, warn};
use datatypes::scalars::ScalarVector;
use datatypes::vectors::{StringVector, Vector};
use snafu::{OptionExt, ResultExt};
use tokio::fs::File;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::sync::Semaphore;
use crate::cli::{Instance, Tool};
use crate::error::{
CollectRecordBatchesSnafu, ConnectServerSnafu, EmptyResultSnafu, Error, FileIoSnafu,
IllegalConfigSnafu, InvalidDatabaseNameSnafu, NotDataFromOutputSnafu, RequestDatabaseSnafu,
Result,
};
type TableReference = (String, String, String);
#[derive(Debug, Default, Clone, ValueEnum)]
enum ExportTarget {
/// Corresponding to `SHOW CREATE TABLE`
#[default]
CreateTable,
/// Corresponding to `EXPORT TABLE`
TableData,
}
#[derive(Debug, Default, Parser)]
pub struct ExportCommand {
/// Server address to connect
#[clap(long)]
addr: String,
/// Directory to put the exported data. E.g.: /tmp/greptimedb-export
#[clap(long)]
output_dir: String,
/// The name of the catalog to export. Default to "greptime-*"".
#[clap(long, default_value = "")]
database: String,
/// Parallelism of the export.
#[clap(long, short = 'j', default_value = "1")]
export_jobs: usize,
/// Max retry times for each job.
#[clap(long, default_value = "3")]
max_retry: usize,
/// Things to export
#[clap(long, short = 't', value_enum)]
target: ExportTarget,
/// basic authentication for connecting to the server
#[clap(long)]
auth_basic: Option<String>,
}
impl ExportCommand {
pub async fn build(&self) -> Result<Instance> {
let client = Client::with_urls([self.addr.clone()]);
client
.health_check()
.await
.with_context(|_| ConnectServerSnafu {
addr: self.addr.clone(),
})?;
let (catalog, schema) = split_database(&self.database)?;
let mut database_client = Database::new(
catalog.clone(),
schema.clone().unwrap_or(DEFAULT_SCHEMA_NAME.to_string()),
client,
);
if let Some(auth_basic) = &self.auth_basic {
let (username, password) = auth_basic.split_once(':').context(IllegalConfigSnafu {
msg: "auth_basic cannot be split by ':'".to_string(),
})?;
database_client.set_auth(AuthScheme::Basic(Basic {
username: username.to_string(),
password: password.to_string(),
}));
}
Ok(Instance::Tool(Box::new(Export {
client: database_client,
catalog,
schema,
output_dir: self.output_dir.clone(),
parallelism: self.export_jobs,
target: self.target.clone(),
})))
}
}
pub struct Export {
client: Database,
catalog: String,
schema: Option<String>,
output_dir: String,
parallelism: usize,
target: ExportTarget,
}
impl Export {
/// Iterate over all db names.
///
/// Newbie: `db_name` is catalog + schema.
async fn iter_db_names(&self) -> Result<Vec<(String, String)>> {
if let Some(schema) = &self.schema {
Ok(vec![(self.catalog.clone(), schema.clone())])
} else {
let mut client = self.client.clone();
client.set_catalog(self.catalog.clone());
let result =
client
.sql("show databases")
.await
.with_context(|_| RequestDatabaseSnafu {
sql: "show databases".to_string(),
})?;
let Output::Stream(stream) = result else {
NotDataFromOutputSnafu.fail()?
};
let record_batch = collect(stream)
.await
.context(CollectRecordBatchesSnafu)?
.pop()
.context(EmptyResultSnafu)?;
let schemas = record_batch
.column(0)
.as_any()
.downcast_ref::<StringVector>()
.unwrap();
let mut result = Vec::with_capacity(schemas.len());
for i in 0..schemas.len() {
let schema = schemas.get_data(i).unwrap().to_owned();
if schema == common_catalog::consts::INFORMATION_SCHEMA_NAME {
continue;
}
result.push((self.catalog.clone(), schema));
}
Ok(result)
}
}
/// Return a list of [`TableReference`] to be exported.
/// Includes all tables under the given `catalog` and `schema`
async fn get_table_list(&self, catalog: &str, schema: &str) -> Result<Vec<TableReference>> {
// TODO: SQL injection hurts
let sql = format!(
"select table_catalog, table_schema, table_name from \
information_schema.tables where table_type = \'BASE TABLE\'\
and table_catalog = \'{catalog}\' and table_schema = \'{schema}\'",
);
let mut client = self.client.clone();
client.set_catalog(catalog);
client.set_schema(schema);
let result = client
.sql(&sql)
.await
.with_context(|_| RequestDatabaseSnafu { sql })?;
let Output::Stream(stream) = result else {
NotDataFromOutputSnafu.fail()?
};
let Some(record_batch) = collect(stream)
.await
.context(CollectRecordBatchesSnafu)?
.pop()
else {
return Ok(vec![]);
};
debug!("Fetched table list: {}", record_batch.pretty_print());
if record_batch.num_rows() == 0 {
return Ok(vec![]);
}
let mut result = Vec::with_capacity(record_batch.num_rows());
let catalog_column = record_batch
.column(0)
.as_any()
.downcast_ref::<StringVector>()
.unwrap();
let schema_column = record_batch
.column(1)
.as_any()
.downcast_ref::<StringVector>()
.unwrap();
let table_column = record_batch
.column(2)
.as_any()
.downcast_ref::<StringVector>()
.unwrap();
for i in 0..record_batch.num_rows() {
let catalog = catalog_column.get_data(i).unwrap().to_owned();
let schema = schema_column.get_data(i).unwrap().to_owned();
let table = table_column.get_data(i).unwrap().to_owned();
result.push((catalog, schema, table));
}
Ok(result)
}
async fn show_create_table(&self, catalog: &str, schema: &str, table: &str) -> Result<String> {
let sql = format!("show create table {}.{}.{}", catalog, schema, table);
let mut client = self.client.clone();
client.set_catalog(catalog);
client.set_schema(schema);
let result = client
.sql(&sql)
.await
.with_context(|_| RequestDatabaseSnafu { sql })?;
let Output::Stream(stream) = result else {
NotDataFromOutputSnafu.fail()?
};
let record_batch = collect(stream)
.await
.context(CollectRecordBatchesSnafu)?
.pop()
.context(EmptyResultSnafu)?;
let create_table = record_batch
.column(1)
.as_any()
.downcast_ref::<StringVector>()
.unwrap()
.get_data(0)
.unwrap();
Ok(format!("{create_table};\n"))
}
async fn export_create_table(&self) -> Result<()> {
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.iter_db_names().await?;
let db_count = db_names.len();
let mut tasks = Vec::with_capacity(db_names.len());
for (catalog, schema) in db_names {
let semaphore_moved = semaphore.clone();
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
let table_list = self.get_table_list(&catalog, &schema).await?;
let table_count = table_list.len();
tokio::fs::create_dir_all(&self.output_dir)
.await
.context(FileIoSnafu)?;
let output_file =
Path::new(&self.output_dir).join(format!("{catalog}-{schema}.sql"));
let mut file = File::create(output_file).await.context(FileIoSnafu)?;
for (c, s, t) in table_list {
match self.show_create_table(&c, &s, &t).await {
Err(e) => {
error!(e; "Failed to export table {}.{}.{}", c, s, t)
}
Ok(create_table) => {
file.write_all(create_table.as_bytes())
.await
.context(FileIoSnafu)?;
}
}
}
info!("finished exporting {catalog}.{schema} with {table_count} tables",);
Ok::<(), Error>(())
});
}
let success = futures::future::join_all(tasks)
.await
.into_iter()
.filter(|r| match r {
Ok(_) => true,
Err(e) => {
error!(e; "export job failed");
false
}
})
.count();
info!("success {success}/{db_count} jobs");
Ok(())
}
async fn export_table_data(&self) -> Result<()> {
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.iter_db_names().await?;
let db_count = db_names.len();
let mut tasks = Vec::with_capacity(db_names.len());
for (catalog, schema) in db_names {
let semaphore_moved = semaphore.clone();
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
tokio::fs::create_dir_all(&self.output_dir)
.await
.context(FileIoSnafu)?;
let output_dir = Path::new(&self.output_dir).join(format!("{catalog}-{schema}/"));
let mut client = self.client.clone();
client.set_catalog(catalog.clone());
client.set_schema(schema.clone());
// copy database to
let sql = format!(
"copy database {} to '{}' with (format='parquet');",
schema,
output_dir.to_str().unwrap()
);
client
.sql(sql.clone())
.await
.context(RequestDatabaseSnafu { sql })?;
info!("finished exporting {catalog}.{schema} data");
// export copy from sql
let dir_filenames = match output_dir.read_dir() {
Ok(dir) => dir,
Err(_) => {
warn!("empty database {catalog}.{schema}");
return Ok(());
}
};
let copy_from_file =
Path::new(&self.output_dir).join(format!("{catalog}-{schema}_copy_from.sql"));
let mut writer =
BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?);
for table_file in dir_filenames {
let table_file = table_file.unwrap();
let table_name = table_file
.file_name()
.into_string()
.unwrap()
.replace(".parquet", "");
writer
.write(
format!(
"copy {} from '{}' with (format='parquet');\n",
table_name,
table_file.path().to_str().unwrap()
)
.as_bytes(),
)
.await
.context(FileIoSnafu)?;
}
writer.flush().await.context(FileIoSnafu)?;
info!("finished exporting {catalog}.{schema} copy_from.sql");
Ok::<(), Error>(())
});
}
let success = futures::future::join_all(tasks)
.await
.into_iter()
.filter(|r| match r {
Ok(_) => true,
Err(e) => {
error!(e; "export job failed");
false
}
})
.count();
info!("success {success}/{db_count} jobs");
Ok(())
}
}
#[async_trait]
impl Tool for Export {
async fn do_work(&self) -> Result<()> {
match self.target {
ExportTarget::CreateTable => self.export_create_table().await,
ExportTarget::TableData => self.export_table_data().await,
}
}
}
/// Split at `-`.
fn split_database(database: &str) -> Result<(String, Option<String>)> {
let (catalog, schema) = database
.split_once('-')
.with_context(|| InvalidDatabaseNameSnafu {
database: database.to_string(),
})?;
if schema == "*" {
Ok((catalog.to_string(), None))
} else {
Ok((catalog.to_string(), Some(schema.to_string())))
}
}

View File

@@ -16,15 +16,20 @@ use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use std::time::Instant; use std::time::Instant;
use catalog::kvbackend::{CachedMetaKvBackend, KvBackendCatalogManager}; use catalog::remote::CachedMetaKvBackend;
use client::client_manager::DatanodeClients;
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_base::Plugins; use common_base::Plugins;
use common_error::ext::ErrorExt; use common_error::ext::ErrorExt;
use common_meta::key::TableMetadataManager;
use common_query::Output; use common_query::Output;
use common_recordbatch::RecordBatches; use common_recordbatch::RecordBatches;
use common_telemetry::logging; use common_telemetry::logging;
use either::Either; use either::Either;
use frontend::catalog::FrontendCatalogManager;
use meta_client::client::MetaClientBuilder; use meta_client::client::MetaClientBuilder;
use partition::manager::PartitionRuleManager;
use partition::route::TableRoutes;
use query::datafusion::DatafusionQueryEngine; use query::datafusion::DatafusionQueryEngine;
use query::logical_optimizer::LogicalOptimizer; use query::logical_optimizer::LogicalOptimizer;
use query::parser::QueryLanguageParser; use query::parser::QueryLanguageParser;
@@ -34,7 +39,7 @@ use query::QueryEngine;
use rustyline::error::ReadlineError; use rustyline::error::ReadlineError;
use rustyline::Editor; use rustyline::Editor;
use session::context::QueryContext; use session::context::QueryContext;
use snafu::ResultExt; use snafu::{ErrorCompat, ResultExt};
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};
use crate::cli::cmd::ReplCommand; use crate::cli::cmd::ReplCommand;
@@ -147,7 +152,7 @@ impl Repl {
.await .await
.map_err(|e| { .map_err(|e| {
let status_code = e.status_code(); let status_code = e.status_code();
let root_cause = e.output_msg(); let root_cause = e.iter_chain().last().unwrap();
println!("Error: {}({status_code}), {root_cause}", status_code as u32) println!("Error: {}({status_code}), {root_cause}", status_code as u32)
}) })
.is_ok() .is_ok()
@@ -175,7 +180,7 @@ impl Repl {
.encode(&plan) .encode(&plan)
.context(SubstraitEncodeLogicalPlanSnafu)?; .context(SubstraitEncodeLogicalPlanSnafu)?;
self.database.logical_plan(plan.to_vec()).await self.database.logical_plan(plan.to_vec(), None).await
} else { } else {
self.database.sql(&sql).await self.database.sql(&sql).await
} }
@@ -249,14 +254,24 @@ async fn create_query_engine(meta_addr: &str) -> Result<DatafusionQueryEngine> {
let cached_meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone())); let cached_meta_backend = Arc::new(CachedMetaKvBackend::new(meta_client.clone()));
let catalog_list = let table_routes = Arc::new(TableRoutes::new(meta_client));
KvBackendCatalogManager::new(cached_meta_backend.clone(), cached_meta_backend); let partition_manager = Arc::new(PartitionRuleManager::new(table_routes));
let plugins: Plugins = Default::default();
let datanode_clients = Arc::new(DatanodeClients::default());
let catalog_list = Arc::new(FrontendCatalogManager::new(
cached_meta_backend.clone(),
cached_meta_backend.clone(),
partition_manager,
datanode_clients,
Arc::new(TableMetadataManager::new(cached_meta_backend)),
));
let plugins: Arc<Plugins> = Default::default();
let state = Arc::new(QueryEngineState::new( let state = Arc::new(QueryEngineState::new(
catalog_list, catalog_list,
None,
None,
false, false,
None,
None,
plugins.clone(), plugins.clone(),
)); ));

Some files were not shown because too many files have changed in this diff Show More