Compare commits

..

2 Commits

Author  SHA1        Message         Date
LFC     6ed6e39673  release v0.3.1  2023-06-21 16:28:41 +08:00
LFC     e5c61ec290  release v0.3.1  2023-06-21 16:28:07 +08:00
1013 changed files with 21816 additions and 76128 deletions

View File

@@ -12,9 +12,5 @@ rustflags = [
"-Wclippy::print_stdout", "-Wclippy::print_stdout",
"-Wclippy::print_stderr", "-Wclippy::print_stderr",
"-Wclippy::implicit_clone", "-Wclippy::implicit_clone",
"-Aclippy::items_after_test_module",
# It seems clippy has made a false positive decision here when upgrading rust toolchain to
# nightly-2023-08-07, we do need it to be borrowed mutably.
# Allow it for now; try disallow it when the toolchain is upgraded in the future.
"-Aclippy::needless_pass_by_ref_mut",
] ]
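For a quick local spot-check of the same lint set, an invocation along these lines should work (a sketch; the lint names come from the config above, and the pinned nightly toolchain is assumed to be installed):

# Run clippy with the same warn/allow switches as the rustflags above (sketch).
cargo clippy --workspace --all-targets -- \
  -W clippy::print_stdout \
  -W clippy::print_stderr \
  -W clippy::implicit_clone \
  -A clippy::needless_pass_by_ref_mut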

View File

@@ -1,3 +1,2 @@
[profile.default]
slow-timeout = { period = "60s", terminate-after = 3, grace-period = "30s" }
retries = { backoff = "exponential", count = 3, delay = "10s", jitter = true }
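Assuming this is the nextest profile file (typically .config/nextest.toml), the slow-timeout and retry settings apply to any nextest run in the repo; a minimal sketch:

# nextest reads [profile.default] from .config/nextest.toml automatically;
# --profile default only makes the selection explicit.
cargo nextest run --profile default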

View File

@@ -20,3 +20,6 @@ out/
# Rust
target/
# Git
.git

View File

@@ -14,8 +14,4 @@ GT_AZBLOB_CONTAINER=AZBLOB container
GT_AZBLOB_ACCOUNT_NAME=AZBLOB account name
GT_AZBLOB_ACCOUNT_KEY=AZBLOB account key
GT_AZBLOB_ENDPOINT=AZBLOB endpoint
# Settings for gcs test
GT_GCS_BUCKET = GCS bucket
GT_GCS_SCOPE = GCS scope
GT_GCS_CREDENTIAL_PATH = GCS credential path
GT_GCS_ENDPOINT = GCS end point
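As a hedged sketch, the remaining AZBLOB settings would be exported before running the object-store tests; all values below are placeholders, and the plain nextest invocation is an assumption about how the tests are driven:

# Placeholder values -- substitute real Azure Blob credentials before running.
export GT_AZBLOB_CONTAINER="my-container"
export GT_AZBLOB_ACCOUNT_NAME="my-account"
export GT_AZBLOB_ACCOUNT_KEY="my-key"
export GT_AZBLOB_ENDPOINT="https://my-account.blob.core.windows.net"
cargo nextest run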

View File

@@ -1,93 +0,0 @@
name: Build and push dev-builder image
description: Build and push dev-builder image to DockerHub and ACR
inputs:
dockerhub-image-registry:
description: The dockerhub image registry to store the images
required: false
default: docker.io
dockerhub-image-registry-username:
description: The dockerhub username to login to the image registry
required: true
dockerhub-image-registry-token:
description: The dockerhub token to login to the image registry
required: true
dockerhub-image-namespace:
description: The dockerhub namespace of the image registry to store the images
required: false
default: greptime
acr-image-registry:
description: The ACR image registry to store the images
required: true
acr-image-registry-username:
description: The ACR username to login to the image registry
required: true
acr-image-registry-password:
description: The ACR password to login to the image registry
required: true
acr-image-namespace:
description: The ACR namespace of the image registry to store the images
required: false
default: greptime
version:
description: Version of the dev-builder
required: false
default: latest
runs:
using: composite
steps:
- name: Login to Dockerhub
uses: docker/login-action@v2
with:
registry: ${{ inputs.dockerhub-image-registry }}
username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }}
- name: Build and push ubuntu dev builder image to dockerhub
shell: bash
run:
make dev-builder \
BASE_IMAGE=ubuntu \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Build and push centos dev builder image to dockerhub
shell: bash
run:
make dev-builder \
BASE_IMAGE=centos \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Login to ACR
uses: docker/login-action@v2
continue-on-error: true
with:
registry: ${{ inputs.acr-image-registry }}
username: ${{ inputs.acr-image-registry-username }}
password: ${{ inputs.acr-image-registry-password }}
- name: Build and push ubuntu dev builder image to ACR
shell: bash
continue-on-error: true
run: # buildx caches images that have already been built, so rebuilding them will not take long.
make dev-builder \
BASE_IMAGE=ubuntu \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.acr-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.acr-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
- name: Build and push centos dev builder image to ACR
shell: bash
continue-on-error: true
run: # buildx caches images that have already been built, so rebuilding them will not take long.
make dev-builder \
BASE_IMAGE=centos \
BUILDX_MULTI_PLATFORM_BUILD=true \
IMAGE_REGISTRY=${{ inputs.acr-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.acr-image-namespace }} \
IMAGE_TAG=${{ inputs.version }}
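Outside CI, the same image can be built with the make target this action wraps; a sketch with assumed registry values (only the make target and variable names come from the steps above):

# Build the ubuntu-based dev-builder image locally; push/multi-platform behaviour
# depends on the Makefile defaults.
make dev-builder \
  BASE_IMAGE=ubuntu \
  IMAGE_REGISTRY=docker.io \
  IMAGE_NAMESPACE=greptime \
  IMAGE_TAG=latest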

View File

@@ -1,67 +0,0 @@
name: Build greptime binary
description: Build and upload the single linux artifact
inputs:
base-image:
description: Base image to build greptime
required: true
features:
description: Cargo features to build
required: true
cargo-profile:
description: Cargo profile to build
required: true
artifacts-dir:
description: Directory to store artifacts
required: true
version:
description: Version of the artifact
required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
working-dir:
description: Working directory to build the artifacts
required: false
default: .
runs:
using: composite
steps:
- name: Build greptime binary
shell: bash
run: |
cd ${{ inputs.working-dir }} && \
make build-by-dev-builder \
CARGO_PROFILE=${{ inputs.cargo-profile }} \
FEATURES=${{ inputs.features }} \
BASE_IMAGE=${{ inputs.base-image }}
- name: Upload artifacts
uses: ./.github/actions/upload-artifacts
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-file: ./target/${{ inputs.cargo-profile }}/greptime
version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}

View File

@@ -1,104 +0,0 @@
name: Build greptime images
description: Build and push greptime images
inputs:
image-registry:
description: The image registry to store the images
required: true
image-registry-username:
description: The username to login to the image registry
required: true
image-registry-password:
description: The password to login to the image registry
required: true
amd64-artifact-name:
description: The name of the amd64 artifact for building images
required: true
arm64-artifact-name:
description: The name of the arm64 artifact for building images
required: false
default: ""
image-namespace:
description: The namespace of the image registry to store the images
required: true
image-name:
description: The name of the image to build
required: true
image-tag:
description: The tag of the image to build
required: true
docker-file:
description: The path to the Dockerfile to build
required: true
platforms:
description: The supported platforms to build the image
required: true
push-latest-tag:
description: Whether to push the latest tag
required: false
default: 'true'
runs:
using: composite
steps:
- name: Login to image registry
uses: docker/login-action@v2
with:
registry: ${{ inputs.image-registry }}
username: ${{ inputs.image-registry-username }}
password: ${{ inputs.image-registry-password }}
- name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v2
- name: Set up buildx
uses: docker/setup-buildx-action@v2
- name: Download amd64 artifacts
uses: actions/download-artifact@v3
with:
name: ${{ inputs.amd64-artifact-name }}
- name: Unzip the amd64 artifacts
shell: bash
run: |
tar xvf ${{ inputs.amd64-artifact-name }}.tar.gz && \
rm ${{ inputs.amd64-artifact-name }}.tar.gz && \
rm -rf amd64 && \
mv ${{ inputs.amd64-artifact-name }} amd64
- name: Download arm64 artifacts
uses: actions/download-artifact@v3
if: ${{ inputs.arm64-artifact-name }}
with:
name: ${{ inputs.arm64-artifact-name }}
- name: Unzip the arm64 artifacts
shell: bash
if: ${{ inputs.arm64-artifact-name }}
run: |
tar xvf ${{ inputs.arm64-artifact-name }}.tar.gz && \
rm ${{ inputs.arm64-artifact-name }}.tar.gz && \
rm -rf arm64 && \
mv ${{ inputs.arm64-artifact-name }} arm64
- name: Build and push images(without latest) for amd64 and arm64
if: ${{ inputs.push-latest-tag == 'false' }}
uses: docker/build-push-action@v3
with:
context: .
file: ${{ inputs.docker-file }}
push: true
platforms: ${{ inputs.platforms }}
tags: |
${{ inputs.image-registry }}/${{ inputs.image-namespace }}/${{ inputs.image-name }}:${{ inputs.image-tag }}
- name: Build and push images for amd64 and arm64
if: ${{ inputs.push-latest-tag == 'true' }}
uses: docker/build-push-action@v3
with:
context: .
file: ${{ inputs.docker-file }}
push: true
platforms: ${{ inputs.platforms }}
tags: |
${{ inputs.image-registry }}/${{ inputs.image-namespace }}/${{ inputs.image-name }}:latest
${{ inputs.image-registry }}/${{ inputs.image-namespace }}/${{ inputs.image-name }}:${{ inputs.image-tag }}
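For reference, docker/build-push-action@v3 with these inputs corresponds roughly to a buildx invocation like the one below; the registry, namespace, image name and tag are illustrative placeholders:

# Approximate CLI equivalent of the build-push-action step (sketch).
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  -f docker/ci/ubuntu/Dockerfile \
  -t docker.io/greptime/greptimedb:v0.3.1 \
  --push \
  .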

View File

@@ -1,62 +0,0 @@
name: Group for building greptimedb images
description: Group for building greptimedb images
inputs:
image-registry:
description: The image registry to store the images
required: true
image-namespace:
description: The namespace of the image registry to store the images
required: true
image-name:
description: The name of the image to build
required: false
default: greptimedb
image-registry-username:
description: The username to login to the image registry
required: true
image-registry-password:
description: The password to login to the image registry
required: true
version:
description: Version of the artifact
required: true
push-latest-tag:
description: Whether to push the latest tag
required: false
default: 'true'
dev-mode:
description: Enable dev mode, only build standard greptime
required: false
default: 'false'
runs:
using: composite
steps:
- name: Build and push standard images to dockerhub
uses: ./.github/actions/build-greptime-images
with: # The image will be used as '${{ inputs.image-registry }}/${{ inputs.image-namespace }}/${{ inputs.image-name }}:${{ inputs.version }}'
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
image-registry-username: ${{ inputs.image-registry-username }}
image-registry-password: ${{ inputs.image-registry-password }}
image-name: ${{ inputs.image-name }}
image-tag: ${{ inputs.version }}
docker-file: docker/ci/ubuntu/Dockerfile
amd64-artifact-name: greptime-linux-amd64-pyo3-${{ inputs.version }}
arm64-artifact-name: greptime-linux-arm64-pyo3-${{ inputs.version }}
platforms: linux/amd64,linux/arm64
push-latest-tag: ${{ inputs.push-latest-tag }}
- name: Build and push centos images to dockerhub
if: ${{ inputs.dev-mode == 'false' }}
uses: ./.github/actions/build-greptime-images
with:
image-registry: ${{ inputs.image-registry }}
image-namespace: ${{ inputs.image-namespace }}
image-registry-username: ${{ inputs.image-registry-username }}
image-registry-password: ${{ inputs.image-registry-password }}
image-name: ${{ inputs.image-name }}-centos
image-tag: ${{ inputs.version }}
docker-file: docker/ci/centos/Dockerfile
amd64-artifact-name: greptime-linux-amd64-centos-${{ inputs.version }}
platforms: linux/amd64
push-latest-tag: ${{ inputs.push-latest-tag }}

View File

@@ -1,116 +0,0 @@
name: Build linux artifacts
description: Build linux artifacts
inputs:
arch:
description: Architecture to build
required: true
cargo-profile:
description: Cargo profile to build
required: true
version:
description: Version of the artifact
required: true
disable-run-tests:
description: Disable running integration tests
required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
dev-mode:
description: Enable dev mode, only build standard greptime
required: false
default: 'false'
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
working-dir:
description: Working directory to build the artifacts
required: false
default: .
runs:
using: composite
steps:
- name: Run integration test
if: ${{ inputs.disable-run-tests == 'false' }}
shell: bash
# NOTE: If BUILD_JOBS > 4, the build always OOMs on the EC2 instance.
run: |
cd ${{ inputs.working-dir }} && \
make run-it-in-container BUILD_JOBS=4
- name: Upload sqlness logs
if: ${{ failure() && inputs.disable-run-tests == 'false' }} # Only upload logs when the integration tests failed.
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: /tmp/greptime-*.log
retention-days: 3
- name: Build standard greptime
uses: ./.github/actions/build-greptime-binary
with:
base-image: ubuntu
features: pyo3_backend,servers/dashboard
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-pyo3-${{ inputs.version }}
version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}
- name: Build greptime without pyo3
if: ${{ inputs.dev-mode == 'false' }}
uses: ./.github/actions/build-greptime-binary
with:
base-image: ubuntu
features: servers/dashboard
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}
- name: Clean up the target directory # Clean up the target directory before the centos7 base image build, or it will still use the objects from the last build.
shell: bash
run: |
rm -rf ./target/
- name: Build greptime on centos base image
uses: ./.github/actions/build-greptime-binary
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Only build centos7 base image for amd64.
with:
base-image: centos
features: servers/dashboard
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
upload-to-s3: ${{ inputs.upload-to-s3 }}
upload-latest-artifacts: ${{ inputs.upload-latest-artifacts }}
working-dir: ${{ inputs.working-dir }}

View File

@@ -1,105 +0,0 @@
name: Build macos artifacts
description: Build macos artifacts
inputs:
arch:
description: Architecture to build
required: true
rust-toolchain:
description: Rust toolchain to use
required: true
cargo-profile:
description: Cargo profile to build
required: true
features:
description: Cargo features to build
required: true
version:
description: Version of the artifact
required: true
disable-run-tests:
description: Disable running integration tests
required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
artifacts-dir:
description: Directory to store artifacts
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
runs:
using: composite
steps:
- name: Cache cargo assets
id: cache
uses: actions/cache@v3
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ inputs.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}
- name: Install protoc
shell: bash
run: |
brew install protobuf
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ inputs.rust-toolchain }}
targets: ${{ inputs.arch }}
- name: Start etcd # For integration tests.
if: ${{ inputs.disable-run-tests == 'false' }}
shell: bash
run: |
brew install etcd && \
brew services start etcd
- name: Install latest nextest release # For integration tests.
if: ${{ inputs.disable-run-tests == 'false' }}
uses: taiki-e/install-action@nextest
- name: Run integration tests
if: ${{ inputs.disable-run-tests == 'false' }}
shell: bash
run: |
make test sqlness-test
- name: Upload sqlness logs
if: ${{ failure() }} # Only upload logs when the integration tests failed.
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: /tmp/greptime-*.log
retention-days: 3
- name: Build greptime binary
shell: bash
run: |
make build \
CARGO_PROFILE=${{ inputs.cargo-profile }} \
FEATURES=${{ inputs.features }} \
TARGET=${{ inputs.arch }}
- name: Upload artifacts
uses: ./.github/actions/upload-artifacts
with:
artifacts-dir: ${{ inputs.artifacts-dir }}
target-file: target/${{ inputs.arch }}/${{ inputs.cargo-profile }}/greptime
version: ${{ inputs.version }}
release-to-s3-bucket: ${{ inputs.release-to-s3-bucket }}
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}

View File

@@ -1,50 +0,0 @@
name: Release artifacts
description: Release artifacts
inputs:
version:
description: Version to release
required: true
runs:
using: composite
steps:
# Download artifacts from previous jobs; they will be downloaded to:
# ${WORKING_DIR}
# |- greptime-darwin-amd64-pyo3-v0.5.0/greptime-darwin-amd64-pyo3-v0.5.0.tar.gz
# |- greptime-darwin-amd64-pyo3-v0.5.0.sha256sum/greptime-darwin-amd64-pyo3-v0.5.0.sha256sum
# |- greptime-darwin-amd64-v0.5.0/greptime-darwin-amd64-v0.5.0.tar.gz
# |- greptime-darwin-amd64-v0.5.0.sha256sum/greptime-darwin-amd64-v0.5.0.sha256sum
# ...
- name: Download artifacts
uses: actions/download-artifact@v3
- name: Create git tag for release
if: ${{ github.event_name != 'push' }} # Meaning this is a scheduled or manual workflow.
shell: bash
run: |
git tag ${{ inputs.version }}
# Only publish release when the release tag is like v1.0.0, v1.0.1, v1.0.2, etc.
- name: Set release arguments
shell: bash
run: |
if [[ "${{ inputs.version }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "prerelease=false" >> $GITHUB_ENV
echo "makeLatest=true" >> $GITHUB_ENV
echo "generateReleaseNotes=false" >> $GITHUB_ENV
else
echo "prerelease=true" >> $GITHUB_ENV
echo "makeLatest=false" >> $GITHUB_ENV
echo "generateReleaseNotes=true" >> $GITHUB_ENV
fi
- name: Publish release
uses: ncipollo/release-action@v1
with:
name: "Release ${{ inputs.version }}"
prerelease: ${{ env.prerelease }}
makeLatest: ${{ env.makeLatest }}
tag: ${{ inputs.version }}
generateReleaseNotes: ${{ env.generateReleaseNotes }}
allowUpdates: true
artifacts: |
**/greptime-*/*
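The prerelease decision above hinges on a single tag pattern; pulled out on its own it can be checked from any shell (the tag value is just an example):

# Formal release tags look like vX.Y.Z; anything else is treated as a prerelease.
version="v0.3.1"
if [[ "$version" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
  echo "prerelease=false"
else
  echo "prerelease=true"
fi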

View File

@@ -1,67 +0,0 @@
name: Start EC2 runner
description: Start EC2 runner
inputs:
runner:
description: The linux runner name
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
github-token:
description: The GitHub token to clone private repository
required: false
default: ""
image-id:
description: The EC2 image id
required: true
security-group-id:
description: The EC2 security group id
required: true
subnet-id:
description: The EC2 subnet id
required: true
outputs:
label:
description: "label"
value: ${{ steps.start-linux-arm64-ec2-runner.outputs.label || inputs.runner }}
ec2-instance-id:
description: "ec2-instance-id"
value: ${{ steps.start-linux-arm64-ec2-runner.outputs.ec2-instance-id }}
runs:
using: composite
steps:
- name: Configure AWS credentials
if: startsWith(inputs.runner, 'ec2')
uses: aws-actions/configure-aws-credentials@v2
with:
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
# The EC2 runner will use the following format:
# <vm-type>-<instance-type>-<arch>
# like 'ec2-c6a.4xlarge-amd64'.
- name: Get EC2 instance type
if: startsWith(inputs.runner, 'ec2')
id: get-ec2-instance-type
shell: bash
run: |
echo "instance-type=$(echo ${{ inputs.runner }} | cut -d'-' -f2)" >> $GITHUB_OUTPUT
- name: Start EC2 runner
if: startsWith(inputs.runner, 'ec2')
uses: machulav/ec2-github-runner@v2
id: start-linux-arm64-ec2-runner
with:
mode: start
ec2-image-id: ${{ inputs.image-id }}
ec2-instance-type: ${{ steps.get-ec2-instance-type.outputs.instance-type }}
subnet-id: ${{ inputs.subnet-id }}
security-group-id: ${{ inputs.security-group-id }}
github-token: ${{ inputs.github-token }}
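The runner-name convention described in the comment above maps directly onto the cut call in the 'Get EC2 instance type' step; as a standalone check:

runner="ec2-c6a.4xlarge-amd64"                  # <vm-type>-<instance-type>-<arch>
instance_type=$(echo "$runner" | cut -d'-' -f2)
echo "$instance_type"                           # prints: c6a.4xlarge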

View File

@@ -1,41 +0,0 @@
name: Stop EC2 runner
description: Stop EC2 runner
inputs:
label:
description: The linux runner name
required: true
ec2-instance-id:
description: The EC2 instance id
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
github-token:
description: The GitHub token to clone private repository
required: false
default: ""
runs:
using: composite
steps:
- name: Configure AWS credentials
if: ${{ inputs.label && inputs.ec2-instance-id }}
uses: aws-actions/configure-aws-credentials@v2
with:
aws-access-key-id: ${{ inputs.aws-access-key-id }}
aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
aws-region: ${{ inputs.aws-region }}
- name: Stop EC2 runner
if: ${{ inputs.label && inputs.ec2-instance-id }}
uses: machulav/ec2-github-runner@v2
with:
mode: stop
label: ${{ inputs.label }}
ec2-instance-id: ${{ inputs.ec2-instance-id }}
github-token: ${{ inputs.github-token }}

View File

@@ -1,125 +0,0 @@
name: Upload artifacts
description: Upload artifacts
inputs:
artifacts-dir:
description: Directory to store artifacts
required: true
target-file:
description: The path of the target artifact
required: true
version:
description: Version of the artifact
required: true
release-to-s3-bucket:
description: S3 bucket to store released artifacts
required: true
aws-access-key-id:
description: AWS access key id
required: true
aws-secret-access-key:
description: AWS secret access key
required: true
aws-region:
description: AWS region
required: true
upload-to-s3:
description: Upload to S3
required: false
default: 'true'
upload-latest-artifacts:
description: Upload the latest artifacts to S3
required: false
default: 'true'
upload-max-retry-times:
description: Max retry times for uploading artifacts to S3
required: false
default: "20"
upload-retry-timeout:
description: Timeout for uploading artifacts to S3
required: false
default: "10" # minutes
working-dir:
description: Working directory to upload the artifacts
required: false
default: .
runs:
using: composite
steps:
- name: Create artifacts directory
working-directory: ${{ inputs.working-dir }}
shell: bash
run: |
mkdir -p ${{ inputs.artifacts-dir }} && \
mv ${{ inputs.target-file }} ${{ inputs.artifacts-dir }}
# The compressed artifacts will use the following layout:
# greptime-linux-amd64-pyo3-v0.3.0.sha256sum
# greptime-linux-amd64-pyo3-v0.3.0.tar.gz
# greptime-linux-amd64-pyo3-v0.3.0
# └── greptime
- name: Compress artifacts and calculate checksum
working-directory: ${{ inputs.working-dir }}
shell: bash
run: |
tar -zcvf ${{ inputs.artifacts-dir }}.tar.gz ${{ inputs.artifacts-dir }} && \
echo $(shasum -a 256 ${{ inputs.artifacts-dir }}.tar.gz | cut -f1 -d' ') > ${{ inputs.artifacts-dir }}.sha256sum
# Note: The artifacts will be double zip-compressed (related issue: https://github.com/actions/upload-artifact/issues/39).
# However, when we use 'actions/download-artifact@v3' to download the artifacts, it will be automatically unzipped.
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ inputs.artifacts-dir }}
path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.tar.gz
- name: Upload checksum
uses: actions/upload-artifact@v3
with:
name: ${{ inputs.artifacts-dir }}.sha256sum
path: ${{ inputs.working-dir }}/${{ inputs.artifacts-dir }}.sha256sum
- name: Upload artifacts to S3
if: ${{ inputs.upload-to-s3 == 'true' }}
uses: nick-invision/retry@v2
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}
timeout_minutes: ${{ inputs.upload-retry-timeout }}
# The bucket layout will be:
# releases/greptimedb
# ├── v0.1.0
# │ ├── greptime-darwin-amd64-pyo3-v0.1.0.sha256sum
# │ └── greptime-darwin-amd64-pyo3-v0.1.0.tar.gz
# └── v0.2.0
# ├── greptime-darwin-amd64-pyo3-v0.2.0.sha256sum
# └── greptime-darwin-amd64-pyo3-v0.2.0.tar.gz
command: |
cd ${{ inputs.working-dir }} && \
aws s3 cp \
${{ inputs.artifacts-dir }}.tar.gz \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/${{ inputs.version }}/${{ inputs.artifacts-dir }}.tar.gz && \
aws s3 cp \
${{ inputs.artifacts-dir }}.sha256sum \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/${{ inputs.version }}/${{ inputs.artifacts-dir }}.sha256sum
- name: Upload latest artifacts to S3
if: ${{ inputs.upload-to-s3 == 'true' && inputs.upload-latest-artifacts == 'true' }} # We'll also upload the latest artifacts to S3 in the scheduled and formal release.
uses: nick-invision/retry@v2
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-secret-access-key }}
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}
timeout_minutes: ${{ inputs.upload-retry-timeout }}
command: |
cd ${{ inputs.working-dir }} && \
aws s3 cp \
${{ inputs.artifacts-dir }}.tar.gz \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/latest/${{ inputs.artifacts-dir }}.tar.gz && \
aws s3 cp \
${{ inputs.artifacts-dir }}.sha256sum \
s3://${{ inputs.release-to-s3-bucket }}/releases/greptimedb/latest/${{ inputs.artifacts-dir }}.sha256sum
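Given the bucket layout sketched in the comments above, a consumer can fetch and verify an artifact end to end; the bucket name and version below are placeholders:

# Fetch an artifact plus its checksum and verify it (placeholder bucket/version).
artifact=greptime-linux-amd64-pyo3-v0.3.1
aws s3 cp "s3://<release-bucket>/releases/greptimedb/v0.3.1/${artifact}.tar.gz" .
aws s3 cp "s3://<release-bucket>/releases/greptimedb/v0.3.1/${artifact}.sha256sum" .
echo "$(cat ${artifact}.sha256sum)  ${artifact}.tar.gz" | shasum -a 256 -c -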

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env bash
set -e
# - If it's a tag push release, the version is the tag name(${{ github.ref_name }});
# - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like 'v0.2.0-nightly-20230313';
# - If it's a manual release, the version is '${{ env.NEXT_RELEASE_VERSION }}-$(git rev-parse --short HEAD)-YYYYMMDDSS', like 'v0.2.0-e5b243c-2023071245';
# - If it's a nightly build, the version is 'nightly-YYYYMMDD-$(git rev-parse --short HEAD)', like 'nightly-20230712-e5b243c'.
# create_version ${GITHUB_EVENT_NAME} ${NEXT_RELEASE_VERSION} ${NIGHTLY_RELEASE_PREFIX}
function create_version() {
# Read from environment variables.
if [ -z "$GITHUB_EVENT_NAME" ]; then
echo "GITHUB_EVENT_NAME is empty"
exit 1
fi
if [ -z "$NEXT_RELEASE_VERSION" ]; then
echo "NEXT_RELEASE_VERSION is empty"
exit 1
fi
if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then
echo "NIGHTLY_RELEASE_PREFIX is empty"
exit 1
fi
# Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build.
# It will be like 'nightly-20230808-7d0d8dc6'.
if [ "$NEXT_RELEASE_VERSION" = nightly ]; then
echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)"
exit 0
fi
# Reuse $NEXT_RELEASE_VERSION to identify whether it's a dev build.
# It will be like 'dev-2023080819-f0e7216c'.
if [ "$NEXT_RELEASE_VERSION" = dev ]; then
if [ -z "$COMMIT_SHA" ]; then
echo "COMMIT_SHA is empty in dev build"
exit 1
fi
echo "dev-$(date "+%Y%m%d-%s")-$(echo "$COMMIT_SHA" | cut -c1-8)"
exit 0
fi
# Note: Only output 'version=xxx' to stdout when everything is ok, so that it can be used in GitHub Actions Outputs.
if [ "$GITHUB_EVENT_NAME" = push ]; then
if [ -z "$GITHUB_REF_NAME" ]; then
echo "GITHUB_REF_NAME is empty in push event"
exit 1
fi
echo "$GITHUB_REF_NAME"
elif [ "$GITHUB_EVENT_NAME" = workflow_dispatch ]; then
echo "$NEXT_RELEASE_VERSION-$(git rev-parse --short HEAD)-$(date "+%Y%m%d-%s")"
elif [ "$GITHUB_EVENT_NAME" = schedule ]; then
echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")"
else
echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME"
exit 1
fi
}
# You can run it as in the following examples:
# GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
create_version
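For orientation, the example invocations above should print version strings of roughly these shapes (dates, epoch seconds and hashes will differ per run):

# push,  GITHUB_REF_NAME=v0.3.0                     -> v0.3.0
# workflow_dispatch, NEXT_RELEASE_VERSION=v0.4.0    -> v0.4.0-<short-sha>-YYYYMMDD-<epoch>
# schedule, NEXT_RELEASE_VERSION=v0.4.0             -> v0.4.0-nightly-YYYYMMDD
# schedule, NEXT_RELEASE_VERSION=nightly            -> nightly-YYYYMMDD-<short-sha>
# workflow_dispatch, NEXT_RELEASE_VERSION=dev + SHA -> dev-YYYYMMDD-<epoch>-<sha8>
GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh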

View File

@@ -13,7 +13,7 @@ on:
name: Build API docs
env:
RUST_TOOLCHAIN: nightly-2023-08-07 RUST_TOOLCHAIN: nightly-2023-05-03
jobs:
apidoc:

View File

@@ -1,344 +0,0 @@
# The development build only builds the debug version of the artifacts, and is triggered manually.
name: GreptimeDB Development Build
on:
workflow_dispatch: # Allows you to run this workflow manually.
inputs:
repository:
description: The public repository to build
required: false
default: GreptimeTeam/greptimedb
commit: # Note: We only pull the source code and use the current workflow to build the artifacts.
description: The commit to build
required: true
linux_amd64_runner:
type: choice
description: The runner used to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-latest
- ubuntu-latest-8-cores
- ubuntu-latest-16-cores
- ubuntu-latest-32-cores
- ubuntu-latest-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
- ec2-c6i.8xlarge-amd64 # 32C64G
- ec2-c6i.16xlarge-amd64 # 64C128G
linux_arm64_runner:
type: choice
description: The runner used to build linux-arm64 artifacts
default: ec2-c6g.4xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G
- ec2-c6g.4xlarge-arm64 # 16C32G
- ec2-c6g.8xlarge-arm64 # 32C64G
- ec2-c6g.16xlarge-arm64 # 64C128G
skip_test:
description: Do not run integration tests during the build
type: boolean
default: true
build_linux_amd64_artifacts:
type: boolean
description: Build linux-amd64 artifacts
required: false
default: true
build_linux_arm64_artifacts:
type: boolean
description: Build linux-arm64 artifacts
required: false
default: true
release_images:
type: boolean
description: Build and push images to DockerHub and ACR
required: false
default: true
# Use env variables to control the whole release process.
env:
CARGO_PROFILE: nightly
# Controls whether to run tests, including unit tests, integration tests and sqlness.
DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }}
# Always use 'dev' to indicate it's the dev build.
NEXT_RELEASE_VERSION: dev
NIGHTLY_RELEASE_PREFIX: nightly
# Use the different image name to avoid conflict with the release images.
IMAGE_NAME: greptimedb-dev
# The source code will check out in the following path: '${WORKING_DIR}/dev/greptime'.
CHECKOUT_GREPTIMEDB_PATH: dev/greptimedb
jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
# The following EC2 resource ids will be used for releasing the resources.
linux-amd64-ec2-runner-label: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-amd64-ec2-runner-instance-id: ${{ steps.start-linux-amd64-runner.outputs.ec2-instance-id }}
linux-arm64-ec2-runner-label: ${{ steps.start-linux-arm64-runner.outputs.label }}
linux-arm64-ec2-runner-instance-id: ${{ steps.start-linux-arm64-runner.outputs.ec2-instance-id }}
# The 'version' is used as the global tag name of the release workflow.
version: ${{ steps.create-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Create version
id: create-version
run: |
version=$(./.github/scripts/create-version.sh) && \
echo $version && \
echo "version=$version" >> $GITHUB_OUTPUT
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
COMMIT_SHA: ${{ inputs.commit }}
NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
- name: Allocate linux-amd64 runner
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-amd64-runner
with:
runner: ${{ inputs.linux_amd64_runner || vars.DEFAULT_AMD64_RUNNER }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
image-id: ${{ vars.EC2_RUNNER_LINUX_AMD64_IMAGE_ID }}
security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
- name: Allocate linux-arm64 runner
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-arm64-runner
with:
runner: ${{ inputs.linux_arm64_runner || vars.DEFAULT_ARM64_RUNNER }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
image-id: ${{ vars.EC2_RUNNER_LINUX_ARM64_IMAGE_ID }}
security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
build-linux-amd64-artifacts:
name: Build linux-amd64 artifacts
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Checkout greptimedb
uses: actions/checkout@v3
with:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
- uses: ./.github/actions/build-linux-artifacts
with:
arch: amd64
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: true # Only build the standard greptime binary.
upload-to-s3: false # No need to upload to S3.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Checkout greptimedb
uses: actions/checkout@v3
with:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
- uses: ./.github/actions/build-linux-artifacts
with:
arch: arm64
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: true # Only build the standard greptime binary.
upload-to-s3: false # No need to upload to S3.
working-dir: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
release-images-to-dockerhub:
name: Build and push images to DockerHub
if: ${{ inputs.release_images || github.event_name == 'schedule' }}
needs: [
allocate-runners,
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ env.IMAGE_NAME }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false # Don't push the latest tag to registry.
dev-mode: true # Only build the standard images.
- name: Set build result
id: set-build-result
run: |
echo "build-result=success" >> $GITHUB_OUTPUT
release-images-to-acr:
name: Build and push images to ACR
if: ${{ inputs.release_images || github.event_name == 'schedule' }}
needs: [
allocate-runners,
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
# Pushing to ACR can easily fail due to transient network issues.
# However, we don't want to fail the whole workflow because of this.
# ACR has a daily sync with DockerHub, so don't worry about the image not being updated.
continue-on-error: true
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build and push images to ACR
uses: ./.github/actions/build-images
with:
image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ env.IMAGE_NAME }}
image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false # Don't push the latest tag to registry.
dev-mode: true # Only build the standard images.
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
needs: [
allocate-runners,
build-linux-amd64-artifacts,
]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
with:
label: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-label }}
ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-instance-id }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
stop-linux-arm64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
needs: [
allocate-runners,
build-linux-arm64-artifacts,
]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
with:
label: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-label }}
ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-instance-id }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
notification:
if: ${{ always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- name: Notify nightly build successful result
uses: slackapi/slack-github-action@v1.23.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-result == 'success' }}
with:
payload: |
{"text": "GreptimeDB's ${{ env.NEXT_RELEASE_VERSION }} build has completed successfully."}
- name: Notify nightly build failed result
uses: slackapi/slack-github-action@v1.23.0
if: ${{ needs.release-images-to-dockerhub.outputs.build-result != 'success' }}
with:
payload: |
{"text": "GreptimeDB's ${{ env.NEXT_RELEASE_VERSION }} build has failed, please check 'https://github.com/GreptimeTeam/greptimedb/actions/workflows/${{ env.NEXT_RELEASE_VERSION }}-build.yml'."}

View File

@@ -1,5 +1,4 @@
on:
merge_group:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
paths-ignore:
@@ -24,12 +23,8 @@ on:
name: CI
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
RUST_TOOLCHAIN: nightly-2023-08-07 RUST_TOOLCHAIN: nightly-2023-05-03
jobs:
typos:
@@ -55,7 +50,7 @@ jobs:
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Run cargo check
run: cargo check --locked --workspace --all-targets run: cargo check --workspace --all-targets
toml:
name: Toml Check
@@ -66,22 +61,62 @@ jobs:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@master
with:
toolchain: stable toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install taplo
run: cargo +stable install taplo-cli --version ^0.8 --locked run: cargo install taplo-cli --version ^0.8 --locked
- name: Run taplo
run: taplo format --check run: taplo format --check --option "indent_string= "
# Use coverage to run test.
# test:
# name: Test Suite
# if: github.event.pull_request.draft == false
# runs-on: ubuntu-latest
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v3
# - name: Cache LLVM and Clang
# id: cache-llvm
# uses: actions/cache@v3
# with:
# path: ./llvm
# key: llvm
# - uses: arduino/setup-protoc@v1
# with:
# repo-token: ${{ secrets.GITHUB_TOKEN }}
# - uses: KyleMayes/install-llvm-action@v1
# with:
# version: "14.0"
# cached: ${{ steps.cache-llvm.outputs.cache-hit }}
# - uses: dtolnay/rust-toolchain@master
# with:
# toolchain: ${{ env.RUST_TOOLCHAIN }}
# - name: Rust Cache
# uses: Swatinem/rust-cache@v2
# - name: Cleanup disk
# uses: curoky/cleanup-disk-action@v2.0
# with:
# retain: 'rust,llvm'
# - name: Install latest nextest release
# uses: taiki-e/install-action@nextest
# - name: Run tests
# run: cargo nextest run
# env:
# CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
# RUST_BACKTRACE: 1
# GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
# GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
# GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
# UNITTEST_LOG_DIR: "__unittest_logs"
sqlness:
name: Sqlness Test
if: github.event.pull_request.draft == false
runs-on: ${{ matrix.os }} runs-on: ubuntu-latest-8-cores
strategy:
matrix:
os: [ ubuntu-latest-8-cores, windows-latest-8-cores ]
timeout-minutes: 60
needs: [clippy]
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
@@ -92,14 +127,25 @@
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Run etcd
run: |
ETCD_VER=v3.5.7
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
mkdir -p /tmp/etcd-download
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
sudo cp -a /tmp/etcd-download/etcd* /usr/local/bin/
nohup etcd >/tmp/etcd.log 2>&1 &
- name: Run sqlness
run: cargo sqlness run: cargo sqlness && ls /tmp
- name: Upload sqlness logs
if: always()
uses: actions/upload-artifact@v3
with:
name: sqlness-logs
path: ${{ runner.temp }}/greptime-*.log path: /tmp/greptime-*.log
retention-days: 3
fmt:
@@ -144,6 +190,7 @@
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest-8-cores
timeout-minutes: 60
needs: [clippy]
steps:
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
@@ -188,43 +235,3 @@
flags: rust
fail_ci_if_error: false
verbose: true
test-on-windows:
if: github.event.pull_request.draft == false
runs-on: windows-latest-8-cores
timeout-minutes: 60
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v3
- uses: arduino/setup-protoc@v1
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
components: llvm-tools-preview
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install Cargo Nextest
uses: taiki-e/install-action@nextest
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install PyArrow Package
run: pip install pyarrow
- name: Install WSL distribution
uses: Vampire/setup-wsl@v2
with:
distribution: Ubuntu-22.04
- name: Running tests
run: cargo nextest run -F pyo3_backend,dashboard
env:
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.S3_ACCESS_KEY }}
GT_S3_REGION: ${{ secrets.S3_REGION }}
UNITTEST_LOG_DIR: "__unittest_logs"

View File

@@ -1,5 +1,4 @@
on:
merge_group:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
paths:
@@ -28,13 +27,6 @@ name: CI
# https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/troubleshooting-required-status-checks#handling-skipped-but-required-checks
jobs:
typos:
name: Spell Check with Typos
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: crate-ci/typos@v1.13.10
check:
name: Check
if: github.event.pull_request.draft == false
@@ -61,10 +53,3 @@ jobs:
runs-on: ubuntu-latest
steps:
- run: 'echo "No action required"'
sqlness:
name: Sqlness Test
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- run: 'echo "No action required"'

View File

@@ -1,311 +0,0 @@
# The nightly build only does the following things:
# 1. Run integration tests;
# 2. Build binaries and images for the linux-amd64 and linux-arm64 platforms;
name: GreptimeDB Nightly Build
on:
schedule:
# Trigger at 00:00(UTC) on every day-of-week from Monday through Friday.
- cron: '0 0 * * 1-5'
workflow_dispatch: # Allows you to run this workflow manually.
inputs:
linux_amd64_runner:
type: choice
description: The runner used to build linux-amd64 artifacts
default: ec2-c6i.2xlarge-amd64
options:
- ubuntu-latest
- ubuntu-latest-8-cores
- ubuntu-latest-16-cores
- ubuntu-latest-32-cores
- ubuntu-latest-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
- ec2-c6i.8xlarge-amd64 # 32C64G
- ec2-c6i.16xlarge-amd64 # 64C128G
linux_arm64_runner:
type: choice
description: The runner used to build linux-arm64 artifacts
default: ec2-c6g.2xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G
- ec2-c6g.4xlarge-arm64 # 16C32G
- ec2-c6g.8xlarge-arm64 # 32C64G
- ec2-c6g.16xlarge-arm64 # 64C128G
skip_test:
description: Do not run integration tests during the build
type: boolean
default: true
build_linux_amd64_artifacts:
type: boolean
description: Build linux-amd64 artifacts
required: false
default: false
build_linux_arm64_artifacts:
type: boolean
description: Build linux-arm64 artifacts
required: false
default: false
release_images:
type: boolean
description: Build and push images to DockerHub and ACR
required: false
default: false
# Use env variables to control the whole release process.
env:
CARGO_PROFILE: nightly
# Controls whether to run tests, including unit tests, integration tests and sqlness.
DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }}
# Always use 'nightly' to indicate it's the nightly build.
NEXT_RELEASE_VERSION: nightly
NIGHTLY_RELEASE_PREFIX: nightly
jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
# The following EC2 resource ids will be used for releasing the resources.
linux-amd64-ec2-runner-label: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-amd64-ec2-runner-instance-id: ${{ steps.start-linux-amd64-runner.outputs.ec2-instance-id }}
linux-arm64-ec2-runner-label: ${{ steps.start-linux-arm64-runner.outputs.label }}
linux-arm64-ec2-runner-instance-id: ${{ steps.start-linux-arm64-runner.outputs.ec2-instance-id }}
# The 'version' is used as the global tag name of the release workflow.
version: ${{ steps.create-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Create version
id: create-version
run: |
version=$(./.github/scripts/create-version.sh) && \
echo $version && \
echo "version=$version" >> $GITHUB_OUTPUT
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
- name: Allocate linux-amd64 runner
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-amd64-runner
with:
runner: ${{ inputs.linux_amd64_runner || vars.DEFAULT_AMD64_RUNNER }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
image-id: ${{ vars.EC2_RUNNER_LINUX_AMD64_IMAGE_ID }}
security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
- name: Allocate linux-arm64 runner
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-arm64-runner
with:
runner: ${{ inputs.linux_arm64_runner || vars.DEFAULT_ARM64_RUNNER }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
image-id: ${{ vars.EC2_RUNNER_LINUX_ARM64_IMAGE_ID }}
security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
build-linux-amd64-artifacts:
name: Build linux-amd64 artifacts
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: ./.github/actions/build-linux-artifacts
with:
arch: amd64
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-latest-artifacts: false
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: ./.github/actions/build-linux-artifacts
with:
arch: arm64
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-latest-artifacts: false
release-images-to-dockerhub:
name: Build and push images to DockerHub
if: ${{ inputs.release_images || github.event_name == 'schedule' }}
needs: [
allocate-runners,
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false # Don't push the latest tag to registry.
- name: Set nightly build result
id: set-nightly-build-result
run: |
echo "nightly-build-result=success" >> $GITHUB_OUTPUT
release-images-to-acr:
name: Build and push images to ACR
if: ${{ inputs.release_images || github.event_name == 'schedule' }}
needs: [
allocate-runners,
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
# Pushing to ACR can easily fail due to transient network issues.
# However, we don't want to fail the whole workflow because of this.
# ACR has a daily sync with DockerHub, so don't worry about the image not being updated.
continue-on-error: true
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build and push images to ACR
uses: ./.github/actions/build-images
with:
image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false # Don't push the latest tag to registry.
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
needs: [
allocate-runners,
build-linux-amd64-artifacts,
]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
with:
label: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-label }}
ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-instance-id }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
stop-linux-arm64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
needs: [
allocate-runners,
build-linux-arm64-artifacts,
]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
with:
label: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-label }}
ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-instance-id }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
notification:
if: ${{ always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- name: Notify nightly build successful result
uses: slackapi/slack-github-action@v1.23.0
if: ${{ needs.release-images-to-dockerhub.outputs.nightly-build-result == 'success' }}
with:
payload: |
{"text": "GreptimeDB's ${{ env.NEXT_RELEASE_VERSION }} build has completed successfully."}
- name: Notify nightly build failed result
uses: slackapi/slack-github-action@v1.23.0
if: ${{ needs.release-images-to-dockerhub.outputs.nightly-build-result != 'success' }}
with:
payload: |
{"text": "GreptimeDB's ${{ env.NEXT_RELEASE_VERSION }} build has failed, please check 'https://github.com/GreptimeTeam/greptimedb/actions/workflows/${{ env.NEXT_RELEASE_VERSION }}-build.yml'."}

View File

@@ -1,8 +1,3 @@
name: Release
# There are two kinds of formal release:
# 1. The tag('v*.*.*') push release: the release workflow will be triggered by the tag push event.
# 2. The scheduled release (the version will be '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD'): the release workflow will be triggered by the schedule event.
on: on:
push: push:
tags: tags:
@@ -10,386 +5,475 @@ on:
schedule: schedule:
# At 00:00 on Monday. # At 00:00 on Monday.
- cron: '0 0 * * 1' - cron: '0 0 * * 1'
workflow_dispatch: # Allows you to run this workflow manually. # Manually triggering only builds binaries.
# Notes: GitHub Actions ONLY supports 10 inputs, and they are already used up. workflow_dispatch:
inputs: inputs:
linux_amd64_runner: dry_run:
type: choice description: 'Skip docker push and release steps'
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-latest
- ubuntu-latest-8-cores
- ubuntu-latest-16-cores
- ubuntu-latest-32-cores
- ubuntu-latest-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
- ec2-c6i.8xlarge-amd64 # 32C64G
- ec2-c6i.16xlarge-amd64 # 64C128G
linux_arm64_runner:
type: choice
description: The runner uses to build linux-arm64 artifacts
default: ec2-c6g.4xlarge-arm64
options:
- ec2-c6g.xlarge-arm64 # 4C8G
- ec2-c6g.2xlarge-arm64 # 8C16G
- ec2-c6g.4xlarge-arm64 # 16C32G
- ec2-c6g.8xlarge-arm64 # 32C64G
- ec2-c6g.16xlarge-arm64 # 64C128G
macos_runner:
type: choice
description: The runner uses to build macOS artifacts
default: macos-latest
options:
- macos-latest
skip_test:
description: Do not run integration tests during the build
type: boolean type: boolean
default: true default: true
build_linux_amd64_artifacts: skip_test:
description: 'Do not run tests during build'
type: boolean type: boolean
description: Build linux-amd64 artifacts
required: false
default: false
build_linux_arm64_artifacts:
type: boolean
description: Build linux-arm64 artifacts
required: false
default: false
build_macos_artifacts:
type: boolean
description: Build macos artifacts
required: false
default: false
release_artifacts:
type: boolean
description: Create GitHub release and upload artifacts
required: false
default: false
release_images:
type: boolean
description: Build and push images to DockerHub and ACR
required: false
default: false
release_dev_builder_image:
type: boolean
description: Release dev-builder image
required: false
default: false default: false
# Use env variables to control all the release process. name: Release
env: env:
# The arguments of building greptime. RUST_TOOLCHAIN: nightly-2023-05-03
RUST_TOOLCHAIN: nightly-2023-08-07
SCHEDULED_BUILD_VERSION_PREFIX: v0.3.1
SCHEDULED_PERIOD: nightly
CARGO_PROFILE: nightly CARGO_PROFILE: nightly
# Controls whether to run tests, include unit-test, integration-test and sqlness. # Controls whether to run tests, include unit-test, integration-test and sqlness.
DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }} DISABLE_RUN_TESTS: ${{ inputs.skip_test || false }}
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nightly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be updated manually for every formal release.
NEXT_RELEASE_VERSION: v0.4.0
jobs: jobs:
allocate-runners: build-macos:
name: Allocate runners name: Build macOS binary
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
macos-runner: ${{ inputs.macos_runner || vars.DEFAULT_MACOS_RUNNER }}
# The following EC2 resource id will be used for resource releasing.
linux-amd64-ec2-runner-label: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-amd64-ec2-runner-instance-id: ${{ steps.start-linux-amd64-runner.outputs.ec2-instance-id }}
linux-arm64-ec2-runner-label: ${{ steps.start-linux-arm64-runner.outputs.label }}
linux-arm64-ec2-runner-instance-id: ${{ steps.start-linux-arm64-runner.outputs.ec2-instance-id }}
# The 'version' is used as the global tag name of the release workflow.
version: ${{ steps.create-version.outputs.version }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
# The create-version step creates a global variable named 'version' that is shared across the workflow (see the sketch after this step).
# - If it's a tag push release, the version is the tag name (${{ github.ref_name }});
# - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like v0.2.0-nightly-20230313;
# - If it's a manual release, the version is '${{ env.NEXT_RELEASE_VERSION }}-<short-git-sha>-YYYYMMDDSS', like v0.2.0-e5b243c-2023071245;
- name: Create version
id: create-version
run: |
echo "version=$(./.github/scripts/create-version.sh)" >> $GITHUB_OUTPUT
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }}
NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
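The rules above are implemented in .github/scripts/create-version.sh, which is not part of this diff. A rough bash sketch of that selection logic, using the same environment variables the step passes in (hypothetical, for illustration only):

#!/usr/bin/env bash
# Hypothetical sketch of the version-selection rules documented above;
# the real .github/scripts/create-version.sh may differ.
set -euo pipefail

if [ "$GITHUB_EVENT_NAME" = "push" ]; then
  # Tag push release: the version is the pushed tag name.
  echo "$GITHUB_REF_NAME"
elif [ "$GITHUB_EVENT_NAME" = "schedule" ]; then
  # Scheduled release: <next-version>-nightly-YYYYMMDD.
  echo "${NEXT_RELEASE_VERSION}-${NIGHTLY_RELEASE_PREFIX}-$(date '+%Y%m%d')"
else
  # Manual release: <next-version>-<short-git-sha>-YYYYMMDDSS.
  echo "${NEXT_RELEASE_VERSION}-$(git rev-parse --short HEAD)-$(date '+%Y%m%d%S')"
fi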
- name: Allocate linux-amd64 runner
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-amd64-runner
with:
runner: ${{ inputs.linux_amd64_runner || vars.DEFAULT_AMD64_RUNNER }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
image-id: ${{ vars.EC2_RUNNER_LINUX_AMD64_IMAGE_ID }}
security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
- name: Allocate linux-arm64 runner
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner
id: start-linux-arm64-runner
with:
runner: ${{ inputs.linux_arm64_runner || vars.DEFAULT_ARM64_RUNNER }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
image-id: ${{ vars.EC2_RUNNER_LINUX_ARM64_IMAGE_ID }}
security-group-id: ${{ vars.EC2_RUNNER_SECURITY_GROUP_ID }}
subnet-id: ${{ vars.EC2_RUNNER_SUBNET_ID }}
build-linux-amd64-artifacts:
name: Build linux-amd64 artifacts
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
runs-on: ${{ needs.allocate-runners.outputs.linux-amd64-runner }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: ./.github/actions/build-linux-artifacts
with:
arch: amd64
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
build-linux-arm64-artifacts:
name: Build linux-arm64 artifacts
if: ${{ inputs.build_linux_arm64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
]
runs-on: ${{ needs.allocate-runners.outputs.linux-arm64-runner }}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: ./.github/actions/build-linux-artifacts
with:
arch: arm64
cargo-profile: ${{ env.CARGO_PROFILE }}
version: ${{ needs.allocate-runners.outputs.version }}
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }}
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
build-macos-artifacts:
name: Build macOS artifacts
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
# The file format is greptime-<os>-<arch>
include: include:
- os: ${{ needs.allocate-runners.outputs.macos-runner }} - arch: aarch64-apple-darwin
arch: aarch64-apple-darwin os: macos-latest
features: servers/dashboard file: greptime-darwin-arm64
artifacts-dir-prefix: greptime-darwin-arm64 continue-on-error: false
- os: ${{ needs.allocate-runners.outputs.macos-runner }} opts: "-F servers/dashboard"
arch: aarch64-apple-darwin - arch: x86_64-apple-darwin
features: pyo3_backend,servers/dashboard os: macos-latest
artifacts-dir-prefix: greptime-darwin-arm64-pyo3 file: greptime-darwin-amd64
- os: ${{ needs.allocate-runners.outputs.macos-runner }} continue-on-error: false
features: servers/dashboard opts: "-F servers/dashboard"
arch: x86_64-apple-darwin - arch: aarch64-apple-darwin
artifacts-dir-prefix: greptime-darwin-amd64 os: macos-latest
- os: ${{ needs.allocate-runners.outputs.macos-runner }} file: greptime-darwin-arm64-pyo3
features: pyo3_backend,servers/dashboard continue-on-error: false
arch: x86_64-apple-darwin opts: "-F pyo3_backend,servers/dashboard"
artifacts-dir-prefix: greptime-darwin-amd64-pyo3 - arch: x86_64-apple-darwin
os: macos-latest
file: greptime-darwin-amd64-pyo3
continue-on-error: false
opts: "-F pyo3_backend,servers/dashboard"
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
needs: [ continue-on-error: ${{ matrix.continue-on-error }}
allocate-runners, if: github.repository == 'GreptimeTeam/greptimedb'
]
if: ${{ inputs.build_macos_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
steps: steps:
- uses: actions/checkout@v3 - name: Checkout sources
with: uses: actions/checkout@v3
fetch-depth: 0
- uses: ./.github/actions/build-macos-artifacts - name: Cache cargo assets
id: cache
uses: actions/cache@v3
with: with:
arch: ${{ matrix.arch }} path: |
rust-toolchain: ${{ env.RUST_TOOLCHAIN }} ~/.cargo/bin/
cargo-profile: ${{ env.CARGO_PROFILE }} ~/.cargo/registry/index/
features: ${{ matrix.features }} ~/.cargo/registry/cache/
version: ${{ needs.allocate-runners.outputs.version }} ~/.cargo/git/db/
disable-run-tests: ${{ env.DISABLE_RUN_TESTS }} target/
release-to-s3-bucket: ${{ vars.AWS_RELEASE_BUCKET }} key: ${{ matrix.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}
artifacts-dir: ${{ matrix.artifacts-dir-prefix }}-${{ needs.allocate-runners.outputs.version }}
aws-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
release-images-to-dockerhub: - name: Install Protoc for macos
name: Build and push images to DockerHub if: contains(matrix.arch, 'darwin')
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }} run: |
needs: [ brew install protobuf
allocate-runners,
build-linux-amd64-artifacts, - name: Install etcd for macos
build-linux-arm64-artifacts, if: contains(matrix.arch, 'darwin')
] run: |
runs-on: ubuntu-2004-16-cores brew install etcd
brew services start etcd
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
targets: ${{ matrix.arch }}
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Output package versions
run: protoc --version ; cargo version ; rustc --version ; gcc --version ; g++ --version
- name: Run tests
if: env.DISABLE_RUN_TESTS == 'false'
run: make test sqlness-test
- name: Run cargo build
if: contains(matrix.arch, 'darwin') || contains(matrix.opts, 'pyo3_backend') == false
run: cargo build --profile ${{ env.CARGO_PROFILE }} --locked --target ${{ matrix.arch }} ${{ matrix.opts }}
- name: Calculate checksum and rename binary
shell: bash
run: |
cd target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}
chmod +x greptime
tar -zcvf ${{ matrix.file }}.tgz greptime
echo $(shasum -a 256 ${{ matrix.file }}.tgz | cut -f1 -d' ') > ${{ matrix.file }}.sha256sum
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}
path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.tgz
- name: Upload checksum of artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}.sha256sum
path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.sha256sum
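Note that the .sha256sum file written above holds only the bare hash, not the `<hash>  <filename>` line that `shasum -c` expects, so a consumer has to compare the values directly. A small verification sketch (illustrative; the artifact name is an assumption taken from the matrix above):

#!/usr/bin/env bash
# Illustrative verification of a downloaded artifact against its bare-hash .sha256sum.
set -euo pipefail

FILE="greptime-darwin-arm64.tgz"                 # assumed artifact name
EXPECTED="$(cat "${FILE%.tgz}.sha256sum")"
ACTUAL="$(shasum -a 256 "$FILE" | cut -f1 -d' ')"

if [ "$EXPECTED" = "$ACTUAL" ]; then
  echo "checksum OK"
else
  echo "checksum mismatch: expected $EXPECTED, got $ACTUAL" >&2
  exit 1
fi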
build-linux:
name: Build linux binary
strategy:
fail-fast: false
matrix:
# The file format is greptime-<os>-<arch>
include:
- arch: x86_64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-amd64
continue-on-error: false
opts: "-F servers/dashboard"
- arch: aarch64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-arm64
continue-on-error: false
opts: "-F servers/dashboard"
- arch: x86_64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-amd64-pyo3
continue-on-error: false
opts: "-F pyo3_backend,servers/dashboard"
- arch: aarch64-unknown-linux-gnu
os: ubuntu-2004-16-cores
file: greptime-linux-arm64-pyo3
continue-on-error: false
opts: "-F pyo3_backend,servers/dashboard"
runs-on: ${{ matrix.os }}
continue-on-error: ${{ matrix.continue-on-error }}
if: github.repository == 'GreptimeTeam/greptimedb'
steps: steps:
- uses: actions/checkout@v3 - name: Checkout sources
with: uses: actions/checkout@v3
fetch-depth: 0
- name: Build and push images to dockerhub - name: Cache cargo assets
uses: ./.github/actions/build-images id: cache
uses: actions/cache@v3
with: with:
image-registry: docker.io path: |
image-namespace: ${{ vars.IMAGE_NAMESPACE }} ~/.cargo/bin/
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }} ~/.cargo/registry/index/
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }} ~/.cargo/registry/cache/
version: ${{ needs.allocate-runners.outputs.version }} ~/.cargo/git/db/
target/
key: ${{ matrix.arch }}-build-cargo-${{ hashFiles('**/Cargo.lock') }}
release-images-to-acr: - name: Install Protoc for linux
name: Build and push images to ACR if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
if: ${{ inputs.release_images || github.event_name == 'push' || github.event_name == 'schedule' }} run: | # Make sure the protoc is >= 3.15
needs: [ wget https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip
allocate-runners, unzip protoc-21.9-linux-x86_64.zip -d protoc
build-linux-amd64-artifacts, sudo cp protoc/bin/protoc /usr/local/bin/
build-linux-arm64-artifacts, sudo cp -r protoc/include/google /usr/local/include/
]
runs-on: ubuntu-2004-16-cores - name: Install etcd for linux
# When we push to ACR, it's easy to fail due to some unknown network issues. if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
# However, we don't want to fail the whole workflow because of this. run: |
# The ACR has a daily sync with DockerHub, so don't worry about the image not being updated. ETCD_VER=v3.5.7
DOWNLOAD_URL=https://github.com/etcd-io/etcd/releases/download
curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
mkdir -p /tmp/etcd-download
tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download --strip-components=1
rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz
sudo cp -a /tmp/etcd-download/etcd* /usr/local/bin/
nohup etcd >/tmp/etcd.log 2>&1 &
- name: Install dependencies for linux
if: contains(matrix.arch, 'linux') && endsWith(matrix.arch, '-gnu')
run: |
sudo apt-get -y update
sudo apt-get -y install libssl-dev pkg-config g++-aarch64-linux-gnu gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu wget
# FIXME(zyy17): Should we specify the version of Python when building the binary for darwin?
- name: Compile Python 3.10.10 from source for linux
if: contains(matrix.arch, 'linux') && contains(matrix.opts, 'pyo3_backend')
run: |
sudo chmod +x ./docker/aarch64/compile-python.sh
sudo ./docker/aarch64/compile-python.sh ${{ matrix.arch }}
- name: Install rust toolchain
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
targets: ${{ matrix.arch }}
- name: Install latest nextest release
uses: taiki-e/install-action@nextest
- name: Output package versions
run: protoc --version ; cargo version ; rustc --version ; gcc --version ; g++ --version
- name: Run tests
if: env.DISABLE_RUN_TESTS == 'false'
run: make test sqlness-test
- name: Run cargo build
if: contains(matrix.arch, 'darwin') || contains(matrix.opts, 'pyo3_backend') == false
run: cargo build --profile ${{ env.CARGO_PROFILE }} --locked --target ${{ matrix.arch }} ${{ matrix.opts }}
- name: Run cargo build with pyo3 for aarch64-linux
if: contains(matrix.arch, 'aarch64-unknown-linux-gnu') && contains(matrix.opts, 'pyo3_backend')
run: |
# TODO(zyy17): We should make PYO3_CROSS_LIB_DIR configurable.
export PYTHON_INSTALL_PATH_AMD64=${PWD}/python-3.10.10/amd64
export LD_LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LIBRARY_PATH
export PATH=$PYTHON_INSTALL_PATH_AMD64/bin:$PATH
export PYO3_CROSS_LIB_DIR=${PWD}/python-3.10.10/aarch64
echo "PYO3_CROSS_LIB_DIR: $PYO3_CROSS_LIB_DIR"
alias python=$PYTHON_INSTALL_PATH_AMD64/bin/python3
alias pip=$PYTHON_INSTALL_PATH_AMD64/bin/python3-pip
cargo build --profile ${{ env.CARGO_PROFILE }} --locked --target ${{ matrix.arch }} ${{ matrix.opts }}
- name: Run cargo build with pyo3 for amd64-linux
if: contains(matrix.arch, 'x86_64-unknown-linux-gnu') && contains(matrix.opts, 'pyo3_backend')
run: |
export PYTHON_INSTALL_PATH_AMD64=${PWD}/python-3.10.10/amd64
export LD_LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LIBRARY_PATH
export PATH=$PYTHON_INSTALL_PATH_AMD64/bin:$PATH
echo "implementation=CPython" >> pyo3.config
echo "version=3.10" >> pyo3.config
echo "implementation=CPython" >> pyo3.config
echo "shared=true" >> pyo3.config
echo "abi3=true" >> pyo3.config
echo "lib_name=python3.10" >> pyo3.config
echo "lib_dir=$PYTHON_INSTALL_PATH_AMD64/lib" >> pyo3.config
echo "executable=$PYTHON_INSTALL_PATH_AMD64/bin/python3" >> pyo3.config
echo "pointer_width=64" >> pyo3.config
echo "build_flags=" >> pyo3.config
echo "suppress_build_script_link_lines=false" >> pyo3.config
cat pyo3.config
export PYO3_CONFIG_FILE=${PWD}/pyo3.config
alias python=$PYTHON_INSTALL_PATH_AMD64/bin/python3
alias pip=$PYTHON_INSTALL_PATH_AMD64/bin/python3-pip
cargo build --profile ${{ env.CARGO_PROFILE }} --locked --target ${{ matrix.arch }} ${{ matrix.opts }}
- name: Calculate checksum and rename binary
shell: bash
run: |
cd target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}
chmod +x greptime
tar -zcvf ${{ matrix.file }}.tgz greptime
echo $(shasum -a 256 ${{ matrix.file }}.tgz | cut -f1 -d' ') > ${{ matrix.file }}.sha256sum
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}
path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.tgz
- name: Upload checksum of artifacts
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.file }}.sha256sum
path: target/${{ matrix.arch }}/${{ env.CARGO_PROFILE }}/${{ matrix.file }}.sha256sum
docker:
name: Build docker image
needs: [build-linux, build-macos]
runs-on: ubuntu-latest
if: github.repository == 'GreptimeTeam/greptimedb' && !(inputs.dry_run || false)
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Login to Dockerhub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
shell: bash
if: github.event_name != 'push'
run: |
buildTime=`date "+%Y%m%d"`
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV
- name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
shell: bash
if: github.event_name == 'push'
run: |
VERSION=${{ github.ref_name }}
echo "IMAGE_TAG=${VERSION:1}" >> $GITHUB_ENV
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up buildx
uses: docker/setup-buildx-action@v2
- name: Download amd64 binary
uses: actions/download-artifact@v3
with:
name: greptime-linux-amd64-pyo3
path: amd64
- name: Unzip the amd64 artifacts
run: |
tar xvf amd64/greptime-linux-amd64-pyo3.tgz -C amd64/ && rm amd64/greptime-linux-amd64-pyo3.tgz
cp -r amd64 docker/ci
- name: Download arm64 binary
id: download-arm64
uses: actions/download-artifact@v3
with:
name: greptime-linux-arm64-pyo3
path: arm64
- name: Unzip the arm64 artifacts
id: unzip-arm64
if: success() || steps.download-arm64.conclusion == 'success'
run: |
tar xvf arm64/greptime-linux-arm64-pyo3.tgz -C arm64/ && rm arm64/greptime-linux-arm64-pyo3.tgz
cp -r arm64 docker/ci
- name: Build and push all
uses: docker/build-push-action@v3
if: success() || steps.unzip-arm64.conclusion == 'success' # Build and push all platforms if unzip-arm64 succeeds
with:
context: ./docker/ci/
file: ./docker/ci/Dockerfile
push: true
platforms: linux/amd64,linux/arm64
tags: |
greptime/greptimedb:latest
greptime/greptimedb:${{ env.IMAGE_TAG }}
- name: Build and push amd64 only
uses: docker/build-push-action@v3
if: success() || steps.download-arm64.conclusion == 'failure' # Only build and push the amd64 platform if download-arm64 fails
with:
context: ./docker/ci/
file: ./docker/ci/Dockerfile
push: true
platforms: linux/amd64
tags: |
greptime/greptimedb:latest
greptime/greptimedb:${{ env.IMAGE_TAG }}
release:
name: Release artifacts
# Release artifacts only when all the artifacts are built successfully.
needs: [build-linux, build-macos, docker]
runs-on: ubuntu-latest
if: github.repository == 'GreptimeTeam/greptimedb' && !(inputs.dry_run || false)
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Download artifacts
uses: actions/download-artifact@v3
- name: Configure scheduled build version # the version would be ${SCHEDULED_BUILD_VERSION_PREFIX}-${SCHEDULED_PERIOD}-YYYYMMDD, like v0.2.0-nightly-20230313.
shell: bash
if: github.event_name != 'push'
run: |
buildTime=`date "+%Y%m%d"`
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-${{ env.SCHEDULED_PERIOD }}-$buildTime
echo "SCHEDULED_BUILD_VERSION=${SCHEDULED_BUILD_VERSION}" >> $GITHUB_ENV
# Only publish release when the release tag is like v1.0.0, v1.0.1, v1.0.2, etc.
- name: Set whether it is the latest release
run: |
if [[ "${{ github.ref_name }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "prerelease=false" >> $GITHUB_ENV
echo "makeLatest=true" >> $GITHUB_ENV
else
echo "prerelease=true" >> $GITHUB_ENV
echo "makeLatest=false" >> $GITHUB_ENV
fi
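The regex above treats only bare semver tags as stable releases; any suffixed tag is published as a prerelease. A quick illustration of how a few tags classify (standalone bash, not part of the workflow):

# Illustrative only: classify tags with the same regex as the step above.
for tag in v0.3.1 v1.0.0 v0.4.0-nightly-20230621 v0.3.1-rc1; do
  if [[ "$tag" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    echo "$tag -> stable release (prerelease=false, makeLatest=true)"
  else
    echo "$tag -> prerelease (prerelease=true, makeLatest=false)"
  fi
done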
- name: Create scheduled build git tag
if: github.event_name != 'push'
run: |
git tag ${{ env.SCHEDULED_BUILD_VERSION }}
- name: Publish scheduled release # configure the different release title and tags.
uses: ncipollo/release-action@v1
if: github.event_name != 'push'
with:
name: "Release ${{ env.SCHEDULED_BUILD_VERSION }}"
prerelease: ${{ env.prerelease }}
makeLatest: ${{ env.makeLatest }}
tag: ${{ env.SCHEDULED_BUILD_VERSION }}
generateReleaseNotes: true
artifacts: |
**/greptime-*
- name: Publish release
uses: ncipollo/release-action@v1
if: github.event_name == 'push'
with:
name: "${{ github.ref_name }}"
prerelease: ${{ env.prerelease }}
makeLatest: ${{ env.makeLatest }}
generateReleaseNotes: true
artifacts: |
**/greptime-*
docker-push-acr:
name: Push docker image to alibaba cloud container registry
needs: [docker]
runs-on: ubuntu-latest
if: github.repository == 'GreptimeTeam/greptimedb' && !(inputs.dry_run || false)
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v3 - name: Checkout sources
with:
fetch-depth: 0
- name: Build and push images to ACR
uses: ./.github/actions/build-images
with:
image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
version: ${{ needs.allocate-runners.outputs.version }}
release-artifacts:
name: Create GitHub release and upload artifacts
if: ${{ inputs.release_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [
allocate-runners,
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
build-macos-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Release artifacts
uses: ./.github/actions/release-artifacts
with:
version: ${{ needs.allocate-runners.outputs.version }}
release-dev-builder-image:
name: Release dev builder image
if: ${{ inputs.release_dev_builder_image }} # Only manually trigger this job.
runs-on: ubuntu-latest-16-cores
steps:
- name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build and push dev builder image - name: Set up QEMU
uses: ./.github/actions/build-dev-builder-image uses: docker/setup-qemu-action@v2
with:
dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }}
acr-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
acr-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
acr-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
### Stop runners ### - name: Set up Docker Buildx
# It's very necessary to split the job of releasing runners into 'stop-linux-amd64-runner' and 'stop-linux-arm64-runner'. uses: docker/setup-buildx-action@v2
# Because we can terminate the specified EC2 instance immediately after the job is finished without unnecessary waiting.
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
needs: [
allocate-runners,
build-linux-amd64-artifacts,
]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Stop EC2 runner - name: Login to alibaba cloud container registry
uses: ./.github/actions/stop-runner uses: docker/login-action@v2
with: with:
label: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-label }} registry: registry.cn-hangzhou.aliyuncs.com
ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-amd64-ec2-runner-instance-id }} username: ${{ secrets.ALICLOUD_USERNAME }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} password: ${{ secrets.ALICLOUD_PASSWORD }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
stop-linux-arm64-runner: # It's always run as the last job in the workflow to make sure that the runner is released. - name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
name: Stop linux-arm64 runner shell: bash
# Only run this job when the runner is allocated. if: github.event_name != 'push'
if: ${{ always() }} run: |
runs-on: ubuntu-latest buildTime=`date "+%Y%m%d"`
needs: [ SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
allocate-runners, echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV
build-linux-arm64-artifacts,
]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Stop EC2 runner - name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
uses: ./.github/actions/stop-runner shell: bash
with: if: github.event_name == 'push'
label: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-label }} run: |
ec2-instance-id: ${{ needs.allocate-runners.outputs.linux-arm64-ec2-runner-instance-id }} VERSION=${{ github.ref_name }}
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} echo "IMAGE_TAG=${VERSION:1}" >> $GITHUB_ENV
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ vars.EC2_RUNNER_REGION }} - name: Push image to alibaba cloud container registry # Use 'docker buildx imagetools create' to create a new image based on the source image.
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} run: |
docker buildx imagetools create \
--tag registry.cn-hangzhou.aliyuncs.com/greptime/greptimedb:latest \
--tag registry.cn-hangzhou.aliyuncs.com/greptime/greptimedb:${{ env.IMAGE_TAG }} \
greptime/greptimedb:${{ env.IMAGE_TAG }}
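The 'docker buildx imagetools create' command above copies the existing multi-arch manifest list to the ACR tags without rebuilding anything. If needed, the result could be checked with the companion inspect command (illustrative):

# Illustrative only: inspect the copied manifest list on ACR.
docker buildx imagetools inspect \
  registry.cn-hangzhou.aliyuncs.com/greptime/greptimedb:"${IMAGE_TAG}"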

2
.gitignore vendored
View File

@@ -44,5 +44,3 @@ benchmarks/data
# Vscode workspace # Vscode workspace
*.code-workspace *.code-workspace
venv/

2551
Cargo.lock generated

File diff suppressed because it is too large

View File

@@ -2,7 +2,6 @@
members = [ members = [
"benchmarks", "benchmarks",
"src/api", "src/api",
"src/auth",
"src/catalog", "src/catalog",
"src/client", "src/client",
"src/cmd", "src/cmd",
@@ -12,13 +11,13 @@ members = [
"src/common/error", "src/common/error",
"src/common/function", "src/common/function",
"src/common/function-macro", "src/common/function-macro",
"src/common/greptimedb-telemetry",
"src/common/grpc", "src/common/grpc",
"src/common/grpc-expr", "src/common/grpc-expr",
"src/common/mem-prof", "src/common/mem-prof",
"src/common/meta", "src/common/meta",
"src/common/procedure", "src/common/procedure",
"src/common/procedure-test", "src/common/procedure-test",
"src/common/pprof",
"src/common/query", "src/common/query",
"src/common/recordbatch", "src/common/recordbatch",
"src/common/runtime", "src/common/runtime",
@@ -26,7 +25,6 @@ members = [
"src/common/telemetry", "src/common/telemetry",
"src/common/test-util", "src/common/test-util",
"src/common/time", "src/common/time",
"src/common/version",
"src/datanode", "src/datanode",
"src/datatypes", "src/datatypes",
"src/file-table-engine", "src/file-table-engine",
@@ -35,7 +33,6 @@ members = [
"src/meta-client", "src/meta-client",
"src/meta-srv", "src/meta-srv",
"src/mito", "src/mito",
"src/mito2",
"src/object-store", "src/object-store",
"src/partition", "src/partition",
"src/promql", "src/promql",
@@ -46,117 +43,56 @@ members = [
"src/sql", "src/sql",
"src/storage", "src/storage",
"src/store-api", "src/store-api",
"src/flow",
"src/table", "src/table",
"src/table-procedure", "src/table-procedure",
"tests-integration", "tests-integration",
"tests/runner", "tests/runner",
] ]
resolver = "2"
[workspace.package] [workspace.package]
version = "0.4.0-nightly" version = "0.3.1"
edition = "2021" edition = "2021"
license = "Apache-2.0" license = "Apache-2.0"
[workspace.dependencies] [workspace.dependencies]
arrow = { version = "43.0" } arrow = { version = "40.0" }
etcd-client = "0.11" arrow-array = "40.0"
arrow-array = "43.0" arrow-flight = "40.0"
arrow-flight = "43.0" arrow-schema = { version = "40.0", features = ["serde"] }
arrow-schema = { version = "43.0", features = ["serde"] }
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } # TODO(ruihang): use arrow-datafusion when it contains https://github.com/apache/arrow-datafusion/pull/6032
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" } datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
derive_builder = "0.12" datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "63e52dde9e44cac4b1f6c6e6b6bf6368ba3bd323" }
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "4a277f27caa035a801d5b9c020a0449777736614" } greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "7aeaeaba1e0ca6a5c736b6ab2eb63144ae3d284b" }
humantime-serde = "1.1"
itertools = "0.10" itertools = "0.10"
lazy_static = "1.4" parquet = "40.0"
once_cell = "1.18"
opentelemetry-proto = { version = "0.2", features = ["gen-tonic", "metrics"] }
parquet = "43.0"
paste = "1.0" paste = "1.0"
prost = "0.11" prost = "0.11"
rand = "0.8" rand = "0.8"
regex = "1.8"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] } snafu = { version = "0.7", features = ["backtraces"] }
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "296a4f6c73b129d6f565a42a2e5e53c6bc2b9da4", features = [ sqlparser = "0.34"
"visitor",
] }
strum = { version = "0.25", features = ["derive"] }
tempfile = "3" tempfile = "3"
tokio = { version = "1.28", features = ["full"] } tokio = { version = "1.28", features = ["full"] }
tokio-util = { version = "0.7", features = ["io-util", "compat"] } tokio-util = { version = "0.7", features = ["io-util", "compat"] }
toml = "0.7"
tonic = { version = "0.9", features = ["tls"] } tonic = { version = "0.9", features = ["tls"] }
uuid = { version = "1", features = ["serde", "v4", "fast-rng"] } uuid = { version = "1", features = ["serde", "v4", "fast-rng"] }
metrics = "0.20" metrics = "0.20"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" } meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "f0798c4c648d89f51abe63e870919c75dd463199" }
## workspaces members
api = { path = "src/api" }
auth = { path = "src/auth" }
catalog = { path = "src/catalog" }
client = { path = "src/client" }
cmd = { path = "src/cmd" }
common-base = { path = "src/common/base" }
common-catalog = { path = "src/common/catalog" }
common-datasource = { path = "src/common/datasource" }
common-error = { path = "src/common/error" }
common-function = { path = "src/common/function" }
common-function-macro = { path = "src/common/function-macro" }
common-greptimedb-telemetry = { path = "src/common/greptimedb-telemetry" }
common-grpc = { path = "src/common/grpc" }
common-grpc-expr = { path = "src/common/grpc-expr" }
common-mem-prof = { path = "src/common/mem-prof" }
common-meta = { path = "src/common/meta" }
common-procedure = { path = "src/common/procedure" }
common-procedure-test = { path = "src/common/procedure-test" }
common-pprof = { path = "src/common/pprof" }
common-query = { path = "src/common/query" }
common-recordbatch = { path = "src/common/recordbatch" }
common-runtime = { path = "src/common/runtime" }
substrait = { path = "src/common/substrait" }
common-telemetry = { path = "src/common/telemetry" }
common-test-util = { path = "src/common/test-util" }
common-time = { path = "src/common/time" }
common-version = { path = "src/common/version" }
datanode = { path = "src/datanode" }
datatypes = { path = "src/datatypes" }
file-table-engine = { path = "src/file-table-engine" }
frontend = { path = "src/frontend" }
log-store = { path = "src/log-store" }
meta-client = { path = "src/meta-client" }
meta-srv = { path = "src/meta-srv" }
mito = { path = "src/mito" }
mito2 = { path = "src/mito2" }
object-store = { path = "src/object-store" }
partition = { path = "src/partition" }
promql = { path = "src/promql" }
query = { path = "src/query" }
script = { path = "src/script" }
servers = { path = "src/servers" }
session = { path = "src/session" }
sql = { path = "src/sql" }
storage = { path = "src/storage" }
store-api = { path = "src/store-api" }
table = { path = "src/table" }
table-procedure = { path = "src/table-procedure" }
[workspace.dependencies.meter-macros] [workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git" git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "abbd357c1e193cd270ea65ee7652334a150b628f" rev = "f0798c4c648d89f51abe63e870919c75dd463199"
[profile.release] [profile.release]
debug = true debug = true

View File

@@ -1,7 +1,7 @@
[build] [build]
pre-build = [ pre-build = [
"dpkg --add-architecture $CROSS_DEB_ARCH", "dpkg --add-architecture $CROSS_DEB_ARCH",
"apt update && apt install -y unzip zlib1g-dev zlib1g-dev:$CROSS_DEB_ARCH", "apt update && apt install -y unzip zlib1g-dev:$CROSS_DEB_ARCH",
"curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/", "curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
"chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google", "chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
] ]

125
Makefile
View File

@@ -1,77 +1,15 @@
# The arguments for building images. IMAGE_REGISTRY ?= greptimedb
CARGO_PROFILE ?=
FEATURES ?=
TARGET_DIR ?=
TARGET ?=
CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest IMAGE_TAG ?= latest
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu
RUST_TOOLCHAIN ?= $(shell cat rust-toolchain.toml | grep channel | cut -d'"' -f2)
CARGO_REGISTRY_CACHE ?= ${HOME}/.cargo/registry
ARCH := $(shell uname -m | sed 's/x86_64/amd64/' | sed 's/aarch64/arm64/')
OUTPUT_DIR := $(shell if [ "$(RELEASE)" = "true" ]; then echo "release"; elif [ ! -z "$(CARGO_PROFILE)" ]; then echo "$(CARGO_PROFILE)" ; else echo "debug"; fi)
# The arguments for running integration tests.
ETCD_VERSION ?= v3.5.9
ETCD_IMAGE ?= quay.io/coreos/etcd:${ETCD_VERSION}
RETRY_COUNT ?= 3
NEXTEST_OPTS := --retries ${RETRY_COUNT}
BUILD_JOBS ?= $(shell which nproc 1>/dev/null && expr $$(nproc) / 2) # If nproc is not available, we don't set the build jobs.
ifeq ($(BUILD_JOBS), 0) # If the number of cores is less than 2, set the build jobs to 1.
BUILD_JOBS := 1
endif
ifneq ($(strip $(BUILD_JOBS)),)
NEXTEST_OPTS += --build-jobs=${BUILD_JOBS}
endif
ifneq ($(strip $(CARGO_PROFILE)),)
CARGO_BUILD_OPTS += --profile ${CARGO_PROFILE}
endif
ifneq ($(strip $(FEATURES)),)
CARGO_BUILD_OPTS += --features ${FEATURES}
endif
ifneq ($(strip $(TARGET_DIR)),)
CARGO_BUILD_OPTS += --target-dir ${TARGET_DIR}
endif
ifneq ($(strip $(TARGET)),)
CARGO_BUILD_OPTS += --target ${TARGET}
endif
ifneq ($(strip $(RELEASE)),)
CARGO_BUILD_OPTS += --release
endif
ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), true)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif
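The conditionals above simply accumulate flags: BUILD_JOBS defaults to half the available cores (e.g. nproc=16 yields --build-jobs=8 in NEXTEST_OPTS), and CARGO_PROFILE, FEATURES, TARGET_DIR, TARGET and RELEASE are appended to CARGO_BUILD_OPTS when set. An illustrative invocation with hypothetical values, and the cargo command it roughly expands to:

# Illustrative only: overriding the Makefile variables from the command line.
make build CARGO_PROFILE=nightly FEATURES=servers/dashboard TARGET=aarch64-unknown-linux-gnu
# expands roughly to:
#   cargo build --locked --profile nightly --features servers/dashboard --target aarch64-unknown-linux-gnu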
##@ Build ##@ Build
.PHONY: build .PHONY: build
build: ## Build debug version greptime. build: ## Build debug version greptime.
cargo build ${CARGO_BUILD_OPTS} cargo build
.PHONY: build-by-dev-builder .PHONY: release
build-by-dev-builder: ## Build greptime by dev-builder. release: ## Build release version greptime.
docker run --network=host \ cargo build --release
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
make build \
CARGO_PROFILE=${CARGO_PROFILE} \
FEATURES=${FEATURES} \
TARGET_DIR=${TARGET_DIR} \
TARGET=${TARGET} \
RELEASE=${RELEASE}
.PHONY: clean .PHONY: clean
clean: ## Clean the project. clean: ## Clean the project.
@@ -83,42 +21,20 @@ fmt: ## Format all the Rust code.
.PHONY: fmt-toml .PHONY: fmt-toml
fmt-toml: ## Format all TOML files. fmt-toml: ## Format all TOML files.
taplo format taplo format --option "indent_string= "
.PHONY: check-toml .PHONY: check-toml
check-toml: ## Check all TOML files. check-toml: ## Check all TOML files.
taplo format --check taplo format --check --option "indent_string= "
.PHONY: docker-image .PHONY: docker-image
docker-image: build-by-dev-builder ## Build docker image. docker-image: ## Build docker image.
mkdir -p ${ARCH} && \ docker build --network host -f docker/Dockerfile -t ${IMAGE_REGISTRY}:${IMAGE_TAG} .
cp ./target/${OUTPUT_DIR}/greptime ${ARCH}/greptime && \
docker build -f docker/ci/${BASE_IMAGE}/Dockerfile -t ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/greptimedb:${IMAGE_TAG} . && \
rm -r ${ARCH}
.PHONY: docker-image-buildx
docker-image-buildx: multi-platform-buildx ## Build docker image by buildx.
docker buildx build --builder ${BUILDX_BUILDER_NAME} \
--build-arg="CARGO_PROFILE=${CARGO_PROFILE}" \
--build-arg="FEATURES=${FEATURES}" \
--build-arg="OUTPUT_DIR=${OUTPUT_DIR}" \
-f docker/buildx/${BASE_IMAGE}/Dockerfile \
-t ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/greptimedb:${IMAGE_TAG} ${BUILDX_MULTI_PLATFORM_BUILD_OPTS} .
.PHONY: dev-builder
dev-builder: multi-platform-buildx ## Build dev-builder image.
docker buildx build --builder ${BUILDX_BUILDER_NAME} \
--build-arg="RUST_TOOLCHAIN=${RUST_TOOLCHAIN}" \
-f docker/dev-builder/${BASE_IMAGE}/Dockerfile \
-t ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:${IMAGE_TAG} ${BUILDX_MULTI_PLATFORM_BUILD_OPTS} .
.PHONY: multi-platform-buildx
multi-platform-buildx: ## Create buildx multi-platform builder.
docker buildx inspect ${BUILDX_BUILDER_NAME} || docker buildx create --name ${BUILDX_BUILDER_NAME} --driver docker-container --bootstrap --use
##@ Test ##@ Test
test: nextest ## Run unit and integration tests. test: nextest ## Run unit and integration tests.
cargo nextest run ${NEXTEST_OPTS} cargo nextest run --retries 3
.PHONY: nextest ## Install nextest tools. .PHONY: nextest ## Install nextest tools.
nextest: nextest:
@@ -134,27 +50,12 @@ check: ## Cargo check all the targets.
.PHONY: clippy .PHONY: clippy
clippy: ## Check clippy rules. clippy: ## Check clippy rules.
cargo clippy --workspace --all-targets -F pyo3_backend -- -D warnings cargo clippy --workspace --all-targets -- -D warnings
.PHONY: fmt-check .PHONY: fmt-check
fmt-check: ## Check code format. fmt-check: ## Check code format.
cargo fmt --all -- --check cargo fmt --all -- --check
.PHONY: start-etcd
start-etcd: ## Start single node etcd for testing purpose.
docker run --rm -d --network=host -p 2379-2380:2379-2380 ${ETCD_IMAGE}
.PHONY: stop-etcd
stop-etcd: ## Stop single node etcd for testing purpose.
docker stop $$(docker ps -q --filter ancestor=${ETCD_IMAGE})
.PHONY: run-it-in-container
run-it-in-container: start-etcd ## Run integration tests in dev-builder.
docker run --network=host \
-v ${PWD}:/greptimedb -v ${CARGO_REGISTRY_CACHE}:/root/.cargo/registry -v /tmp:/tmp \
-w /greptimedb ${IMAGE_REGISTRY}/${IMAGE_NAMESPACE}/dev-builder-${BASE_IMAGE}:latest \
make test sqlness-test BUILD_JOBS=${BUILD_JOBS}
##@ General ##@ General
# The help target prints out all targets with their descriptions organized # The help target prints out all targets with their descriptions organized
@@ -170,4 +71,4 @@ run-it-in-container: start-etcd ## Run integration tests in dev-builder.
.PHONY: help .PHONY: help
help: ## Display help messages. help: ## Display help messages.
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-30s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

View File

@@ -47,10 +47,14 @@ for years. Based on their best-practices, GreptimeDB is born to give you:
## Quick Start ## Quick Start
### [GreptimePlay](https://greptime.com/playground) ### GreptimePlay
Try out the features of GreptimeDB right from your browser. Try out the features of GreptimeDB right from your browser.
<a href="https://greptime.com/playground" target="_blank"><img
src="https://www.greptime.com/assets/greptime_play_button_colorful.1bbe2746.png"
alt="GreptimePlay" width="200px" /></a>
### Build ### Build
#### Build from Source #### Build from Source
@@ -102,7 +106,7 @@ Please see [the online document site](https://docs.greptime.com/getting-started/
Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview#connect) on our [official document site](https://docs.greptime.com/). Read the [complete getting started guide](https://docs.greptime.com/getting-started/overview#connect) on our [official document site](https://docs.greptime.com/).
To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/clients/overview). To write and query data, GreptimeDB is compatible with multiple [protocols and clients](https://docs.greptime.com/user-guide/client/overview).
## Resources ## Resources
@@ -129,12 +133,8 @@ To write and query data, GreptimeDB is compatible with multiple [protocols and c
### SDK ### SDK
- [GreptimeDB C++ Client](https://github.com/GreptimeTeam/greptimedb-client-cpp) - [GreptimeDB Java
- [GreptimeDB Erlang Client](https://github.com/GreptimeTeam/greptimedb-client-erl) Client](https://github.com/GreptimeTeam/greptimedb-client-java)
- [GreptimeDB Go Client](https://github.com/GreptimeTeam/greptimedb-client-go)
- [GreptimeDB Java Client](https://github.com/GreptimeTeam/greptimedb-client-java)
- [GreptimeDB Python Client](https://github.com/GreptimeTeam/greptimedb-client-py) (WIP)
- [GreptimeDB Rust Client](https://github.com/GreptimeTeam/greptimedb-client-rust)
## Project Status ## Project Status

View File

@@ -7,7 +7,7 @@ license.workspace = true
[dependencies] [dependencies]
arrow.workspace = true arrow.workspace = true
clap = { version = "4.0", features = ["derive"] } clap = { version = "4.0", features = ["derive"] }
client = { workspace = true } client = { path = "../src/client" }
indicatif = "0.17.1" indicatif = "0.17.1"
itertools.workspace = true itertools.workspace = true
parquet.workspace = true parquet.workspace = true

View File

@@ -114,7 +114,7 @@ async fn write_data(
}; };
let now = Instant::now(); let now = Instant::now();
let _ = db.insert(requests).await.unwrap(); db.insert(requests).await.unwrap();
let elapsed = now.elapsed(); let elapsed = now.elapsed();
total_rpc_elapsed_ms += elapsed.as_millis(); total_rpc_elapsed_ms += elapsed.as_millis();
progress_bar.inc(row_count as _); progress_bar.inc(row_count as _);
@@ -377,16 +377,19 @@ fn create_table_expr() -> CreateTableExpr {
} }
fn query_set() -> HashMap<String, String> { fn query_set() -> HashMap<String, String> {
HashMap::from([ let mut ret = HashMap::new();
(
"count_all".to_string(), ret.insert(
format!("SELECT COUNT(*) FROM {TABLE_NAME};"), "count_all".to_string(),
), format!("SELECT COUNT(*) FROM {TABLE_NAME};"),
( );
"fare_amt_by_passenger".to_string(),
format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {TABLE_NAME} GROUP BY passenger_count"), ret.insert(
) "fare_amt_by_passenger".to_string(),
]) format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {TABLE_NAME} GROUP BY passenger_count")
);
ret
} }
async fn do_write(args: &Args, db: &Database) { async fn do_write(args: &Args, db: &Database) {
@@ -411,8 +414,7 @@ async fn do_write(args: &Args, db: &Database) {
let db = db.clone(); let db = db.clone();
let mpb = multi_progress_bar.clone(); let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone(); let pb_style = progress_bar_style.clone();
let _ = write_jobs write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
} }
} }
while write_jobs.join_next().await.is_some() { while write_jobs.join_next().await.is_some() {
@@ -421,8 +423,7 @@ async fn do_write(args: &Args, db: &Database) {
let db = db.clone(); let db = db.clone();
let mpb = multi_progress_bar.clone(); let mpb = multi_progress_bar.clone();
let pb_style = progress_bar_style.clone(); let pb_style = progress_bar_style.clone();
let _ = write_jobs write_jobs.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
.spawn(async move { write_data(batch_size, &db, path, mpb, pb_style).await });
} }
} }
} }

View File

@@ -11,10 +11,6 @@ rpc_hostname = "127.0.0.1"
# The number of gRPC server worker threads, 8 by default. # The number of gRPC server worker threads, 8 by default.
rpc_runtime_size = 8 rpc_runtime_size = 8
[heartbeat]
# Interval for sending heartbeat messages to the Metasrv in milliseconds, 5000 by default.
interval_millis = 5000
# Metasrv client options. # Metasrv client options.
[meta_client_options] [meta_client_options]
# Metasrv address list. # Metasrv address list.
@@ -38,9 +34,8 @@ sync_write = false
# Storage options, see `standalone.example.toml`. # Storage options, see `standalone.example.toml`.
[storage] [storage]
# The working home directory.
data_home = "/tmp/greptimedb/"
type = "File" type = "File"
data_home = "/tmp/greptimedb/"
# TTL for all tables. Disabled by default. # TTL for all tables. Disabled by default.
# global_ttl = "7d" # global_ttl = "7d"
@@ -57,6 +52,8 @@ max_purge_tasks = 32
checkpoint_margin = 10 checkpoint_margin = 10
# Region manifest logs and checkpoints gc execution duration # Region manifest logs and checkpoints gc execution duration
gc_duration = '10m' gc_duration = '10m'
# Whether to try creating a manifest checkpoint on region opening
checkpoint_on_startup = false
# Storage flush options # Storage flush options
[storage.flush] [storage.flush]

View File

@@ -1,17 +1,10 @@
# Node running mode, see `standalone.example.toml`. # Node running mode, see `standalone.example.toml`.
mode = "distributed" mode = "distributed"
[heartbeat]
# Interval for sending heartbeat task to the Metasrv in milliseconds, 5000 by default.
interval_millis = 5000
# Interval for retry sending heartbeat task in milliseconds, 5000 by default.
retry_interval_millis = 5000
# HTTP server options, see `standalone.example.toml`. # HTTP server options, see `standalone.example.toml`.
[http_options] [http_options]
addr = "127.0.0.1:4000" addr = "127.0.0.1:4000"
timeout = "30s" timeout = "30s"
body_limit = "64MB"
# gRPC server options, see `standalone.example.toml`. # gRPC server options, see `standalone.example.toml`.
[grpc_options] [grpc_options]
@@ -49,16 +42,18 @@ runtime_size = 2
[influxdb_options] [influxdb_options]
enable = true enable = true
# Prometheus remote storage options, see `standalone.example.toml`. # Prometheus protocol options, see `standalone.example.toml`.
[prom_store_options] [prometheus_options]
enable = true enable = true
# Prometheus protocol options, see `standalone.example.toml`.
[prom_options]
addr = "127.0.0.1:4004"
# Metasrv client options, see `datanode.example.toml`. # Metasrv client options, see `datanode.example.toml`.
[meta_client_options] [meta_client_options]
metasrv_addrs = ["127.0.0.1:3002"] metasrv_addrs = ["127.0.0.1:3002"]
timeout_millis = 3000 timeout_millis = 3000
# DDL timeouts options.
ddl_timeout_millis = 10000
connect_timeout_millis = 5000 connect_timeout_millis = 5000
tcp_nodelay = true tcp_nodelay = true
@@ -66,11 +61,3 @@ tcp_nodelay = true
# [logging] # [logging]
# dir = "/tmp/greptimedb/logs" # dir = "/tmp/greptimedb/logs"
# level = "info" # level = "info"
# Datanode options.
[datanode]
# Datanode client options.
[datanode.client]
timeout = "10s"
connect_timeout = "10s"
tcp_nodelay = true

View File

@@ -1,5 +1,3 @@
# The working home directory.
data_home = "/tmp/metasrv/"
# The bind address of metasrv, "127.0.0.1:3002" by default. # The bind address of metasrv, "127.0.0.1:3002" by default.
bind_addr = "127.0.0.1:3002" bind_addr = "127.0.0.1:3002"
# The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. # The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
@@ -15,25 +13,8 @@ datanode_lease_secs = 15
selector = "LeaseBased" selector = "LeaseBased"
# Store data in memory, false by default. # Store data in memory, false by default.
use_memory_store = false use_memory_store = false
# Whether to enable greptimedb telemetry, true by default.
enable_telemetry = true
# Log options, see `standalone.example.toml` # Log options, see `standalone.example.toml`
# [logging] # [logging]
# dir = "/tmp/greptimedb/logs" # dir = "/tmp/greptimedb/logs"
# level = "info" # level = "info"
# Procedure storage options.
[procedure]
# Procedure max retry time.
max_retry_times = 12
# Initial retry delay of procedures, increases exponentially
retry_delay = "500ms"
# # Datanode options.
# [datanode]
# # Datanode client options.
# [datanode.client_options]
# timeout_millis = 10000
# connect_timeout_millis = 10000
# tcp_nodelay = true

View File

@@ -2,8 +2,6 @@
mode = "standalone" mode = "standalone"
# Whether to use in-memory catalog, `false` by default. # Whether to use in-memory catalog, `false` by default.
enable_memory_catalog = false enable_memory_catalog = false
# Whether to enable greptimedb telemetry, true by default.
enable_telemetry = true
# HTTP server options. # HTTP server options.
[http_options] [http_options]
@@ -11,9 +9,6 @@ enable_telemetry = true
addr = "127.0.0.1:4000" addr = "127.0.0.1:4000"
# HTTP request timeout, 30s by default. # HTTP request timeout, 30s by default.
timeout = "30s" timeout = "30s"
# HTTP request body limit, 64MB by default.
# the following units are supported: B, KB, KiB, MB, MiB, GB, GiB, TB, TiB, PB, PiB
body_limit = "64MB"
# gRPC server options. # gRPC server options.
[grpc_options] [grpc_options]
@@ -71,11 +66,16 @@ runtime_size = 2
# Whether to enable InfluxDB protocol in HTTP API, true by default. # Whether to enable InfluxDB protocol in HTTP API, true by default.
enable = true enable = true
# Prometheus remote storage options # Prometheus protocol options.
[prom_store_options] [prometheus_options]
# Whether to enable Prometheus remote write and read in HTTP API, true by default. # Whether to enable Prometheus remote write and read in HTTP API, true by default.
enable = true enable = true
# Prom protocol options.
[prom_options]
# Prometheus API server address, "127.0.0.1:4004" by default.
addr = "127.0.0.1:4004"
# WAL options. # WAL options.
[wal] [wal]
# WAL data directory # WAL data directory
@@ -93,10 +93,10 @@ sync_write = false
# Storage options. # Storage options.
[storage] [storage]
# The working home directory.
data_home = "/tmp/greptimedb/"
# Storage type. # Storage type.
type = "File" type = "File"
# Data directory, "/tmp/greptimedb/data" by default.
data_home = "/tmp/greptimedb/"
# TTL for all tables. Disabled by default. # TTL for all tables. Disabled by default.
# global_ttl = "7d" # global_ttl = "7d"
@@ -116,6 +116,8 @@ max_purge_tasks = 32
checkpoint_margin = 10 checkpoint_margin = 10
# Region manifest logs and checkpoints gc execution duration # Region manifest logs and checkpoints gc execution duration
gc_duration = '10m' gc_duration = '10m'
# Whether to try creating a manifest checkpoint on region opening
checkpoint_on_startup = false
# Storage flush options # Storage flush options
[storage.flush] [storage.flush]

38
docker/Dockerfile Normal file
View File

@@ -0,0 +1,38 @@
FROM ubuntu:22.04 as builder
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Install dependencies.
RUN apt-get update && apt-get install -y \
libssl-dev \
protobuf-compiler \
curl \
build-essential \
pkg-config \
python3 \
python3-dev \
python3-pip \
&& pip3 install --upgrade pip \
&& pip3 install pyarrow
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Build the project in release mode.
COPY . .
RUN cargo build --release
# Export the binary to the clean image.
# TODO(zyy17): Maybe should use the more secure container image.
FROM ubuntu:22.04 as base
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
WORKDIR /greptime
COPY --from=builder /greptimedb/target/release/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]

57
docker/aarch64/Dockerfile Normal file
View File

@@ -0,0 +1,57 @@
FROM ubuntu:22.04 as builder
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Install dependencies.
RUN apt-get update && apt-get install -y \
libssl-dev \
protobuf-compiler \
curl \
build-essential \
pkg-config \
wget
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Install cross platform toolchain
RUN apt-get -y update && \
apt-get -y install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu && \
apt-get install binutils-aarch64-linux-gnu
COPY ./docker/aarch64/compile-python.sh ./docker/aarch64/
RUN chmod +x ./docker/aarch64/compile-python.sh && \
./docker/aarch64/compile-python.sh
COPY ./rust-toolchain.toml .
# Install rustup target for cross compiling.
RUN rustup target add aarch64-unknown-linux-gnu
COPY . .
# Fetch dependencies in a separate `RUN` step so they get their own cache layer
RUN cargo fetch
# These three env vars are set in the compile script, so they are set manually in the Dockerfile as well.
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/
ENV LIBRARY_PATH=$LIBRARY_PATH:/usr/local/lib/
ENV PY_INSTALL_PATH=/greptimedb/python_arm64_build
# Set the environment variable for cross compiling and compile it
# The cross-compiled Python is `python3` in PATH, but pyo3 needs `python` in PATH, so alias it.
# Build the project in release mode.
RUN export PYO3_CROSS_LIB_DIR=$PY_INSTALL_PATH/lib && \
alias python=python3 && \
cargo build --target aarch64-unknown-linux-gnu --release -F pyo3_backend
# Exporting the binary to the clean image
FROM ubuntu:22.04 as base
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
WORKDIR /greptime
COPY --from=builder /greptimedb/target/aarch64-unknown-linux-gnu/release/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]

View File

@@ -0,0 +1,87 @@
#!/usr/bin/env bash
set -e
# this script will download Python source code, compile it, and install it to /usr/local/lib
# then use this python to compile cross-compiled python for aarch64
ARCH=$1
PYTHON_VERSION=3.10.10
PYTHON_SOURCE_DIR=Python-${PYTHON_VERSION}
PYTHON_INSTALL_PATH_AMD64=${PWD}/python-${PYTHON_VERSION}/amd64
PYTHON_INSTALL_PATH_AARCH64=${PWD}/python-${PYTHON_VERSION}/aarch64
function download_python_source_code() {
wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz
tar -xvf Python-$PYTHON_VERSION.tgz
}
function compile_for_amd64_platform() {
mkdir -p "$PYTHON_INSTALL_PATH_AMD64"
echo "Compiling for amd64 platform..."
./configure \
--prefix="$PYTHON_INSTALL_PATH_AMD64" \
--enable-shared \
ac_cv_pthread_is_default=no ac_cv_pthread=yes ac_cv_cxx_thread=yes \
ac_cv_have_long_long_format=yes \
--disable-ipv6 ac_cv_file__dev_ptmx=no ac_cv_file__dev_ptc=no
make
make install
}
# Explain the Python configure options used here a bit:
# --enable-shared: enable building a shared Python library (default is no); we need it for calling Python from Rust
# CC, CXX, AR, LD, RANLIB: set the compiler, archiver, linker, and ranlib programs to use
# build: the machine you are building on, host: the machine you will run the compiled program on
# --with-system-ffi: build the _ctypes module using an installed ffi library, see Doc/library/ctypes.rst; not used here (TODO: could remove)
# ac_cv_pthread_is_default=no ac_cv_pthread=yes ac_cv_cxx_thread=yes:
# allow cross-compiled python to have -pthread set for CXX, see https://github.com/python/cpython/pull/22525
# ac_cv_have_long_long_format=yes: target platform supports long long type
# --disable-ipv6: disable ipv6 support, we don't need it here
# ac_cv_file__dev_ptmx=no ac_cv_file__dev_ptc=no: disable pty support, we don't need it here
function compile_for_aarch64_platform() {
export LD_LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LD_LIBRARY_PATH
export LIBRARY_PATH=$PYTHON_INSTALL_PATH_AMD64/lib:$LIBRARY_PATH
export PATH=$PYTHON_INSTALL_PATH_AMD64/bin:$PATH
mkdir -p "$PYTHON_INSTALL_PATH_AARCH64"
echo "Compiling for aarch64 platform..."
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
echo "LIBRARY_PATH: $LIBRARY_PATH"
echo "PATH: $PATH"
./configure --build=x86_64-linux-gnu --host=aarch64-linux-gnu \
--prefix="$PYTHON_INSTALL_PATH_AARCH64" --enable-optimizations \
CC=aarch64-linux-gnu-gcc \
CXX=aarch64-linux-gnu-g++ \
AR=aarch64-linux-gnu-ar \
LD=aarch64-linux-gnu-ld \
RANLIB=aarch64-linux-gnu-ranlib \
--enable-shared \
ac_cv_pthread_is_default=no ac_cv_pthread=yes ac_cv_cxx_thread=yes \
ac_cv_have_long_long_format=yes \
--disable-ipv6 ac_cv_file__dev_ptmx=no ac_cv_file__dev_ptc=no
make
make altinstall
}
# Main script starts here.
download_python_source_code
# Enter the python source code directory.
cd $PYTHON_SOURCE_DIR || exit 1
# Build local python first, then build cross-compiled python.
compile_for_amd64_platform
# Clean the build directory.
make clean && make distclean
# Cross compile python for aarch64.
if [ "$ARCH" = "aarch64-unknown-linux-gnu" ]; then
compile_for_aarch64_platform
fi

View File

@@ -1,54 +0,0 @@
FROM centos:7 as builder
ARG CARGO_PROFILE
ARG FEATURES
ARG OUTPUT_DIR
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Install dependencies
RUN ulimit -n 1024000 && yum groupinstall -y 'Development Tools'
RUN yum install -y epel-release \
openssl \
openssl-devel \
centos-release-scl \
rh-python38 \
rh-python38-python-devel \
which
# Install protoc
RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/
# Install Rust
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /opt/rh/rh-python38/root/usr/bin:/usr/local/bin:/root/.cargo/bin/:$PATH
# Build the project in release mode.
RUN --mount=target=.,rw \
--mount=type=cache,target=/root/.cargo/registry \
make build \
CARGO_PROFILE=${CARGO_PROFILE} \
FEATURES=${FEATURES} \
TARGET_DIR=/out/target
# Export the binary to the clean image.
FROM centos:7 as base
ARG OUTPUT_DIR
RUN yum install -y epel-release \
openssl \
openssl-devel \
centos-release-scl \
rh-python38 \
rh-python38-python-devel \
which
WORKDIR /greptime
COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]

View File

@@ -1,57 +0,0 @@
FROM ubuntu:22.04 as builder
ARG CARGO_PROFILE
ARG FEATURES
ARG OUTPUT_DIR
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Install dependencies.
RUN --mount=type=cache,target=/var/cache/apt \
apt-get update && apt-get install -y \
libssl-dev \
protobuf-compiler \
curl \
git \
build-essential \
pkg-config \
python3.10 \
python3.10-dev \
python3-pip
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Build the project in release mode.
RUN --mount=target=. \
--mount=type=cache,target=/root/.cargo/registry \
make build \
CARGO_PROFILE=${CARGO_PROFILE} \
FEATURES=${FEATURES} \
TARGET_DIR=/out/target
# Export the binary to the clean image.
# TODO(zyy17): Maybe should use the more secure container image.
FROM ubuntu:22.04 as base
ARG OUTPUT_DIR
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get \
-y install ca-certificates \
python3.10 \
python3.10-dev \
python3-pip \
curl
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt
WORKDIR /greptime
COPY --from=builder /out/target/${OUTPUT_DIR}/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]

View File

@@ -4,10 +4,9 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
ca-certificates \ ca-certificates \
python3.10 \ python3.10 \
python3.10-dev \ python3.10-dev \
python3-pip \ python3-pip
curl
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt COPY requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt RUN python3 -m pip install -r /etc/greptime/requirements.txt

View File

@@ -1,16 +0,0 @@
FROM centos:7
RUN yum install -y epel-release \
openssl \
openssl-devel \
centos-release-scl \
rh-python38 \
rh-python38-python-devel
ARG TARGETARCH
ADD $TARGETARCH/greptime /greptime/bin/
ENV PATH /greptime/bin/:$PATH
ENTRYPOINT ["greptime"]

View File

@@ -1,29 +0,0 @@
FROM centos:7 as builder
ENV LANG en_US.utf8
# Install dependencies
RUN ulimit -n 1024000 && yum groupinstall -y 'Development Tools'
RUN yum install -y epel-release \
openssl \
openssl-devel \
centos-release-scl \
rh-python38 \
rh-python38-python-devel \
which
# Install protoc
RUN curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip
RUN unzip protoc-3.15.8-linux-x86_64.zip -d /usr/local/
# Install Rust
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /opt/rh/rh-python38/root/usr/bin:/usr/local/bin:/root/.cargo/bin/:$PATH
# Install Rust toolchains.
ARG RUST_TOOLCHAIN
RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install nextest.
RUN cargo install cargo-nextest --locked

View File

@@ -1,36 +0,0 @@
FROM ubuntu:22.04
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Install dependencies.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libssl-dev \
tzdata \
protobuf-compiler \
curl \
ca-certificates \
git \
build-essential \
pkg-config \
python3.10 \
python3.10-dev \
python3-pip
RUN git config --global --add safe.directory /greptimedb
# Install Python dependencies.
COPY ./docker/python/requirements.txt /etc/greptime/requirements.txt
RUN python3 -m pip install -r /etc/greptime/requirements.txt
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Install Rust toolchains.
ARG RUST_TOOLCHAIN
RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install nextest.
RUN cargo install cargo-nextest --locked

View File

@@ -1,39 +0,0 @@
# TSBS benchmark - v0.3.2
## Environment
| | |
| --- | --- |
| CPU | AMD Ryzen 7 7735HS (8 core 3.2GHz) |
| Memory | 32GB |
| Disk | SOLIDIGM SSDPFKNU010TZ |
| OS | Ubuntu 22.04.2 LTS |
## Write performance
| Write buffer size | Ingest rate (rows/s) |
| --- | --- |
| 512M | 139583.04 |
| 32M | 279250.52 |
## Query performance
| Query type | v0.3.2 write buffer 32M (ms) | v0.3.2 write buffer 512M (ms) | v0.3.1 write buffer 32M (ms) |
| --- | --- | --- | --- |
| cpu-max-all-1 | 921.12 | 241.23 | 553.63 |
| cpu-max-all-8 | 2657.66 | 502.78 | 3308.41 |
| double-groupby-1 | 28238.85 | 27367.42 | 52148.22 |
| double-groupby-5 | 33094.65 | 32421.89 | 56762.37 |
| double-groupby-all | 38565.89 | 38635.52 | 59596.80 |
| groupby-orderby-limit | 23321.60 | 22423.55 | 53983.23 |
| high-cpu-1 | 1167.04 | 254.15 | 832.41 |
| high-cpu-all | 32814.08 | 29906.94 | 62853.12 |
| lastpoint | 192045.05 | 153575.42 | NA |
| single-groupby-1-1-1 | 63.97 | 87.35 | 92.66 |
| single-groupby-1-1-12 | 666.24 | 326.98 | 781.50 |
| single-groupby-1-8-1 | 225.29 | 137.97 | 281.95 |
| single-groupby-5-1-1 | 70.40 | 81.64 | 86.15 |
| single-groupby-5-1-12 | 722.75 | 356.01 | 805.18 |
| single-groupby-5-8-1 | 285.60 | 115.88 | 326.29 |

View File

@@ -1,303 +0,0 @@
---
Feature Name: table-engine-refactor
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/1869
Date: 2023-07-06
Author: "Yingwen <realevenyag@gmail.com>"
---
Refactor Table Engine
----------------------
# Summary
Refactor table engines to address several historical tech debts.
# Motivation
Both `Frontend` and `Datanode` have to deal with multiple regions in a table. This results in code duplication and an additional burden on the `Datanode`.
Before:
```mermaid
graph TB
subgraph Frontend["Frontend"]
subgraph MyTable
A("region 0, 2 -> Datanode0")
B("region 1, 3 -> Datanode1")
end
end
MyTable --> MetaSrv
MetaSrv --> ETCD
MyTable-->TableEngine0
MyTable-->TableEngine1
subgraph Datanode0
Procedure0("procedure")
TableEngine0("table engine")
region0
region2
mytable0("my_table")
Procedure0-->mytable0
TableEngine0-->mytable0
mytable0-->region0
mytable0-->region2
end
subgraph Datanode1
Procedure1("procedure")
TableEngine1("table engine")
region1
region3
mytable1("my_table")
Procedure1-->mytable1
TableEngine1-->mytable1
mytable1-->region1
mytable1-->region3
end
subgraph manifest["table manifest"]
M0("my_table")
M1("regions: [0, 1, 2, 3]")
end
mytable1-->manifest
mytable0-->manifest
RegionManifest0("region manifest 0")
RegionManifest1("region manifest 1")
RegionManifest2("region manifest 2")
RegionManifest3("region manifest 3")
region0-->RegionManifest0
region1-->RegionManifest1
region2-->RegionManifest2
region3-->RegionManifest3
```
`Datanodes` can update the same manifest file for a table, as regions are assigned to different nodes in the cluster. We also have to run procedures on the `Datanode` to ensure the table manifest is consistent with the region manifests. A "table" in a `Datanode` is only a subset of the table's regions, so the `Datanode` is much closer to a `RegionServer` in `HBase`, which only deals with regions.
In cluster mode, we store table metadata in both etcd and the table manifest, so the table manifest becomes redundant. We can remove it if we refactor the table engines into region engines that only care about regions. What's more, we then don't need to run those procedures on the `Datanode`.
After:
```mermaid
graph TB
subgraph Frontend["Frontend"]
direction LR
subgraph MyTable
A("region 0, 2 -> Datanode0")
B("region 1, 3 -> Datanode1")
end
end
MyTable --> MetaSrv
MetaSrv --> ETCD
MyTable-->RegionEngine
MyTable-->RegionEngine1
subgraph Datanode0
RegionEngine("region engine")
region0
region2
RegionEngine-->region0
RegionEngine-->region2
end
subgraph Datanode1
RegionEngine1("region engine")
region1
region3
RegionEngine1-->region1
RegionEngine1-->region3
end
RegionManifest0("region manifest 0")
RegionManifest1("region manifest 1")
RegionManifest2("region manifest 2")
RegionManifest3("region manifest 3")
region0-->RegionManifest0
region1-->RegionManifest1
region2-->RegionManifest2
region3-->RegionManifest3
```
This RFC proposes to refactor table engines into region engines as a first step toward making the `Datanode` act like a `RegionServer`.
# Details
## Overview
We plan to refactor the `TableEngine` trait into `RegionEngine` gradually. This RFC focuses on the `mito` engine as it is the default table engine and the most complicated engine.
Currently, `MitoEngine` is built upon `StorageEngine`, which manages the regions of the `mito` engine. Since `MitoEngine` becomes a region engine, we can combine `StorageEngine` with `MitoEngine` to simplify the code structure.
The chart below shows the overall architecture of the `MitoEngine`.
```mermaid
classDiagram
class MitoEngine~LogStore~ {
-WorkerGroup workers
}
class MitoRegion {
+VersionControlRef version_control
-RegionId region_id
-String manifest_dir
-AtomicI64 last_flush_millis
+region_id() RegionId
+scan() ChunkReaderImpl
}
class RegionMap {
-HashMap&lt;RegionId, MitoRegionRef&gt; regions
}
class ChunkReaderImpl
class WorkerGroup {
-Vec~RegionWorker~ workers
}
class RegionWorker {
-RegionMap regions
-Sender sender
-JoinHandle handle
}
class RegionWorkerThread~LogStore~ {
-RegionMap regions
-Receiver receiver
-Wal~LogStore~ wal
-ObjectStore object_store
-MemtableBuilderRef memtable_builder
-FlushSchedulerRef~LogStore~ flush_scheduler
-FlushStrategy flush_strategy
-CompactionSchedulerRef~LogStore~ compaction_scheduler
-FilePurgerRef file_purger
}
class Wal~LogStore~ {
-LogStore log_store
}
class MitoConfig
MitoEngine~LogStore~ o-- MitoConfig
MitoEngine~LogStore~ o-- MitoRegion
MitoEngine~LogStore~ o-- WorkerGroup
MitoRegion o-- VersionControl
MitoRegion -- ChunkReaderImpl
WorkerGroup o-- RegionWorker
RegionWorker o-- RegionMap
RegionWorker -- RegionWorkerThread~LogStore~
RegionWorkerThread~LogStore~ o-- RegionMap
RegionWorkerThread~LogStore~ o-- Wal~LogStore~
```
We replace the `RegionWriter` with `RegionWorker` to process write requests and DDL requests.
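Below is a minimal, self-contained sketch of this worker model, assuming simplified request types and a plain thread plus channel instead of the real async runtime; it only illustrates the request-loop shape, not the actual `RegionWorker` implementation.
```rust
use std::sync::mpsc;
use std::thread;

// Simplified request types for illustration; the real worker handles many more.
enum WorkerRequest {
    Write { region_id: u64, rows: usize },
    Alter { region_id: u64 },
    Stop,
}

fn spawn_worker() -> (mpsc::Sender<WorkerRequest>, thread::JoinHandle<()>) {
    let (sender, receiver) = mpsc::channel();
    let handle = thread::spawn(move || {
        // The worker owns its regions and processes requests sequentially,
        // so no per-region locking is needed inside the loop.
        for request in receiver {
            match request {
                WorkerRequest::Write { region_id, rows } => {
                    println!("write {rows} rows to region {region_id}");
                }
                WorkerRequest::Alter { region_id } => {
                    println!("alter region {region_id}");
                }
                WorkerRequest::Stop => break,
            }
        }
    });
    (sender, handle)
}

fn main() {
    let (sender, handle) = spawn_worker();
    sender.send(WorkerRequest::Write { region_id: 1, rows: 100 }).unwrap();
    sender.send(WorkerRequest::Alter { region_id: 1 }).unwrap();
    sender.send(WorkerRequest::Stop).unwrap();
    handle.join().unwrap();
}
```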
## Metadata
We also merge the region's metadata with the table's metadata, which should make metadata much easier to maintain.
```mermaid
classDiagram
class VersionControl {
-CowCell~Version~ version
-AtomicU64 committed_sequence
}
class Version {
-RegionMetadataRef metadata
-MemtableVersionRef memtables
-LevelMetasRef ssts
-SequenceNumber flushed_sequence
-ManifestVersion manifest_version
}
class MemtableVersion {
-MemtableRef mutable
-Vec~MemtableRef~ immutables
+mutable_memtable() MemtableRef
+immutable_memtables() &[MemtableRef]
+freeze_mutable(MemtableRef new_mutable) MemtableVersion
}
class LevelMetas {
-LevelMetaVec levels
-AccessLayerRef sst_layer
-FilePurgerRef file_purger
-Option~i64~ compaction_time_window
}
class LevelMeta {
-Level level
-HashMap&lt;FileId, FileHandle&gt; files
}
class FileHandle {
-FileMeta meta
-bool compacting
-AtomicBool deleted
-AccessLayerRef sst_layer
-FilePurgerRef file_purger
}
class FileMeta {
+RegionId region_id
+FileId file_id
+Option&lt;Timestamp, Timestamp&gt; time_range
+Level level
+u64 file_size
}
VersionControl o-- Version
Version o-- RegionMetadata
Version o-- MemtableVersion
Version o-- LevelMetas
LevelMetas o-- LevelMeta
LevelMeta o-- FileHandle
FileHandle o-- FileMeta
class RegionMetadata {
+RegionId region_id
+VersionNumber version
+SchemaRef table_schema
+Vec~usize~ primary_key_indices
+Vec~usize~ value_indices
+ColumnId next_column_id
+TableOptions region_options
+DateTime~Utc~ created_on
+RegionSchemaRef region_schema
}
class RegionSchema {
-SchemaRef user_schema
-StoreSchemaRef store_schema
-ColumnsMetadataRef columns
}
class Schema
class StoreSchema {
-Vec~ColumnMetadata~ columns
-SchemaRef schema
-usize row_key_end
-usize user_column_end
}
class ColumnsMetadata {
-Vec~ColumnMetadata~ columns
-HashMap&lt;String, usize&gt; name_to_col_index
-usize row_key_end
-usize timestamp_key_index
-usize user_column_end
}
class ColumnMetadata
RegionMetadata o-- RegionSchema
RegionMetadata o-- Schema
RegionSchema o-- StoreSchema
RegionSchema o-- Schema
RegionSchema o-- ColumnsMetadata
StoreSchema o-- ColumnsMetadata
StoreSchema o-- Schema
StoreSchema o-- ColumnMetadata
ColumnsMetadata o-- ColumnMetadata
```
# Drawback
This is a breaking change.
# Future Work
- Rename `TableEngine` to `RegionEngine`
- Simplify schema relationship in the `mito` engine
- Refactor the `Datanode` into a `RegionServer`.

View File

@@ -1,202 +0,0 @@
---
Feature Name: metric-engine
Tracking Issue: TBD
Date: 2023-07-10
Author: "Ruihang Xia <waynestxia@gmail.com>"
---
# Summary
A new metric engine that can significantly enhance our ability to handle the tremendous number of small tables in scenarios like Prometheus metrics, by leveraging a synthetic wide table that offers storage and metadata multiplexing capabilities over the existing engine.
# Motivation
The concept "Table" in GreptimeDB is a bit "heavy" compared to other time-series storage like Prometheus or VictoriaMetrics. This has lots of disadvantages in aspects from performance, footprint, and storage to cost.
# Details
## Top level description
- User Interface
This feature will add a new type of storage engine. It might be exposed as an option like `with ENGINE=mito`, or used through an internal interface such as automatic table creation on Prometheus remote write. From the user side, there is no difference from tables in the mito engine. All DDL like `CREATE` and `ALTER`, and DML like `SELECT`, should be supported.
- Implementation Overlook
This new engine doesn't re-implement low-level components like file R/W. It's a wrapper layer over the existing mito engine with extra storage and metadata multiplexing capabilities, i.e., it exposes multiple tables based on one mito engine table like this:
``` plaintext
┌───────────────┐ ┌───────────────┐ ┌───────────────┐
│ Metric Engine │ │ Metric Engine │ │ Metric Engine │
│ Table 1 │ │ Table 2 │ │ Table 3 │
└───────────────┘ └───────────────┘ └───────────────┘
▲ ▲ ▲
│ │ │
└───────────────┼───────────────────┘
┌─────────┴────────┐
│ Metric Region │
│ Engine │
│ ┌─────────────┤
│ │ Mito Region │
│ │ Engine │
└────▲─────────────┘
┌─────┴───────────────┐
│ │
│ Mito Engine Table │
│ │
└─────────────────────┘
```
The following parts will describe these implementation details:
- How to route these metric region tables and how those tables are distributed
- How to maintain the schema and other metadata of the underlying mito engine table
- How to maintain the schema of metric engine table
- How the query goes
## Routing
Before this change, the region route rule was based on a group of partition keys. The relation of physical table to region is one-to-many.
``` rust
pub struct PartitionDef {
partition_columns: Vec<String>,
partition_bounds: Vec<PartitionBound>,
}
```
For metric engine tables, the key difference is that we split the concepts of "physical table" and "logical table". As the previous ASCII chart shows, multiple logical tables are based on one physical table, so the relationship of logical table to region becomes many-to-many. Thus, we must include the (logical) table name in the partition rules.
Considering that the partition/route interface is a generic map from a string array to a region id, all we need to do is insert the logical table name into the request:
``` rust
fn route(request: Vec<String>) -> RegionId;
```
The next question is where to do this conversion. The basic idea is to dispatch different routing behavior based on the engine type. Since the frontend has all the necessary information, it's a good place to do that, and it leaves the meta server untouched. The essential change is to associate the engine type with the route rule.
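For illustration, a minimal sketch of that frontend-side dispatch is shown below; the types, the hash-based `route` placeholder, and the function names are assumptions made for this example, not the real partition-rule code.
```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Illustrative placeholder types.
type RegionId = u64;

enum EngineType {
    Mito,
    Metric,
}

/// Placeholder for the existing generic routing: string keys -> region id.
fn route(keys: &[String]) -> RegionId {
    let mut hasher = DefaultHasher::new();
    keys.hash(&mut hasher);
    hasher.finish() % 4
}

/// For metric engine tables, prepend the logical table name so that logical
/// tables sharing one physical table still route deterministically.
fn route_for_table(engine: &EngineType, logical_table: &str, mut keys: Vec<String>) -> RegionId {
    if let EngineType::Metric = engine {
        keys.insert(0, logical_table.to_string());
    }
    route(&keys)
}

fn main() {
    let mito = route_for_table(&EngineType::Mito, "my_table", vec!["host-1".into()]);
    let metric = route_for_table(&EngineType::Metric, "cpu_usage", vec!["host-1".into()]);
    println!("mito table -> region {mito}, metric table -> region {metric}");
}
```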
## Physical Region Schema
The idea "physical wide table" is to perform column-level multiplexing. I.e., map all logical columns to physical columns by their names.
```
┌────────────┐ ┌────────────┐ ┌────────────┐
│ Table 1 │ │ Table 2 │ │ Table 3 │
├───┬────┬───┤ ├───┬────┬───┤ ├───┬────┬───┤
│C1 │ C2 │ C3│ │C1 │ C3 │ C5├──────┐ │C2 │ C4 │ C6│
└─┬─┴──┬─┴─┬─┘ ┌────┴───┴──┬─┴───┘ │ └─┬─┴──┬─┴─┬─┘
│ │ │ │ │ │ │ │ │
│ │ │ │ └──────────┐ │ │ │ │
│ │ │ │ │ │ │ │ │
│ │ │ │ ┌─────────────────┐ │ │ │ │ │
│ │ │ │ │ Physical Table │ │ │ │ │ │
│ │ │ │ ├──┬──┬──┬──┬──┬──┘ │ │ │ │ │
└────x───x───┴─►│C1│C2│C3│C4│C5│C6◄─┼─x────x────x───┘
│ │ └──┘▲─┘▲─┴─▲└─▲└──┘ │ │ │ │
│ │ │ │ │ │ │ │ │ │
├───x──────────┘ ├───x──x─────┘ │ │ │
│ │ │ │ │ │ │ │
│ └─────────────┘ │ └───────┘ │ │
│ │ │ │
└─────────────────────x───────────────┘ │
│ │
└────────────────────┘
```
This approach is very straightforward but has one problem: it breaks when two columns share the same name but have different semantic types (time index, tag or field) or data types. E.g., `CREATE TABLE t1 (c1 timestamp(3) TIME INDEX)` and `CREATE TABLE t2 (c1 STRING PRIMARY KEY)`.
One possible workaround is to prefix each column with its data type and semantic type, like `_STRING_PK_c1`. However, considering the primary goal at present is to support data from monitoring metrics like Prometheus remote write, it's acceptable not to support this at first because data types are often simple and limited here.
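A tiny sketch of such a name-mangling helper is shown below; the exact prefix scheme and the helper names are hypothetical and only mirror the `_STRING_PK_c1` example above.
```rust
// Hypothetical name-mangling helper; the `_<TYPE>_<SEMANTIC>_<name>` scheme
// only mirrors the `_STRING_PK_c1` example and is not the engine's real code.
enum SemanticType {
    Tag,
    Field,
    TimeIndex,
}

fn physical_column_name(data_type: &str, semantic: &SemanticType, name: &str) -> String {
    let sem = match semantic {
        SemanticType::Tag => "PK",
        SemanticType::Field => "FIELD",
        SemanticType::TimeIndex => "TS",
    };
    format!("_{}_{}_{}", data_type.to_uppercase(), sem, name)
}

fn main() {
    // Two logical columns both named `c1` no longer collide in the wide table.
    assert_eq!(physical_column_name("string", &SemanticType::Tag, "c1"), "_STRING_PK_c1");
    assert_eq!(physical_column_name("timestamp", &SemanticType::TimeIndex, "c1"), "_TIMESTAMP_TS_c1");
    let _ = SemanticType::Field; // unused variant, kept for completeness
}
```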
The next point is changing the physical table's schema. This is only needed when creating a new logical table or altering an existing one. Typically, table creation and altering are explicit; we only need to emit an add-column request to the underlying physical table when processing the logical table's DDL. GreptimeDB can create or alter tables automatically for some protocols, but the internal logic is the same.
Also for simplicity, we don't support shrinking the underlying table at first. This can be achieved later by introducing a mechanism on the physical columns.
The frontend does not need to keep the physical table's schema.
## Metadata of physical regions
Metric engine regions need to store extra metadata such as the schemas of logical tables or all logical table names. That information is relatively simple and can be stored as key-value pairs. For now, we have to use another physical mito region for metadata. This raises an issue with region scheduling: since we don't have the ability to perform affinity scheduling, the initial version will simply assume the data region and the metadata region are on the same instance. See "Alternatives - other storage for physical region's metadata" for a possible future improvement.
Here is the schema of the metadata region and how we would use it. The `CREATE TABLE` clause of the metadata region looks like the following; note that it wouldn't actually be created via SQL.
``` sql
CREATE TABLE metadata(
ts timestamp time index,
key string primary key,
value string
);
```
The `ts` field is just a placeholder for the constraint that a mito region must contain a time index field; it will always be `0`. The other two fields, `key` and `value`, will be used as a key-value storage. It contains two groups of keys (a small sketch of how such keys could be built follows the list):
- `__table_<TABLE_NAME>` is used for marking table existence. It doesn't have a value.
- `__column_<TABLE_NAME>_<COLUMN_NAME>` is used for marking column existence; the value is the column's semantic type.
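A minimal sketch of building and parsing such keys, assuming hypothetical helper names; a real implementation would also need an unambiguous separator, since table and column names may themselves contain `_`:
```rust
/// Illustrative only: the actual key layout lives in the metric engine.
fn table_key(table: &str) -> String {
    format!("__table_{table}")
}

fn column_key(table: &str, column: &str) -> String {
    format!("__column_{table}_{column}")
}

/// Parse a column key back into (table, column); returns None for other keys.
/// Note: this naive split is ambiguous when the column name contains `_`.
fn parse_column_key(key: &str) -> Option<(&str, &str)> {
    let rest = key.strip_prefix("__column_")?;
    rest.rsplit_once('_')
}

fn main() {
    assert_eq!(table_key("http_requests"), "__table_http_requests");
    assert_eq!(
        column_key("http_requests", "status"),
        "__column_http_requests_status"
    );
    assert_eq!(
        parse_column_key("__column_http_requests_status"),
        Some(("http_requests", "status"))
    );
}
```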
## Physical region implementation
This RFC proposes to add a new region implementation named "MetricRegion". As shown in the first chart, it wraps the existing mito region. This section describes the implementation details. First, here is a chart showing what the region hierarchy looks like:
```plaintext
┌───────────────────────┐
│ Metric Region │
│ │
│ ┌────────┬──────────┤
│ │ Mito │ Mito │
│ │ Region │ Region │
│ │ for │ for │
│ │ Data │ Metadata │
└───┴────────┴──────────┘
```
All upper levels only see the Metric Region; e.g., the Meta Server schedules on this region, and the Frontend routes requests to this Metric Region's id. To be scheduled (opened, closed, etc.), the Metric Region needs to implement its own procedures. Most of those procedures can simply be assembled from the underlying Mito Regions', but those related to data, like alter or drop, will have their own new logic.
Another point is the region id. Since the region id is used widely, from the meta server to persisted state, it's better to keep it unchanged. This means we can't use the same id for two regions; each needs its own. To achieve this, this RFC proposes a concept named "region id group": a group of region ids that are bound to different purposes, like the two underlying regions here.
This reserves the first 8 bits of the `u32` region number for grouping. Each group has one main id (the first one) and several sub ids (the remaining non-zero ids). Components other than the region implementation itself are not aware of the existence of the region id group; they only see the main id. The region implementation is responsible for managing and using the region id group. A sketch of this encoding follows the layout diagram below.
```plaintext
63 31 23 0
┌────────────────────────────────────┬──────────┬──────────────────┐
│ Table Id(32) │ Group(8) │ Region Number(24)│
└────────────────────────────────────┴──────────┴──────────────────┘
Region Id(64)
```
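A small sketch of this encoding, following the bit layout above (table id: 32 bits, group: 8 bits, region number: 24 bits); the helper names are made up for illustration.
```rust
fn make_region_id(table_id: u32, group: u8, region_number: u32) -> u64 {
    assert!(region_number < (1 << 24), "region number must fit in 24 bits");
    ((table_id as u64) << 32) | ((group as u64) << 24) | (region_number as u64)
}

fn table_id(region_id: u64) -> u32 {
    (region_id >> 32) as u32
}

fn group(region_id: u64) -> u8 {
    ((region_id >> 24) & 0xFF) as u8
}

fn region_number(region_id: u64) -> u32 {
    (region_id & 0xFF_FFFF) as u32
}

fn main() {
    // Main id (group 0) for the data region, sub id (group 1) for metadata.
    let data = make_region_id(1024, 0, 1);
    let meta = make_region_id(1024, 1, 1);
    assert_eq!(table_id(data), table_id(meta));
    assert_eq!(region_number(data), region_number(meta));
    assert_ne!(data, meta);
    assert_eq!(group(data), 0);
    assert_eq!(group(meta), 1);
}
```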
## Routing in meta server
From previous sections, we can conclude the following points about routing:
- Each "logical table" has its own, universe unique table id.
- Logical table doesn't have physical region, they share the same physical region with other logical tables.
- Route rule of logical table's is a strict subset of physical table's.
To associate a logical table with a physical region, we need to specify the necessary information in the create table request, specifically the table type and its parent table. This requires changing our gRPC proto definition. Once the meta server recognizes that the table to create is a logical table, it will use the parent table's regions to create the route entry.
And to reduce the cost of region failover (which needs to update the physical table's route info), we'd better split the current route table structure into two parts:
```rust
region_route: Map<TableName, [RegionId]>,
node_route: Map<RegionId, NodeId>,
```
By doing this, on each failover the meta server only needs to update the second `node_route` map and can leave the first one untouched.
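A minimal sketch of the two-map structure and a failover update, with placeholder types standing in for the real meta server structures:
```rust
use std::collections::HashMap;

// Placeholder types for illustration.
type TableName = String;
type RegionId = u64;
type NodeId = u64;

struct RouteTable {
    region_route: HashMap<TableName, Vec<RegionId>>,
    node_route: HashMap<RegionId, NodeId>,
}

impl RouteTable {
    /// On failover only the region -> node mapping changes; the
    /// table -> regions mapping stays untouched.
    fn fail_over(&mut self, region: RegionId, new_node: NodeId) {
        self.node_route.insert(region, new_node);
    }
}

fn main() {
    let mut routes = RouteTable {
        region_route: HashMap::from([("my_table".to_string(), vec![0, 1])]),
        node_route: HashMap::from([(0, 100), (1, 101)]),
    };
    // Region 1 moves from node 101 to node 102; `region_route` is unchanged.
    routes.fail_over(1, 102);
    assert_eq!(routes.node_route[&1], 102);
    assert_eq!(routes.region_route["my_table"], vec![0, 1]);
}
```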
## Query
As with other existing components, a user query always starts in the frontend. In the planning phase, the frontend needs to fetch the related schemas of the queried table. This part stays the same, i.e., changes in this RFC don't affect components above the `Table` abstraction.
# Alternatives
## Other routing method
We can also do this "special" route rule in the meta server. But there is no difference with the proposed method.
## Other storage for physical region's metadata
Once we have implemented the "region family" that allows multiple physical schemas exist in one region, we can store the metadata and table data into one region.
Before that, we can also let the `MetricRegion` hold a `KvBackend` to access the storage layer directly, but this breaks the abstraction in some way.
# Drawbacks
Since the physical storage is mixed together, it's hard to do fine-grained operations at the table level, like configuring TTL, memtable size, or compaction strategy per table, or defining different partition rules for different tables. For scenarios like this, it's better to move the table out of the metric engine and "upgrade" it to a normal mito engine table. This requires a low-cost migration process, and we have to ensure data consistency during the migration, which may require an out-of-service period.

View File

@@ -1,175 +0,0 @@
---
Feature Name: table-trait-refactor
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/2065
Date: 2023-08-04
Author: "Ruihang Xia <waynestxia@gmail.com>"
---
Refactor Table Trait
--------------------
# Summary
Refactor `Table` trait to adapt the new region server architecture and make code more straightforward.
# Motivation
The `Table` trait was designed with the assumption that frontend and datanode share the same concepts, and that all operations are served by a `Table`. However, in practice we found that not all operations are suitable to be served by a `Table`. For example, the `Table` doesn't hold actual physical data itself, so operations like write or alter are simply a proxy over the underlying regions. And in the recent refactor of the datanode ([rfc table-engine-refactor](./2023-07-06-table-engine-refactor.md)), we are changing the datanode into a region server that is only aware of `Region`-level concepts. This also calls for a refactor of the `Table` trait.
# Details
## Definitions
The current `Table` trait contains the following methods:
```rust
pub trait Table {
/// Get a reference to the schema for this table
fn schema(&self) -> SchemaRef;
/// Get a reference to the table info.
fn table_info(&self) -> TableInfoRef;
/// Get the type of this table for metadata/catalog purposes.
fn table_type(&self) -> TableType;
/// Insert values into table.
///
/// Returns number of inserted rows.
async fn insert(&self, _request: InsertRequest) -> Result<usize>;
/// Generate a record batch stream for querying.
async fn scan_to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream>;
/// Tests whether the table provider can make use of any or all filter expressions
/// to optimise data retrieval.
fn supports_filters_pushdown(&self, filters: &[&Expr]) -> Result<Vec<FilterPushDownType>>;
/// Alter table.
async fn alter(&self, _context: AlterContext, _request: &AlterTableRequest) -> Result<()>;
/// Delete rows in the table.
///
/// Returns number of deleted rows.
async fn delete(&self, _request: DeleteRequest) -> Result<usize>;
/// Flush table.
///
/// Options:
/// - region_number: specify region to flush.
/// - wait: Whether to wait until flush is done.
async fn flush(&self, region_number: Option<RegionNumber>, wait: Option<bool>) -> Result<()>;
/// Close the table.
async fn close(&self, _regions: &[RegionNumber]) -> Result<()>;
/// Get region stats in this table.
fn region_stats(&self) -> Result<Vec<RegionStat>>;
/// Return true if contains the region
fn contains_region(&self, _region: RegionNumber) -> Result<bool>;
/// Get statistics for this table, if available
fn statistics(&self) -> Option<TableStatistics>;
async fn compact(&self, region_number: Option<RegionNumber>, wait: Option<bool>) -> Result<()>;
}
```
We can divide those methods into three categories from the perspective of functionality:
| Retrieve Metadata | Manipulate Data | Read Data |
| :------------------------: | :-------------: | :--------------: |
| `schema` | `insert` | `scan_to_stream` |
| `table_info` | `alter` | |
| `table_type` | `delete` | |
| `supports_filters_pushdown` | `flush` | |
| `region_stats` | `close` | |
| `contains_region` | `compact` | |
| `statistics` | | |
Considering that most metadata access happens in the frontend (e.g., for routing or querying), that all persisted data is stored in regions, and that only the query engine needs to read data, we can divide the `Table` trait into three concepts:
- struct `Table` provides metadata:
```rust
impl Table {
/// Get a reference to the schema for this table
fn schema(&self) -> SchemaRef;
/// Get a reference to the table info.
fn table_info(&self) -> TableInfoRef;
/// Get the type of this table for metadata/catalog purposes.
fn table_type(&self) -> TableType;
/// Get statistics for this table, if available
fn statistics(&self) -> Option<TableStatistics>;
fn to_data_source(&self) -> DataSourceRef;
}
```
- Requests to region server
- `InsertRequest`
- `AlterRequest`
- `DeleteRequest`
- `FlushRequest`
- `CompactRequest`
- `CloseRequest`
- trait `DataSource` provides data (`RecordBatch`)
```rust
trait DataSource {
fn get_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream>;
}
```
## Use `Table`
`Table` will only be used in the frontend. It's constructed from an `OpenTableRequest` or `CreateTableRequest`.
`Table` also provides a method `to_data_source` to generate a `DataSource` from itself. But this method is only for non-`TableType::Base` tables (i.e., `TableType::View` and `TableType::Temporary`), because a `TableType::Base` table doesn't hold actual data itself; its `DataSource` should be constructed from the `Region` directly (in other words, it's a remote query).
Constructing a `DataSource` requires some extra information, named `TableSourceProvider`:
```rust
type TableFactory = Arc<dyn Fn() -> DataSourceRef>;
pub enum TableSourceProvider {
Base,
View(LogicalPlan),
Temporary(TableFactory),
}
```
## Use `DataSource`
`DataSource` will be adapted to DataFusion's `TableProvider`, which can be `scan()`ed in a `TableScan` plan.
In the frontend this is done in the planning phase, and the datanode will have one implementation for `Region` to generate the record batch stream.
## Interact with RegionServer
Previously, persisted state changes went through the old `Table` trait, as said before. Now they will come from the action source, such as a procedure or protocol handler, directly to the region server. E.g., on alter table, the corresponding procedure will generate its `AlterRequest` and send it to the regions; a write request will be split in the frontend handler and sent to the regions. `Table` only provides metadata like route information if needed, and is no longer a necessary part of the path.
## Implement temporary table
A temporary table is a special table that doesn't resolve to any persistent physical region; a small factory sketch follows the examples below. Examples are:
- the `Numbers` table for testing, which produces a record batch that contains 0-100 integers.
- tables in the information schema. They are an interface for querying the catalog's metadata. The contents are generated on the fly with information from the `CatalogManager`, which can be held in the `TableFactory`.
- function tables that produce data generated by a formula or a function, like something that always returns `sin(current_timestamp())`.
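Here is a small sketch of the closure-based factory shape, using stub types in place of the RFC's `DataSource`/`DataSourceRef`; only the shape of `TableFactory` is the point.
```rust
use std::sync::Arc;

// Stub types standing in for the RFC's `DataSource` / `DataSourceRef`.
trait DataSource {
    fn describe(&self) -> String;
}
type DataSourceRef = Arc<dyn DataSource>;
type TableFactory = Arc<dyn Fn() -> DataSourceRef>;

/// A stand-in for the `Numbers` test table: data is produced on the fly.
struct NumbersSource;

impl DataSource for NumbersSource {
    fn describe(&self) -> String {
        "numbers 0..100 generated on the fly".to_string()
    }
}

fn main() {
    // The factory captures whatever state it needs (e.g. a CatalogManager)
    // and builds a fresh data source for every scan.
    let factory: TableFactory = Arc::new(|| Arc::new(NumbersSource) as DataSourceRef);
    let source = factory();
    println!("{}", source.describe());
}
```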
## Relationship among those components
Here is a diagram to show the relationship among those components, and how they interact with each other.
```mermaid
erDiagram
CatalogManager ||--|{ Table : manages
Table ||--|{ DataStream : generates
Table ||--|{ Region : routes
Region ||--|{ DataStream : implements
DataStream }|..|| QueryEngine : adapts-to
Procedure ||--|{ Region : requests
Protocol ||--|{ Region : writes
Protocol ||--|{ QueryEngine : queries
```
# Drawback
This is a breaking change.

View File

@@ -1,90 +0,0 @@
---
Feature Name: Update Metadata in single transaction
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/1715
Date: 2023-08-13
Author: "Feng Yangsen <fengys1996@gmail.com>, Xu Wenkang <wenymedia@gmail.com>"
---
# Summary
Update Metadata in single transaction.
# Motivation
Currently, multiple transactions are involved during a procedure. This implementation is inefficient, and it's hard to keep the data consistent. Therefore, we can update multiple metadata entries in a single transaction.
# Details
Now we have the following table metadata keys:
**TableInfo**
```rust
// __table_info/{table_id}
pub struct TableInfoKey {
table_id: TableId,
}
pub struct TableInfoValue {
pub table_info: RawTableInfo,
version: u64,
}
```
**TableRoute**
```rust
// __table_route/{table_id}
pub struct NextTableRouteKey {
table_id: TableId,
}
pub struct TableRoute {
pub region_routes: Vec<RegionRoute>,
}
```
**DatanodeTable**
```rust
// __table_route/{datanode_id}/{table_id}
pub struct DatanodeTableKey {
datanode_id: DatanodeId,
table_id: TableId,
}
pub struct DatanodeTableValue {
pub table_id: TableId,
pub regions: Vec<RegionNumber>,
version: u64,
}
```
**TableNameKey**
```rust
// __table_name/{CatalogName}/{SchemaName}/{TableName}
pub struct TableNameKey<'a> {
pub catalog: &'a str,
pub schema: &'a str,
pub table: &'a str,
}
pub struct TableNameValue {
table_id: TableId,
}
```
This table metadata is only updated in the following operations.
## Region Failover
It needs to update the `TableRoute` key and `DatanodeTable` keys. If the current `TableRoute` equals the snapshot of `TableRoute` taken when submitting the failover task, then we can safely update these keys.
Between submitting the failover task and acquiring the locks for execution, the `TableRoute` may be updated by another task. After acquiring the lock, we can fetch the latest `TableRoute` again and then execute the update if still needed.
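For illustration, here is a sketch of what the failover update could look like as a single compare-and-put transaction; the `Txn` builder, key strings, and values are hypothetical placeholders, not the actual KvBackend API.
```rust
// Hypothetical etcd-style transaction builder; the real KvBackend API and the
// key/value encodings differ.
struct Txn {
    compares: Vec<(String, Vec<u8>)>, // each key must currently equal its value
    puts: Vec<(String, Vec<u8>)>,     // applied only if all compares succeed
}

impl Txn {
    fn new() -> Self {
        Self { compares: Vec::new(), puts: Vec::new() }
    }

    fn compare_eq(mut self, key: &str, expected: &[u8]) -> Self {
        self.compares.push((key.to_string(), expected.to_vec()));
        self
    }

    fn put(mut self, key: &str, value: &[u8]) -> Self {
        self.puts.push((key.to_string(), value.to_vec()));
        self
    }
}

/// Build the failover update as one transaction: the route and the
/// datanode-table mappings change only if the route still matches the
/// snapshot taken when the task was submitted.
fn failover_txn(table_id: u32, to_datanode: u64, snapshot: &[u8], new_route: &[u8]) -> Txn {
    Txn::new()
        .compare_eq(&format!("__table_route/{table_id}"), snapshot)
        .put(&format!("__table_route/{table_id}"), new_route)
        // A real transaction would also delete or rewrite the old datanode's key.
        .put(&format!("__datanode_table/{to_datanode}/{table_id}"), b"regions for the new node")
}

fn main() {
    let txn = failover_txn(42, 2, b"route snapshot", b"new route");
    println!("{} compares, {} puts", txn.compares.len(), txn.puts.len());
}
```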
## Create Table DDL
Creates all of the above keys. The `TableRoute` and `TableInfo` keys should be empty beforehand.
The **TableNameKey**'s lock will be held by the procedure framework.
## Drop Table DDL
`TableInfoKey` and `NextTableRouteKey` will be re-added with a `__removed-` prefix, and the other keys above will be deleted. The transaction will not compare any keys.
## Alter Table DDL
1. Rename table: updates `TableInfo` and `TableName`. The transaction compares `TableInfo`: the new `TableNameKey` should be empty, and `TableInfo` should equal the snapshot taken when submitting the DDL.
The old and new **TableNameKey**'s lock will be held by the procedure framework.
2. Alter table: updates `TableInfo`. `TableInfo` should equal the snapshot taken when submitting the DDL.

View File

@@ -1,2 +1,2 @@
[toolchain] [toolchain]
channel = "nightly-2023-08-07" channel = "nightly-2023-05-03"

View File

@@ -2,14 +2,14 @@
# This script is used to download built dashboard assets from the "GreptimeTeam/dashboard" repository. # This script is used to download built dashboard assets from the "GreptimeTeam/dashboard" repository.
set -e -x set -e
declare -r SCRIPT_DIR=$(cd $(dirname ${0}) >/dev/null 2>&1 && pwd) declare -r SCRIPT_DIR=$(cd $(dirname ${0}) >/dev/null 2>&1 && pwd)
declare -r ROOT_DIR=$(dirname ${SCRIPT_DIR}) declare -r ROOT_DIR=$(dirname ${SCRIPT_DIR})
declare -r STATIC_DIR="$ROOT_DIR/src/servers/dashboard" declare -r STATIC_DIR="$ROOT_DIR/src/servers/dashboard"
OUT_DIR="${1:-$SCRIPT_DIR}" OUT_DIR="${1:-$SCRIPT_DIR}"
RELEASE_VERSION="$(cat $STATIC_DIR/VERSION | tr -d '\t\r\n ')" RELEASE_VERSION="$(cat $STATIC_DIR/VERSION)"
echo "Downloading assets to dir: $OUT_DIR" echo "Downloading assets to dir: $OUT_DIR"
cd $OUT_DIR cd $OUT_DIR

View File

@@ -61,16 +61,7 @@ if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
fi fi
echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}" echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz"
if [ -n "${PACKAGE_NAME}" ]; then wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${BIN}-${OS_TYPE}-${ARCH_TYPE}.tgz"
wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}" tar xvf ${BIN}-${OS_TYPE}-${ARCH_TYPE}.tgz && rm ${BIN}-${OS_TYPE}-${ARCH_TYPE}.tgz && echo "Run './${BIN} --help' to get started"
# Extract the binary and clean the rest.
tar xvf "${PACKAGE_NAME}" && \
mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \
rm -r "${PACKAGE_NAME}" && \
rm -r "${PACKAGE_NAME%.tar.gz}" && \
echo "Run './${BIN} --help' to get started"
fi
fi fi

View File

@@ -5,10 +5,11 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
common-base = { workspace = true } arrow-flight.workspace = true
common-error = { workspace = true } common-base = { path = "../common/base" }
common-time = { workspace = true } common-error = { path = "../common/error" }
datatypes = { workspace = true } common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
greptime-proto.workspace = true greptime-proto.workspace = true
prost.workspace = true prost.workspace = true
snafu = { version = "0.7", features = ["backtraces"] } snafu = { version = "0.7", features = ["backtraces"] }
@@ -16,6 +17,3 @@ tonic.workspace = true
[build-dependencies] [build-dependencies]
tonic-build = "0.9" tonic-build = "0.9"
[dev-dependencies]
paste = "1.0"

View File

@@ -15,7 +15,7 @@
use std::any::Any; use std::any::Any;
use common_error::ext::ErrorExt; use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode; use common_error::prelude::StatusCode;
use datatypes::prelude::ConcreteDataType; use datatypes::prelude::ConcreteDataType;
use snafu::prelude::*; use snafu::prelude::*;
use snafu::Location; use snafu::Location;

File diff suppressed because it is too large

View File

@@ -15,7 +15,7 @@
pub mod error; pub mod error;
pub mod helper; pub mod helper;
pub mod prom_store { pub mod prometheus {
pub mod remote { pub mod remote {
pub use greptime_proto::prometheus::remote::*; pub use greptime_proto::prometheus::remote::*;
} }

View File

@@ -1,26 +0,0 @@
[package]
name = "auth"
version.workspace = true
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
default = []
testing = []
[dependencies]
api.workspace = true
async-trait.workspace = true
common-error.workspace = true
digest = "0.10"
hex = { version = "0.4" }
secrecy = { version = "0.8", features = ["serde", "alloc"] }
sha1 = "0.10"
snafu.workspace = true
sql.workspace = true
tokio.workspace = true
[dev-dependencies]
common-test-util.workspace = true

View File

@@ -1,147 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use digest::Digest;
use secrecy::SecretString;
use sha1::Sha1;
use snafu::{ensure, OptionExt};
use crate::error::{IllegalParamSnafu, InvalidConfigSnafu, Result, UserPasswordMismatchSnafu};
use crate::user_info::DefaultUserInfo;
use crate::user_provider::static_user_provider::{StaticUserProvider, STATIC_USER_PROVIDER};
use crate::{UserInfoRef, UserProviderRef};
pub(crate) const DEFAULT_USERNAME: &str = "greptime";
/// construct a [`UserInfo`] impl with name
/// use default username `greptime` if None is provided
pub fn userinfo_by_name(username: Option<String>) -> UserInfoRef {
DefaultUserInfo::with_name(username.unwrap_or_else(|| DEFAULT_USERNAME.to_string()))
}
pub fn user_provider_from_option(opt: &String) -> Result<UserProviderRef> {
let (name, content) = opt.split_once(':').context(InvalidConfigSnafu {
value: opt.to_string(),
msg: "UserProviderOption must be in format `<option>:<value>`",
})?;
match name {
STATIC_USER_PROVIDER => {
let provider =
StaticUserProvider::try_from(content).map(|p| Arc::new(p) as UserProviderRef)?;
Ok(provider)
}
_ => InvalidConfigSnafu {
value: name.to_string(),
msg: "Invalid UserProviderOption",
}
.fail(),
}
}
type Username<'a> = &'a str;
type HostOrIp<'a> = &'a str;
#[derive(Debug, Clone)]
pub enum Identity<'a> {
UserId(Username<'a>, Option<HostOrIp<'a>>),
}
pub type HashedPassword<'a> = &'a [u8];
pub type Salt<'a> = &'a [u8];
/// Authentication information sent by the client.
pub enum Password<'a> {
PlainText(SecretString),
MysqlNativePassword(HashedPassword<'a>, Salt<'a>),
PgMD5(HashedPassword<'a>, Salt<'a>),
}
pub fn auth_mysql(
auth_data: HashedPassword,
salt: Salt,
username: &str,
save_pwd: &[u8],
) -> Result<()> {
ensure!(
auth_data.len() == 20,
IllegalParamSnafu {
msg: "Illegal mysql password length"
}
);
// ref: https://github.com/mysql/mysql-server/blob/a246bad76b9271cb4333634e954040a970222e0a/sql/auth/password.cc#L62
let hash_stage_2 = double_sha1(save_pwd);
let tmp = sha1_two(salt, &hash_stage_2);
// xor auth_data and tmp
let mut xor_result = [0u8; 20];
for i in 0..20 {
xor_result[i] = auth_data[i] ^ tmp[i];
}
let candidate_stage_2 = sha1_one(&xor_result);
if candidate_stage_2 == hash_stage_2 {
Ok(())
} else {
UserPasswordMismatchSnafu {
username: username.to_string(),
}
.fail()
}
}
fn sha1_two(input_1: &[u8], input_2: &[u8]) -> Vec<u8> {
let mut hasher = Sha1::new();
hasher.update(input_1);
hasher.update(input_2);
hasher.finalize().to_vec()
}
fn sha1_one(data: &[u8]) -> Vec<u8> {
let mut hasher = Sha1::new();
hasher.update(data);
hasher.finalize().to_vec()
}
fn double_sha1(data: &[u8]) -> Vec<u8> {
sha1_one(&sha1_one(data))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sha() {
let sha_1_answer: Vec<u8> = vec![
124, 74, 141, 9, 202, 55, 98, 175, 97, 229, 149, 32, 148, 61, 194, 100, 148, 248, 148,
27,
];
let sha_1 = sha1_one("123456".as_bytes());
assert_eq!(sha_1, sha_1_answer);
let double_sha1_answer: Vec<u8> = vec![
107, 180, 131, 126, 183, 67, 41, 16, 94, 228, 86, 141, 218, 125, 198, 126, 210, 202,
42, 217,
];
let double_sha1 = double_sha1("123456".as_bytes());
assert_eq!(double_sha1, double_sha1_answer);
let sha1_2_answer: Vec<u8> = vec![
132, 115, 215, 211, 99, 186, 164, 206, 168, 152, 217, 192, 117, 47, 240, 252, 142, 244,
37, 204,
];
let sha1_2 = sha1_two("123456".as_bytes(), "654321".as_bytes());
assert_eq!(sha1_2, sha1_2_answer);
}
}

View File

@@ -1,34 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod common;
pub mod error;
mod permission;
mod user_info;
mod user_provider;
#[cfg(feature = "testing")]
pub mod tests;
pub use common::{
auth_mysql, user_provider_from_option, userinfo_by_name, HashedPassword, Identity, Password,
};
pub use permission::{PermissionChecker, PermissionReq, PermissionResp};
pub use user_info::UserInfo;
pub use user_provider::UserProvider;
/// pub type alias
pub type UserInfoRef = std::sync::Arc<dyn UserInfo>;
pub type UserProviderRef = std::sync::Arc<dyn UserProvider>;
pub type PermissionCheckerRef = std::sync::Arc<dyn PermissionChecker>;

View File

@@ -1,64 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt::Debug;
use api::v1::greptime_request::Request;
use sql::statements::statement::Statement;
use crate::error::{PermissionDeniedSnafu, Result};
use crate::{PermissionCheckerRef, UserInfoRef};
#[derive(Debug, Clone)]
pub enum PermissionReq<'a> {
GrpcRequest(&'a Request),
SqlStatement(&'a Statement),
PromQuery,
Opentsdb,
LineProtocol,
PromStoreWrite,
PromStoreRead,
Otlp,
}
#[derive(Debug)]
pub enum PermissionResp {
Allow,
Reject,
}
pub trait PermissionChecker: Send + Sync {
fn check_permission(
&self,
user_info: Option<UserInfoRef>,
req: PermissionReq,
) -> Result<PermissionResp>;
}
impl PermissionChecker for Option<&PermissionCheckerRef> {
fn check_permission(
&self,
user_info: Option<UserInfoRef>,
req: PermissionReq,
) -> Result<PermissionResp> {
match self {
Some(checker) => match checker.check_permission(user_info, req) {
Ok(PermissionResp::Reject) => PermissionDeniedSnafu.fail(),
Ok(PermissionResp::Allow) => Ok(PermissionResp::Allow),
Err(e) => Err(e),
},
None => Ok(PermissionResp::Allow),
}
}
}

View File

@@ -1,47 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;
use crate::UserInfoRef;
pub trait UserInfo: Debug + Sync + Send {
fn as_any(&self) -> &dyn Any;
fn username(&self) -> &str;
}
#[derive(Debug)]
pub(crate) struct DefaultUserInfo {
username: String,
}
impl DefaultUserInfo {
pub(crate) fn with_name(username: impl Into<String>) -> UserInfoRef {
Arc::new(Self {
username: username.into(),
})
}
}
impl UserInfo for DefaultUserInfo {
fn as_any(&self) -> &dyn Any {
self
}
fn username(&self) -> &str {
self.username.as_str()
}
}

View File

@@ -1,46 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) mod static_user_provider;
use crate::common::{Identity, Password};
use crate::error::Result;
use crate::UserInfoRef;
#[async_trait::async_trait]
pub trait UserProvider: Send + Sync {
fn name(&self) -> &str;
/// [`authenticate`] checks whether a user is valid and allowed to access the database.
async fn authenticate(&self, id: Identity<'_>, password: Password<'_>) -> Result<UserInfoRef>;
/// [`authorize`] checks whether a connection request
/// from a certain user to a certain catalog/schema is legal.
/// This method should be called after [`authenticate`].
async fn authorize(&self, catalog: &str, schema: &str, user_info: &UserInfoRef) -> Result<()>;
/// [`auth`] is a combination of [`authenticate`] and [`authorize`].
/// In most cases it's preferred for both convenience and performance.
async fn auth(
&self,
id: Identity<'_>,
password: Password<'_>,
catalog: &str,
schema: &str,
) -> Result<UserInfoRef> {
let user_info = self.authenticate(id, password).await?;
self.authorize(catalog, schema, &user_info).await?;
Ok(user_info)
}
}

View File

@@ -1,61 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![feature(assert_matches)]
use std::assert_matches::assert_matches;
use std::sync::Arc;
use api::v1::greptime_request::Request;
use auth::error::Error::InternalState;
use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq, PermissionResp, UserInfoRef};
use sql::statements::show::{ShowDatabases, ShowKind};
use sql::statements::statement::Statement;
struct DummyPermissionChecker;
impl PermissionChecker for DummyPermissionChecker {
fn check_permission(
&self,
_user_info: Option<UserInfoRef>,
req: PermissionReq,
) -> auth::error::Result<PermissionResp> {
match req {
PermissionReq::GrpcRequest(_) => Ok(PermissionResp::Allow),
PermissionReq::SqlStatement(_) => Ok(PermissionResp::Reject),
_ => Err(InternalState {
msg: "testing".to_string(),
}),
}
}
}
#[test]
fn test_permission_checker() {
let checker: PermissionCheckerRef = Arc::new(DummyPermissionChecker);
let grpc_result = checker.check_permission(
None,
PermissionReq::GrpcRequest(&Request::Query(Default::default())),
);
assert_matches!(grpc_result, Ok(PermissionResp::Allow));
let sql_result = checker.check_permission(
None,
PermissionReq::SqlStatement(&Statement::ShowDatabases(ShowDatabases::new(ShowKind::All))),
);
assert_matches!(sql_result, Ok(PermissionResp::Reject));
let err_result = checker.check_permission(None, PermissionReq::Opentsdb);
assert_matches!(err_result, Err(InternalState { msg }) if msg == "testing");
}

View File

@@ -8,45 +8,48 @@ license.workspace = true
testing = [] testing = []
[dependencies] [dependencies]
api = { workspace = true } api = { path = "../api" }
arc-swap = "1.0" arc-swap = "1.0"
arrow-schema.workspace = true arrow-schema.workspace = true
async-stream.workspace = true async-stream.workspace = true
async-trait = "0.1" async-trait = "0.1"
common-catalog = { workspace = true } backoff = { version = "0.4", features = ["tokio"] }
common-error = { workspace = true } common-catalog = { path = "../common/catalog" }
common-grpc = { workspace = true } common-error = { path = "../common/error" }
common-meta = { workspace = true } common-grpc = { path = "../common/grpc" }
common-query = { workspace = true } common-meta = { path = "../common/meta" }
common-recordbatch = { workspace = true } common-query = { path = "../common/query" }
common-runtime = { workspace = true } common-recordbatch = { path = "../common/recordbatch" }
common-telemetry = { workspace = true } common-runtime = { path = "../common/runtime" }
common-time = { workspace = true } common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
dashmap = "5.4" dashmap = "5.4"
datafusion.workspace = true datafusion.workspace = true
datatypes = { workspace = true } datatypes = { path = "../datatypes" }
futures = "0.3" futures = "0.3"
futures-util.workspace = true futures-util.workspace = true
lazy_static.workspace = true key-lock = "0.1"
meta-client = { workspace = true } lazy_static = "1.4"
meta-client = { path = "../meta-client" }
metrics.workspace = true metrics.workspace = true
moka = { version = "0.11", features = ["future"] } moka = { version = "0.11", features = ["future"] }
parking_lot = "0.12" parking_lot = "0.12"
regex.workspace = true regex = "1.6"
serde.workspace = true serde = "1.0"
serde_json = "1.0" serde_json = "1.0"
session = { workspace = true } session = { path = "../session" }
snafu = { version = "0.7", features = ["backtraces"] } snafu = { version = "0.7", features = ["backtraces"] }
store-api = { workspace = true } storage = { path = "../storage" }
table = { workspace = true } store-api = { path = "../store-api" }
table = { path = "../table" }
tokio.workspace = true tokio.workspace = true
[dev-dependencies] [dev-dependencies]
catalog = { workspace = true, features = ["testing"] } catalog = { path = ".", features = ["testing"] }
common-test-util = { path = "../common/test-util" }
chrono.workspace = true chrono.workspace = true
common-test-util = { workspace = true } log-store = { path = "../log-store" }
log-store = { workspace = true } mito = { path = "../mito", features = ["test"] }
mito = { workspace = true, features = ["test"] } object-store = { path = "../object-store" }
object-store = { workspace = true } storage = { path = "../storage" }
storage = { workspace = true }
tokio.workspace = true tokio.workspace = true

View File

@@ -16,10 +16,10 @@ use std::any::Any;
use std::fmt::Debug; use std::fmt::Debug;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::ext::{BoxedError, ErrorExt};
use common_error::status_code::StatusCode; use common_error::prelude::{Snafu, StatusCode};
use datafusion::error::DataFusionError; use datafusion::error::DataFusionError;
use datatypes::prelude::ConcreteDataType; use datatypes::prelude::ConcreteDataType;
use snafu::{Location, Snafu}; use snafu::Location;
use tokio::task::JoinError; use tokio::task::JoinError;
use crate::DeregisterTableRequest; use crate::DeregisterTableRequest;
@@ -27,19 +27,6 @@ use crate::DeregisterTableRequest;
#[derive(Debug, Snafu)] #[derive(Debug, Snafu)]
#[snafu(visibility(pub))] #[snafu(visibility(pub))]
pub enum Error { pub enum Error {
#[snafu(display("Failed to list catalogs, source: {}", source))]
ListCatalogs {
location: Location,
source: BoxedError,
},
#[snafu(display("Failed to list {}'s schemas, source: {}", catalog, source))]
ListSchemas {
location: Location,
catalog: String,
source: BoxedError,
},
#[snafu(display( #[snafu(display(
"Failed to re-compile script due to internal error, source: {}", "Failed to re-compile script due to internal error, source: {}",
source source
@@ -205,18 +192,11 @@ pub enum Error {
source: BoxedError, source: BoxedError,
}, },
#[snafu(display(
"Failed to upgrade weak catalog manager reference. location: {}",
location
))]
UpgradeWeakCatalogManagerRef { location: Location },
#[snafu(display("Failed to execute system catalog table scan, source: {}", source))] #[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
SystemCatalogTableScanExec { SystemCatalogTableScanExec {
location: Location, location: Location,
source: common_query::error::Error, source: common_query::error::Error,
}, },
#[snafu(display("Cannot parse catalog value, source: {}", source))] #[snafu(display("Cannot parse catalog value, source: {}", source))]
InvalidCatalogValue { InvalidCatalogValue {
location: Location, location: Location,
@@ -256,12 +236,6 @@ pub enum Error {
#[snafu(display("A generic error has occurred, msg: {}", msg))] #[snafu(display("A generic error has occurred, msg: {}", msg))]
Generic { msg: String, location: Location }, Generic { msg: String, location: Location },
#[snafu(display("Table metadata manager error: {}", source))]
TableMetadataManager {
source: common_meta::error::Error,
location: Location,
},
} }
pub type Result<T> = std::result::Result<T, Error>; pub type Result<T> = std::result::Result<T, Error>;
@@ -282,9 +256,7 @@ impl ErrorExt for Error {
| Error::EmptyValue { .. } | Error::EmptyValue { .. }
| Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable, | Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable,
Error::Generic { .. } Error::Generic { .. } | Error::SystemCatalogTypeMismatch { .. } => StatusCode::Internal,
| Error::SystemCatalogTypeMismatch { .. }
| Error::UpgradeWeakCatalogManagerRef { .. } => StatusCode::Internal,
Error::ReadSystemCatalog { source, .. } | Error::CreateRecordBatch { source, .. } => { Error::ReadSystemCatalog { source, .. } | Error::CreateRecordBatch { source, .. } => {
source.status_code() source.status_code()
@@ -297,10 +269,6 @@ impl ErrorExt for Error {
StatusCode::InvalidArguments StatusCode::InvalidArguments
} }
Error::ListCatalogs { source, .. } | Error::ListSchemas { source, .. } => {
source.status_code()
}
Error::OpenSystemCatalog { source, .. } Error::OpenSystemCatalog { source, .. }
| Error::CreateSystemCatalog { source, .. } | Error::CreateSystemCatalog { source, .. }
| Error::InsertCatalogRecord { source, .. } | Error::InsertCatalogRecord { source, .. }
@@ -321,7 +289,6 @@ impl ErrorExt for Error {
Error::Unimplemented { .. } | Error::NotSupported { .. } => StatusCode::Unsupported, Error::Unimplemented { .. } | Error::NotSupported { .. } => StatusCode::Unsupported,
Error::QueryAccessDenied { .. } => StatusCode::AccessDenied, Error::QueryAccessDenied { .. } => StatusCode::AccessDenied,
Error::Datafusion { .. } => StatusCode::EngineExecuteQuery, Error::Datafusion { .. } => StatusCode::EngineExecuteQuery,
Error::TableMetadataManager { source, .. } => source.status_code(),
} }
} }

src/catalog/src/helper.rs Normal file
View File

@@ -0,0 +1,392 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use common_catalog::error::{
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
};
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize, Serializer};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::{RawTableInfo, TableId, TableVersion};
pub const CATALOG_KEY_PREFIX: &str = "__c";
pub const SCHEMA_KEY_PREFIX: &str = "__s";
pub const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
pub const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
lazy_static! {
static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
"^{CATALOG_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})$"
))
.unwrap();
}
lazy_static! {
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
"^{SCHEMA_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
))
.unwrap();
}
lazy_static! {
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{TABLE_GLOBAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
))
.unwrap();
}
lazy_static! {
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
"^{TABLE_REGIONAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-([0-9]+)$"
))
.unwrap();
}
pub fn build_catalog_prefix() -> String {
format!("{CATALOG_KEY_PREFIX}-")
}
pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
format!("{SCHEMA_KEY_PREFIX}-{}-", catalog_name.as_ref())
}
pub fn build_table_global_prefix(
catalog_name: impl AsRef<str>,
schema_name: impl AsRef<str>,
) -> String {
format!(
"{TABLE_GLOBAL_KEY_PREFIX}-{}-{}-",
catalog_name.as_ref(),
schema_name.as_ref()
)
}
pub fn build_table_regional_prefix(
catalog_name: impl AsRef<str>,
schema_name: impl AsRef<str>,
) -> String {
format!(
"{}-{}-{}-",
TABLE_REGIONAL_KEY_PREFIX,
catalog_name.as_ref(),
schema_name.as_ref()
)
}
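// For concreteness, the builders above produce plain dash-separated prefixes
// (catalog/schema/table names here are hypothetical):
//   build_catalog_prefix()                             => "__c-"
//   build_schema_prefix("greptime")                    => "__s-greptime-"
//   build_table_global_prefix("greptime", "public")    => "__tg-greptime-public-"
//   build_table_regional_prefix("greptime", "public")  => "__tr-greptime-public-"
// A full key appends the remaining components, e.g. "__tg-greptime-public-my_table",
// which is exactly what the `Display` impls and `*_KEY_PATTERN` regexes below expect.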
/// Table global info has only one key across all datanodes, so it does not have a `node_id` field.
#[derive(Clone, Hash, Eq, PartialEq)]
pub struct TableGlobalKey {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
}
impl Display for TableGlobalKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(TABLE_GLOBAL_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.write_str(&self.table_name)
}
}
impl TableGlobalKey {
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
let key = s.as_ref();
let captures = TABLE_GLOBAL_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 4, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
table_name: captures[3].to_string(),
})
}
pub fn to_raw_key(&self) -> Vec<u8> {
self.to_string().into_bytes()
}
pub fn try_from_raw_key(key: &[u8]) -> Result<Self, Error> {
Self::parse(String::from_utf8_lossy(key))
}
}
/// Table global info contains the info necessary for a datanode to create table regions, including
/// the table id, the table meta (schema, ...), and the region id allocation across datanodes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TableGlobalValue {
/// Id of the datanode that created the global table info kv. Only for debugging.
pub node_id: u64,
/// Allocation of region ids across all datanodes.
pub regions_id_map: HashMap<u64, Vec<u32>>,
pub table_info: RawTableInfo,
}
impl TableGlobalValue {
pub fn table_id(&self) -> TableId {
self.table_info.ident.table_id
}
pub fn engine(&self) -> &str {
&self.table_info.meta.engine
}
}
/// Table regional info that varies between datanodes, so it contains a `node_id` field.
pub struct TableRegionalKey {
pub catalog_name: String,
pub schema_name: String,
pub table_name: String,
pub node_id: u64,
}
impl Display for TableRegionalKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(TABLE_REGIONAL_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)?;
f.write_str("-")?;
f.write_str(&self.table_name)?;
f.write_str("-")?;
f.serialize_u64(self.node_id)
}
}
impl TableRegionalKey {
pub fn parse<S: AsRef<str>>(s: S) -> Result<Self, Error> {
let key = s.as_ref();
let captures = TABLE_REGIONAL_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 5, InvalidCatalogSnafu { key });
let node_id = captures[4]
.to_string()
.parse()
.map_err(|_| InvalidCatalogSnafu { key }.build())?;
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
table_name: captures[3].to_string(),
node_id,
})
}
}
/// Regional table info of a specific datanode, including the table version on that datanode and
/// the region ids allocated by metasrv.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TableRegionalValue {
pub version: TableVersion,
pub regions_ids: Vec<u32>,
pub engine_name: Option<String>,
}
pub struct CatalogKey {
pub catalog_name: String,
}
impl Display for CatalogKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(CATALOG_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)
}
}
impl CatalogKey {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
let key = s.as_ref();
let captures = CATALOG_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 2, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
})
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct CatalogValue;
pub struct SchemaKey {
pub catalog_name: String,
pub schema_name: String,
}
impl Display for SchemaKey {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(SCHEMA_KEY_PREFIX)?;
f.write_str("-")?;
f.write_str(&self.catalog_name)?;
f.write_str("-")?;
f.write_str(&self.schema_name)
}
}
impl SchemaKey {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
let key = s.as_ref();
let captures = SCHEMA_KEY_PATTERN
.captures(key)
.context(InvalidCatalogSnafu { key })?;
ensure!(captures.len() == 3, InvalidCatalogSnafu { key });
Ok(Self {
catalog_name: captures[1].to_string(),
schema_name: captures[2].to_string(),
})
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct SchemaValue;
macro_rules! define_catalog_value {
( $($val_ty: ty), *) => {
$(
impl $val_ty {
pub fn parse(s: impl AsRef<str>) -> Result<Self, Error> {
serde_json::from_str(s.as_ref())
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
}
pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
}
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
Ok(serde_json::to_string(self)
.context(SerializeCatalogEntryValueSnafu)?
.into_bytes())
}
}
)*
}
}
define_catalog_value!(
TableRegionalValue,
TableGlobalValue,
CatalogValue,
SchemaValue
);
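// A small sketch of what `define_catalog_value!` generates: each value type
// round-trips through JSON bytes via `as_bytes`/`from_bytes` (or `parse` for &str).
// Field values here are hypothetical, and this assumes `TableVersion` is a plain
// integer alias.
fn _table_regional_value_roundtrip() -> Result<TableRegionalValue, Error> {
    let value = TableRegionalValue {
        version: 1,
        regions_ids: vec![0, 1, 2],
        engine_name: Some("mito".to_string()),
    };
    let bytes = value.as_bytes()?;
    TableRegionalValue::from_bytes(bytes)
}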
#[cfg(test)]
mod tests {
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema, Schema};
use table::metadata::{RawTableMeta, TableIdent, TableType};
use super::*;
#[test]
fn test_parse_catalog_key() {
let key = "__c-C";
let catalog_key = CatalogKey::parse(key).unwrap();
assert_eq!("C", catalog_key.catalog_name);
assert_eq!(key, catalog_key.to_string());
}
#[test]
fn test_parse_schema_key() {
let key = "__s-C-S";
let schema_key = SchemaKey::parse(key).unwrap();
assert_eq!("C", schema_key.catalog_name);
assert_eq!("S", schema_key.schema_name);
assert_eq!(key, schema_key.to_string());
}
#[test]
fn test_parse_table_key() {
let key = "__tg-C-S-T";
let entry = TableGlobalKey::parse(key).unwrap();
assert_eq!("C", entry.catalog_name);
assert_eq!("S", entry.schema_name);
assert_eq!("T", entry.table_name);
assert_eq!(key, &entry.to_string());
}
#[test]
fn test_build_prefix() {
assert_eq!("__c-", build_catalog_prefix());
assert_eq!("__s-CATALOG-", build_schema_prefix("CATALOG"));
assert_eq!(
"__tg-CATALOG-SCHEMA-",
build_table_global_prefix("CATALOG", "SCHEMA")
);
}
#[test]
fn test_serialize_schema() {
let schema = Schema::new(vec![ColumnSchema::new(
"name",
ConcreteDataType::string_datatype(),
true,
)]);
let meta = RawTableMeta {
schema: RawSchema::from(&schema),
engine: "mito".to_string(),
created_on: chrono::DateTime::default(),
primary_key_indices: vec![0, 1],
next_column_id: 3,
engine_options: Default::default(),
value_indices: vec![2, 3],
options: Default::default(),
region_numbers: vec![1],
};
let table_info = RawTableInfo {
ident: TableIdent {
table_id: 42,
version: 1,
},
name: "table_1".to_string(),
desc: Some("blah".to_string()),
catalog_name: "catalog_1".to_string(),
schema_name: "schema_1".to_string(),
meta,
table_type: TableType::Base,
};
let value = TableGlobalValue {
node_id: 0,
regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
table_info,
};
let serialized = serde_json::to_string(&value).unwrap();
let deserialized = TableGlobalValue::parse(serialized).unwrap();
assert_eq!(value, deserialized);
}
#[test]
fn test_table_global_value_compatibility() {
let s = r#"{"node_id":1,"regions_id_map":{"1":[0]},"table_info":{"ident":{"table_id":1098,"version":1},"name":"container_cpu_limit","desc":"Created on insertion","catalog_name":"greptime","schema_name":"dd","meta":{"schema":{"column_schemas":[{"name":"container_id","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"container_name","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"docker_image","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"host","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"image_name","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"image_tag","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"interval","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"runtime","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"short_image","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"type","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"dd_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"ts","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"git.repository_url","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestamp_index":11,"version":1},"primary_key_indices":[0,1,2,3,4,5,6,7,8,9,12],"value_indices":[10,11],"engine":"mito","next_column_id":12,"region_numbers":[],"engine_options":{},"options":{},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#;
TableGlobalValue::parse(s).unwrap();
}
}
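// Sketch: the byte-level form a (TableGlobalKey, TableGlobalValue) pair takes before
// it is handed to whatever kv backend stores the catalog; the backend itself is
// outside this module's scope.
fn _encode_table_global(
    key: &TableGlobalKey,
    value: &TableGlobalValue,
) -> Result<(Vec<u8>, Vec<u8>), Error> {
    // Key: the display form "__tg-<catalog>-<schema>-<table>" as UTF-8 bytes.
    // Value: the JSON encoding generated by `define_catalog_value!`.
    Ok((key.to_raw_key(), value.as_bytes()?))
}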

View File

@@ -15,162 +15,150 @@
mod columns; mod columns;
mod tables; mod tables;
use std::collections::HashMap; use std::any::Any;
use std::sync::{Arc, Weak}; use std::sync::Arc;
use common_catalog::consts::INFORMATION_SCHEMA_NAME; use async_trait::async_trait;
use common_error::ext::BoxedError; use common_error::prelude::BoxedError;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::prelude::Expr;
use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream}; use common_recordbatch::{RecordBatchStreamAdaptor, SendableRecordBatchStream};
use datatypes::schema::SchemaRef; use datatypes::schema::SchemaRef;
use futures_util::StreamExt; use futures_util::StreamExt;
use snafu::ResultExt; use snafu::ResultExt;
use store_api::data_source::DataSource; use store_api::storage::ScanRequest;
use store_api::storage::{ScanRequest, TableId};
use table::error::{SchemaConversionSnafu, TablesRecordBatchSnafu}; use table::error::{SchemaConversionSnafu, TablesRecordBatchSnafu};
use table::metadata::{ use table::{Result as TableResult, Table, TableRef};
FilterPushDownType, TableInfoBuilder, TableInfoRef, TableMetaBuilder, TableType,
};
use table::thin_table::{ThinTable, ThinTableAdapter};
use table::TableRef;
use self::columns::InformationSchemaColumns; use self::columns::InformationSchemaColumns;
use crate::error::Result; use crate::error::Result;
use crate::information_schema::tables::InformationSchemaTables; use crate::information_schema::tables::InformationSchemaTables;
use crate::CatalogManager; use crate::{CatalogProviderRef, SchemaProvider};
pub const TABLES: &str = "tables"; const TABLES: &str = "tables";
pub const COLUMNS: &str = "columns"; const COLUMNS: &str = "columns";
pub struct InformationSchemaProvider { pub(crate) struct InformationSchemaProvider {
catalog_name: String, catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>, catalog_provider: CatalogProviderRef,
tables: Vec<String>,
} }
impl InformationSchemaProvider { impl InformationSchemaProvider {
pub fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self { pub(crate) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
Self { Self {
catalog_name, catalog_name,
catalog_manager, catalog_provider,
tables: vec![TABLES.to_string(), COLUMNS.to_string()],
} }
} }
}
/// Build a map of [TableRef] in information schema. #[async_trait]
/// Including `tables` and `columns`. impl SchemaProvider for InformationSchemaProvider {
pub fn build( fn as_any(&self) -> &dyn Any {
catalog_name: String, self
catalog_manager: Weak<dyn CatalogManager>,
) -> HashMap<String, TableRef> {
let provider = Self::new(catalog_name, catalog_manager);
let mut schema = HashMap::new();
schema.insert(TABLES.to_owned(), provider.table(TABLES).unwrap());
schema.insert(COLUMNS.to_owned(), provider.table(COLUMNS).unwrap());
schema
} }
pub fn table(&self, name: &str) -> Option<TableRef> { async fn table_names(&self) -> Result<Vec<String>> {
self.information_table(name).map(|table| { Ok(self.tables.clone())
let table_info = Self::table_info(self.catalog_name.clone(), &table);
let filter_pushdown = FilterPushDownType::Unsupported;
let thin_table = ThinTable::new(table_info, filter_pushdown);
let data_source = Arc::new(InformationTableDataSource::new(table));
Arc::new(ThinTableAdapter::new(thin_table, data_source)) as _
})
} }
fn information_table(&self, name: &str) -> Option<InformationTableRef> { async fn table(&self, name: &str) -> Result<Option<TableRef>> {
match name.to_ascii_lowercase().as_str() { let stream_builder = match name.to_ascii_lowercase().as_ref() {
TABLES => Some(Arc::new(InformationSchemaTables::new( TABLES => Arc::new(InformationSchemaTables::new(
self.catalog_name.clone(), self.catalog_name.clone(),
self.catalog_manager.clone(), self.catalog_provider.clone(),
)) as _), )) as _,
COLUMNS => Some(Arc::new(InformationSchemaColumns::new( COLUMNS => Arc::new(InformationSchemaColumns::new(
self.catalog_name.clone(), self.catalog_name.clone(),
self.catalog_manager.clone(), self.catalog_provider.clone(),
)) as _), )) as _,
_ => None, _ => {
} return Ok(None);
} }
fn table_info(catalog_name: String, table: &InformationTableRef) -> TableInfoRef {
let table_meta = TableMetaBuilder::default()
.schema(table.schema())
.primary_key_indices(vec![])
.next_column_id(0)
.build()
.unwrap();
let table_info = TableInfoBuilder::default()
.table_id(table.table_id())
.name(table.table_name().to_owned())
.catalog_name(catalog_name)
.schema_name(INFORMATION_SCHEMA_NAME.to_owned())
.meta(table_meta)
.table_type(table.table_type())
.build()
.unwrap();
Arc::new(table_info)
}
}
trait InformationTable {
fn table_id(&self) -> TableId;
fn table_name(&self) -> &'static str;
fn schema(&self) -> SchemaRef;
fn to_stream(&self) -> Result<SendableRecordBatchStream>;
fn table_type(&self) -> TableType {
TableType::Temporary
}
}
type InformationTableRef = Arc<dyn InformationTable + Send + Sync>;
struct InformationTableDataSource {
table: InformationTableRef,
}
impl InformationTableDataSource {
fn new(table: InformationTableRef) -> Self {
Self { table }
}
fn try_project(&self, projection: &[usize]) -> std::result::Result<SchemaRef, BoxedError> {
let schema = self
.table
.schema()
.try_project(projection)
.context(SchemaConversionSnafu)
.map_err(BoxedError::new)?;
Ok(Arc::new(schema))
}
}
impl DataSource for InformationTableDataSource {
fn get_stream(
&self,
request: ScanRequest,
) -> std::result::Result<SendableRecordBatchStream, BoxedError> {
let projection = request.projection;
let projected_schema = match &projection {
Some(projection) => self.try_project(projection)?,
None => self.table.schema(),
}; };
Ok(Some(Arc::new(InformationTable::new(stream_builder))))
}
async fn table_exist(&self, name: &str) -> Result<bool> {
let normalized_name = name.to_ascii_lowercase();
Ok(self.tables.contains(&normalized_name))
}
}
// TODO(ruihang): make it a more generic trait:
// https://github.com/GreptimeTeam/greptimedb/pull/1639#discussion_r1205001903
pub trait InformationStreamBuilder: Send + Sync {
fn to_stream(&self) -> Result<SendableRecordBatchStream>;
fn schema(&self) -> SchemaRef;
}
pub struct InformationTable {
stream_builder: Arc<dyn InformationStreamBuilder>,
}
impl InformationTable {
pub fn new(stream_builder: Arc<dyn InformationStreamBuilder>) -> Self {
Self { stream_builder }
}
}
#[async_trait]
impl Table for InformationTable {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.stream_builder.schema()
}
fn table_info(&self) -> table::metadata::TableInfoRef {
unreachable!("Should not call table_info() of InformationTable directly")
}
/// Scans the table and returns a SendableRecordBatchStream.
async fn scan(
&self,
_projection: Option<&Vec<usize>>,
_filters: &[Expr],
// limit can be used to reduce the amount scanned
// from the datasource as a performance optimization.
// If set, it contains the number of rows needed by the `LogicalPlan`;
// the datasource should return *at least* this number of rows if available.
_limit: Option<usize>,
) -> TableResult<PhysicalPlanRef> {
unimplemented!()
}
async fn scan_to_stream(&self, request: ScanRequest) -> TableResult<SendableRecordBatchStream> {
let projection = request.projection;
let projected_schema = if let Some(projection) = &projection {
Arc::new(
self.schema()
.try_project(projection)
.context(SchemaConversionSnafu)?,
)
} else {
self.schema()
};
let stream = self let stream = self
.table .stream_builder
.to_stream() .to_stream()
.map_err(BoxedError::new) .map_err(BoxedError::new)
.context(TablesRecordBatchSnafu) .context(TablesRecordBatchSnafu)?
.map_err(BoxedError::new)? .map(move |batch| {
.map(move |batch| match &projection { batch.and_then(|batch| {
Some(p) => batch.and_then(|b| b.try_project(p)), if let Some(projection) = &projection {
None => batch, batch.try_project(projection)
} else {
Ok(batch)
}
})
}); });
let stream = RecordBatchStreamAdaptor { let stream = RecordBatchStreamAdaptor {
schema: projected_schema, schema: projected_schema,
stream: Box::pin(stream), stream: Box::pin(stream),

View File

@@ -12,38 +12,33 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::sync::{Arc, Weak}; use std::sync::Arc;
use arrow_schema::SchemaRef as ArrowSchemaRef; use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::{ use common_catalog::consts::{
INFORMATION_SCHEMA_COLUMNS_TABLE_ID, INFORMATION_SCHEMA_NAME, SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_FIELD, SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
SEMANTIC_TYPE_PRIMARY_KEY, SEMANTIC_TYPE_TIME_INDEX,
}; };
use common_error::ext::BoxedError; use common_error::prelude::BoxedError;
use common_query::physical_plan::TaskContext; use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter; use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream}; use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter; use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, DataType}; use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVectorBuilder; use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{StringVectorBuilder, VectorRef}; use datatypes::vectors::{StringVectorBuilder, VectorRef};
use snafu::{OptionExt, ResultExt}; use snafu::ResultExt;
use store_api::storage::TableId;
use super::tables::InformationSchemaTables; use super::InformationStreamBuilder;
use super::{InformationTable, COLUMNS, TABLES}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::error::{ use crate::CatalogProviderRef;
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::CatalogManager;
pub(super) struct InformationSchemaColumns { pub(super) struct InformationSchemaColumns {
schema: SchemaRef, schema: SchemaRef,
catalog_name: String, catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>, catalog_provider: CatalogProviderRef,
} }
const TABLE_CATALOG: &str = "table_catalog"; const TABLE_CATALOG: &str = "table_catalog";
@@ -54,43 +49,32 @@ const DATA_TYPE: &str = "data_type";
const SEMANTIC_TYPE: &str = "semantic_type"; const SEMANTIC_TYPE: &str = "semantic_type";
impl InformationSchemaColumns { impl InformationSchemaColumns {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self { pub(super) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
Self { let schema = Arc::new(Schema::new(vec![
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false), ColumnSchema::new(TABLE_CATALOG, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false), ColumnSchema::new(TABLE_SCHEMA, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false), ColumnSchema::new(TABLE_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false), ColumnSchema::new(COLUMN_NAME, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false), ColumnSchema::new(DATA_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false), ColumnSchema::new(SEMANTIC_TYPE, ConcreteDataType::string_datatype(), false),
])) ]));
Self {
schema,
catalog_name,
catalog_provider,
}
} }
fn builder(&self) -> InformationSchemaColumnsBuilder { fn builder(&self) -> InformationSchemaColumnsBuilder {
InformationSchemaColumnsBuilder::new( InformationSchemaColumnsBuilder::new(
self.schema.clone(), self.schema.clone(),
self.catalog_name.clone(), self.catalog_name.clone(),
self.catalog_manager.clone(), self.catalog_provider.clone(),
) )
} }
} }
impl InformationTable for InformationSchemaColumns { impl InformationStreamBuilder for InformationSchemaColumns {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_COLUMNS_TABLE_ID
}
fn table_name(&self) -> &'static str {
COLUMNS
}
fn schema(&self) -> SchemaRef { fn schema(&self) -> SchemaRef {
self.schema.clone() self.schema.clone()
} }
@@ -119,7 +103,7 @@ impl InformationTable for InformationSchemaColumns {
struct InformationSchemaColumnsBuilder { struct InformationSchemaColumnsBuilder {
schema: SchemaRef, schema: SchemaRef,
catalog_name: String, catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>, catalog_provider: CatalogProviderRef,
catalog_names: StringVectorBuilder, catalog_names: StringVectorBuilder,
schema_names: StringVectorBuilder, schema_names: StringVectorBuilder,
@@ -130,15 +114,11 @@ struct InformationSchemaColumnsBuilder {
} }
impl InformationSchemaColumnsBuilder { impl InformationSchemaColumnsBuilder {
fn new( fn new(schema: SchemaRef, catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self { Self {
schema, schema,
catalog_name, catalog_name,
catalog_manager, catalog_provider,
catalog_names: StringVectorBuilder::with_capacity(42), catalog_names: StringVectorBuilder::with_capacity(42),
schema_names: StringVectorBuilder::with_capacity(42), schema_names: StringVectorBuilder::with_capacity(42),
table_names: StringVectorBuilder::with_capacity(42), table_names: StringVectorBuilder::with_capacity(42),
@@ -151,44 +131,13 @@ impl InformationSchemaColumnsBuilder {
/// Construct the `information_schema.tables` virtual table /// Construct the `information_schema.tables` virtual table
async fn make_tables(&mut self) -> Result<RecordBatch> { async fn make_tables(&mut self) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone(); let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
for schema_name in catalog_manager.schema_names(&catalog_name).await? {
if !catalog_manager
.schema_exist(&catalog_name, &schema_name)
.await?
{
continue;
}
for table_name in catalog_manager
.table_names(&catalog_name, &schema_name)
.await?
{
let (keys, schema) = if let Some(table) = catalog_manager
.table(&catalog_name, &schema_name, &table_name)
.await?
{
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
(keys.clone(), schema)
} else {
// TODO: this specific branch is only a workaround for FrontendCatalogManager.
if schema_name == INFORMATION_SCHEMA_NAME {
if table_name == COLUMNS {
(vec![], InformationSchemaColumns::schema())
} else if table_name == TABLES {
(vec![], InformationSchemaTables::schema())
} else {
continue;
}
} else {
continue;
}
};
for schema_name in self.catalog_provider.schema_names().await? {
let Some(schema) = self.catalog_provider.schema(&schema_name).await? else { continue };
for table_name in schema.table_names().await? {
let Some(table) = schema.table(&table_name).await? else { continue };
let keys = &table.table_info().meta.primary_key_indices;
let schema = table.schema();
for (idx, column) in schema.column_schemas().iter().enumerate() { for (idx, column) in schema.column_schemas().iter().enumerate() {
let semantic_type = if column.is_time_index() { let semantic_type = if column.is_time_index() {
SEMANTIC_TYPE_TIME_INDEX SEMANTIC_TYPE_TIME_INDEX

View File

@@ -12,78 +12,60 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::sync::{Arc, Weak}; use std::sync::Arc;
use arrow_schema::SchemaRef as ArrowSchemaRef; use arrow_schema::SchemaRef as ArrowSchemaRef;
use common_catalog::consts::{ use common_catalog::consts::INFORMATION_SCHEMA_NAME;
INFORMATION_SCHEMA_COLUMNS_TABLE_ID, INFORMATION_SCHEMA_NAME, use common_error::prelude::BoxedError;
INFORMATION_SCHEMA_TABLES_TABLE_ID,
};
use common_error::ext::BoxedError;
use common_query::physical_plan::TaskContext; use common_query::physical_plan::TaskContext;
use common_recordbatch::adapter::RecordBatchStreamAdapter; use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream}; use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use datafusion::datasource::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter; use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef}; use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder}; use datatypes::vectors::{StringVectorBuilder, UInt32VectorBuilder};
use snafu::{OptionExt, ResultExt}; use snafu::ResultExt;
use store_api::storage::TableId;
use table::metadata::TableType; use table::metadata::TableType;
use super::{COLUMNS, TABLES}; use crate::error::{CreateRecordBatchSnafu, InternalSnafu, Result};
use crate::error::{ use crate::information_schema::InformationStreamBuilder;
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu, use crate::CatalogProviderRef;
};
use crate::information_schema::InformationTable;
use crate::CatalogManager;
pub(super) struct InformationSchemaTables { pub(super) struct InformationSchemaTables {
schema: SchemaRef, schema: SchemaRef,
catalog_name: String, catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>, catalog_provider: CatalogProviderRef,
} }
impl InformationSchemaTables { impl InformationSchemaTables {
pub(super) fn new(catalog_name: String, catalog_manager: Weak<dyn CatalogManager>) -> Self { pub(super) fn new(catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
Self { let schema = Arc::new(Schema::new(vec![
schema: Self::schema(),
catalog_name,
catalog_manager,
}
}
pub(crate) fn schema() -> SchemaRef {
Arc::new(Schema::new(vec![
ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false), ColumnSchema::new("table_catalog", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false), ColumnSchema::new("table_schema", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false), ColumnSchema::new("table_name", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false), ColumnSchema::new("table_type", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true), ColumnSchema::new("table_id", ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true), ColumnSchema::new("engine", ConcreteDataType::string_datatype(), true),
])) ]));
Self {
schema,
catalog_name,
catalog_provider,
}
} }
fn builder(&self) -> InformationSchemaTablesBuilder { fn builder(&self) -> InformationSchemaTablesBuilder {
InformationSchemaTablesBuilder::new( InformationSchemaTablesBuilder::new(
self.schema.clone(), self.schema.clone(),
self.catalog_name.clone(), self.catalog_name.clone(),
self.catalog_manager.clone(), self.catalog_provider.clone(),
) )
} }
} }
impl InformationTable for InformationSchemaTables { impl InformationStreamBuilder for InformationSchemaTables {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_TABLES_TABLE_ID
}
fn table_name(&self) -> &'static str {
TABLES
}
fn schema(&self) -> SchemaRef { fn schema(&self) -> SchemaRef {
self.schema.clone() self.schema.clone()
} }
@@ -115,7 +97,7 @@ impl InformationTable for InformationSchemaTables {
struct InformationSchemaTablesBuilder { struct InformationSchemaTablesBuilder {
schema: SchemaRef, schema: SchemaRef,
catalog_name: String, catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>, catalog_provider: CatalogProviderRef,
catalog_names: StringVectorBuilder, catalog_names: StringVectorBuilder,
schema_names: StringVectorBuilder, schema_names: StringVectorBuilder,
@@ -126,15 +108,11 @@ struct InformationSchemaTablesBuilder {
} }
impl InformationSchemaTablesBuilder { impl InformationSchemaTablesBuilder {
fn new( fn new(schema: SchemaRef, catalog_name: String, catalog_provider: CatalogProviderRef) -> Self {
schema: SchemaRef,
catalog_name: String,
catalog_manager: Weak<dyn CatalogManager>,
) -> Self {
Self { Self {
schema, schema,
catalog_name, catalog_name,
catalog_manager, catalog_provider,
catalog_names: StringVectorBuilder::with_capacity(42), catalog_names: StringVectorBuilder::with_capacity(42),
schema_names: StringVectorBuilder::with_capacity(42), schema_names: StringVectorBuilder::with_capacity(42),
table_names: StringVectorBuilder::with_capacity(42), table_names: StringVectorBuilder::with_capacity(42),
@@ -147,60 +125,24 @@ impl InformationSchemaTablesBuilder {
/// Construct the `information_schema.tables` virtual table /// Construct the `information_schema.tables` virtual table
async fn make_tables(&mut self) -> Result<RecordBatch> { async fn make_tables(&mut self) -> Result<RecordBatch> {
let catalog_name = self.catalog_name.clone(); let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
for schema_name in catalog_manager.schema_names(&catalog_name).await? { for schema_name in self.catalog_provider.schema_names().await? {
if !catalog_manager if schema_name == INFORMATION_SCHEMA_NAME {
.schema_exist(&catalog_name, &schema_name)
.await?
{
continue; continue;
} }
for table_name in catalog_manager let Some(schema) = self.catalog_provider.schema(&schema_name).await? else { continue };
.table_names(&catalog_name, &schema_name) for table_name in schema.table_names().await? {
.await? let Some(table) = schema.table(&table_name).await? else { continue };
{ let table_info = table.table_info();
if let Some(table) = catalog_manager self.add_table(
.table(&catalog_name, &schema_name, &table_name) &catalog_name,
.await? &schema_name,
{ &table_name,
let table_info = table.table_info(); table.table_type(),
self.add_table( Some(table_info.ident.table_id),
&catalog_name, Some(&table_info.meta.engine),
&schema_name, );
&table_name,
table.table_type(),
Some(table_info.ident.table_id),
Some(&table_info.meta.engine),
);
} else {
// TODO: this specific branch is only a workaround for FrontendCatalogManager.
if schema_name == INFORMATION_SCHEMA_NAME {
if table_name == COLUMNS {
self.add_table(
&catalog_name,
&schema_name,
&table_name,
TableType::Temporary,
Some(INFORMATION_SCHEMA_COLUMNS_TABLE_ID),
None,
);
} else if table_name == TABLES {
self.add_table(
&catalog_name,
&schema_name,
&table_name,
TableType::Temporary,
Some(INFORMATION_SCHEMA_TABLES_TABLE_ID),
None,
);
}
}
};
} }
} }

View File

@@ -14,63 +14,79 @@
#![feature(trait_upcasting)] #![feature(trait_upcasting)]
#![feature(assert_matches)] #![feature(assert_matches)]
#![feature(try_blocks)]
use std::any::Any; use std::any::Any;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::sync::Arc; use std::sync::Arc;
use api::v1::meta::{RegionStat, TableIdent, TableName}; use api::v1::meta::{RegionStat, TableName};
use common_telemetry::{info, warn}; use common_telemetry::{info, warn};
use snafu::ResultExt; use snafu::ResultExt;
use table::engine::{EngineContext, TableEngineRef}; use table::engine::{EngineContext, TableEngineRef};
use table::metadata::{TableId, TableType}; use table::metadata::TableId;
use table::requests::CreateTableRequest; use table::requests::CreateTableRequest;
use table::TableRef; use table::TableRef;
use crate::error::{CreateTableSnafu, Result}; use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};
pub mod error; pub mod error;
pub mod information_schema; pub mod helper;
pub(crate) mod information_schema;
pub mod local; pub mod local;
mod metrics; mod metrics;
pub mod remote; pub mod remote;
pub mod schema;
pub mod system; pub mod system;
pub mod table_source; pub mod table_source;
pub mod tables; pub mod tables;
/// Represents a catalog, comprising a number of named schemas.
#[async_trait::async_trait] #[async_trait::async_trait]
pub trait CatalogManager: Send + Sync { pub trait CatalogProvider: Sync + Send {
/// Returns the catalog provider as [`Any`](std::any::Any)
/// so that it can be downcast to a specific implementation.
fn as_any(&self) -> &dyn Any; fn as_any(&self) -> &dyn Any;
/// Retrieves the list of available schema names in this catalog.
async fn schema_names(&self) -> Result<Vec<String>>;
/// Registers schema to this catalog.
async fn register_schema(
&self,
name: String,
schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>>;
/// Retrieves a specific schema from the catalog by name, provided it exists.
async fn schema(&self, name: &str) -> Result<Option<SchemaProviderRef>>;
}
pub type CatalogProviderRef = Arc<dyn CatalogProvider>;
#[async_trait::async_trait]
pub trait CatalogManager: Send + Sync {
/// Starts a catalog manager. /// Starts a catalog manager.
async fn start(&self) -> Result<()>; async fn start(&self) -> Result<()>;
/// Registers a catalog to catalog manager, returns whether the catalog exist before. async fn register_catalog(
async fn register_catalog(self: Arc<Self>, name: String) -> Result<bool>; &self,
name: String,
/// Register a schema with catalog name and schema name. Returns whether the catalog: CatalogProviderRef,
/// schema registered. ) -> Result<Option<CatalogProviderRef>>;
///
/// # Errors
///
/// This method will/should fail if catalog not exist
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;
/// Deregisters a database within given catalog/schema to catalog manager
async fn deregister_schema(&self, request: DeregisterSchemaRequest) -> Result<bool>;
/// Registers a table within given catalog/schema to catalog manager, /// Registers a table within given catalog/schema to catalog manager,
/// returns whether the table registered. /// returns whether the table registered.
///
/// # Errors
///
/// This method will/should fail if catalog or schema not exist
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>; async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;
/// Deregisters a table within given catalog/schema to catalog manager /// Deregisters a table within given catalog/schema to catalog manager,
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()>; /// returns whether the table deregistered.
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool>;
/// Register a schema with catalog name and schema name. Returns whether the
/// schema is registered.
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;
/// Rename a table to [RenameTableRequest::new_table_name], returns whether the table is renamed. /// Rename a table to [RenameTableRequest::new_table_name], returns whether the table is renamed.
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool>; async fn rename_table(&self, request: RenameTableRequest) -> Result<bool>;
@@ -81,15 +97,9 @@ pub trait CatalogManager: Send + Sync {
async fn catalog_names(&self) -> Result<Vec<String>>; async fn catalog_names(&self) -> Result<Vec<String>>;
async fn schema_names(&self, catalog: &str) -> Result<Vec<String>>; async fn catalog(&self, catalog: &str) -> Result<Option<CatalogProviderRef>>;
async fn table_names(&self, catalog: &str, schema: &str) -> Result<Vec<String>>; async fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>>;
async fn catalog_exist(&self, catalog: &str) -> Result<bool>;
async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool>;
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool>;
/// Returns the table by catalog, schema and table name. /// Returns the table by catalog, schema and table name.
async fn table( async fn table(
@@ -98,6 +108,8 @@ pub trait CatalogManager: Send + Sync {
schema: &str, schema: &str,
table_name: &str, table_name: &str,
) -> Result<Option<TableRef>>; ) -> Result<Option<TableRef>>;
fn as_any(&self) -> &dyn Any;
} }
pub type CatalogManagerRef = Arc<dyn CatalogManager>; pub type CatalogManagerRef = Arc<dyn CatalogManager>;
@@ -152,15 +164,17 @@ pub struct DeregisterTableRequest {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct DeregisterSchemaRequest { pub struct RegisterSchemaRequest {
pub catalog: String, pub catalog: String,
pub schema: String, pub schema: String,
} }
#[derive(Debug, Clone)] pub trait CatalogProviderFactory {
pub struct RegisterSchemaRequest { fn create(&self, catalog_name: String) -> CatalogProviderRef;
pub catalog: String, }
pub schema: String,
pub trait SchemaProviderFactory {
fn create(&self, catalog_name: String, schema_name: String) -> SchemaProviderRef;
} }
pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>( pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
@@ -188,7 +202,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
table_name, table_name,
), ),
})?; })?;
let _ = manager manager
.register_table(RegisterTableRequest { .register_table(RegisterTableRequest {
catalog: catalog_name.clone(), catalog: catalog_name.clone(),
schema: schema_name.clone(), schema: schema_name.clone(),
@@ -217,51 +231,31 @@ pub async fn datanode_stat(catalog_manager: &CatalogManagerRef) -> (u64, Vec<Reg
let mut region_number: u64 = 0; let mut region_number: u64 = 0;
let mut region_stats = Vec::new(); let mut region_stats = Vec::new();
let Ok(catalog_names) = catalog_manager.catalog_names().await else { let Ok(catalog_names) = catalog_manager.catalog_names().await else { return (region_number, region_stats) };
return (region_number, region_stats);
};
for catalog_name in catalog_names { for catalog_name in catalog_names {
let Ok(schema_names) = catalog_manager.schema_names(&catalog_name).await else { let Ok(Some(catalog)) = catalog_manager.catalog(&catalog_name).await else { continue };
continue;
}; let Ok(schema_names) = catalog.schema_names().await else { continue };
for schema_name in schema_names { for schema_name in schema_names {
let Ok(table_names) = catalog_manager let Ok(Some(schema)) = catalog.schema(&schema_name).await else { continue };
.table_names(&catalog_name, &schema_name)
.await let Ok(table_names) = schema.table_names().await else { continue };
else {
continue;
};
for table_name in table_names { for table_name in table_names {
let Ok(Some(table)) = catalog_manager let Ok(Some(table)) = schema.table(&table_name).await else { continue };
.table(&catalog_name, &schema_name, &table_name)
.await
else {
continue;
};
if table.table_type() != TableType::Base { let region_numbers = &table.table_info().meta.region_numbers;
continue;
}
let table_info = table.table_info();
let region_numbers = &table_info.meta.region_numbers;
region_number += region_numbers.len() as u64; region_number += region_numbers.len() as u64;
let engine = &table_info.meta.engine; let engine = &table.table_info().meta.engine;
let table_id = table_info.ident.table_id;
match table.region_stats() { match table.region_stats() {
Ok(stats) => { Ok(stats) => {
let stats = stats.into_iter().map(|stat| RegionStat { let stats = stats.into_iter().map(|stat| RegionStat {
region_id: stat.region_id, region_id: stat.region_id,
table_ident: Some(TableIdent { table_name: Some(TableName {
table_id, catalog_name: catalog_name.clone(),
table_name: Some(TableName { schema_name: schema_name.clone(),
catalog_name: catalog_name.clone(), table_name: table_name.clone(),
schema_name: schema_name.clone(),
table_name: table_name.clone(),
}),
engine: engine.clone(),
}), }),
approximate_bytes: stat.disk_usage_bytes as i64, approximate_bytes: stat.disk_usage_bytes as i64,
attrs: HashMap::from([("engine_name".to_owned(), engine.clone())]), attrs: HashMap::from([("engine_name".to_owned(), engine.clone())]),
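// A hedged sketch of registering a table through the `CatalogManager` API shown in
// this hunk. Names and the table id are hypothetical; `table` is whatever `TableRef`
// the caller already holds, `Result` is this crate's result alias, and the
// `RegisterTableRequest` fields are taken from their usage elsewhere in this diff.
async fn _register_example(manager: &CatalogManagerRef, table: TableRef) -> Result<bool> {
    manager
        .register_table(RegisterTableRequest {
            catalog: "greptime".to_string(),
            schema: "public".to_string(),
            table_name: "my_table".to_string(),
            table_id: 1024,
            table,
        })
        .await
}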

View File

@@ -16,4 +16,6 @@ pub mod manager;
pub mod memory; pub mod memory;
pub use manager::LocalCatalogManager; pub use manager::LocalCatalogManager;
pub use memory::{new_memory_catalog_manager, MemoryCatalogManager}; pub use memory::{
new_memory_catalog_list, MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider,
};

View File

@@ -18,8 +18,7 @@ use std::sync::Arc;
use common_catalog::consts::{ use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID,
MITO_ENGINE, NUMBERS_TABLE_ID, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID, MITO_ENGINE, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
SYSTEM_CATALOG_TABLE_NAME,
}; };
use common_catalog::format_full_table_name; use common_catalog::format_full_table_name;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream}; use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
@@ -33,7 +32,7 @@ use table::engine::manager::TableEngineManagerRef;
use table::engine::EngineContext; use table::engine::EngineContext;
use table::metadata::TableId; use table::metadata::TableId;
use table::requests::OpenTableRequest; use table::requests::OpenTableRequest;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME}; use table::table::numbers::NumbersTable;
use table::table::TableIdProvider; use table::table::TableIdProvider;
use table::TableRef; use table::TableRef;
@@ -41,17 +40,18 @@ use crate::error::{
self, CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu, self, CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu,
Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu, Result, SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu,
SystemCatalogTypeMismatchSnafu, TableEngineNotFoundSnafu, TableExistsSnafu, TableNotExistSnafu, SystemCatalogTypeMismatchSnafu, TableEngineNotFoundSnafu, TableExistsSnafu, TableNotExistSnafu,
TableNotFoundSnafu, UnimplementedSnafu, TableNotFoundSnafu,
}; };
use crate::local::memory::MemoryCatalogManager; use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{ use crate::system::{
decode_system_catalog, Entry, SystemCatalogTable, TableEntry, ENTRY_TYPE_INDEX, KEY_INDEX, decode_system_catalog, Entry, SystemCatalogTable, TableEntry, ENTRY_TYPE_INDEX, KEY_INDEX,
VALUE_INDEX, VALUE_INDEX,
}; };
use crate::tables::SystemCatalog; use crate::tables::SystemCatalog;
use crate::{ use crate::{
handle_system_table_request, CatalogManager, DeregisterSchemaRequest, DeregisterTableRequest, handle_system_table_request, CatalogManager, CatalogProviderRef, DeregisterTableRequest,
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
SchemaProviderRef,
}; };
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs. /// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -74,11 +74,11 @@ impl LocalCatalogManager {
engine_name: MITO_ENGINE, engine_name: MITO_ENGINE,
})?; })?;
let table = SystemCatalogTable::new(engine.clone()).await?; let table = SystemCatalogTable::new(engine.clone()).await?;
let memory_catalog_manager = crate::local::memory::new_memory_catalog_manager()?; let memory_catalog_list = crate::local::memory::new_memory_catalog_list()?;
let system_catalog = Arc::new(SystemCatalog::new(table)); let system_catalog = Arc::new(SystemCatalog::new(table));
Ok(Self { Ok(Self {
system: system_catalog, system: system_catalog,
catalogs: memory_catalog_manager, catalogs: memory_catalog_list,
engine_manager, engine_manager,
next_table_id: AtomicU32::new(MIN_USER_TABLE_ID), next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
init_lock: Mutex::new(false), init_lock: Mutex::new(false),
@@ -116,43 +116,26 @@ impl LocalCatalogManager {
} }
async fn init_system_catalog(&self) -> Result<()> { async fn init_system_catalog(&self) -> Result<()> {
// register default catalog and default schema let system_schema = Arc::new(MemorySchemaProvider::new());
system_schema.register_table_sync(
SYSTEM_CATALOG_TABLE_NAME.to_string(),
self.system.information_schema.system.clone(),
)?;
let system_catalog = Arc::new(MemoryCatalogProvider::new());
system_catalog.register_schema_sync(INFORMATION_SCHEMA_NAME.to_string(), system_schema)?;
self.catalogs self.catalogs
.register_catalog_sync(DEFAULT_CATALOG_NAME.to_string())?; .register_catalog_sync(SYSTEM_CATALOG_NAME.to_string(), system_catalog)?;
self.catalogs.register_schema_sync(RegisterSchemaRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
})?;
// register SystemCatalogTable let default_catalog = Arc::new(MemoryCatalogProvider::new());
self.catalogs let default_schema = Arc::new(MemorySchemaProvider::new());
.register_catalog_sync(SYSTEM_CATALOG_NAME.to_string())?;
self.catalogs.register_schema_sync(RegisterSchemaRequest {
catalog: SYSTEM_CATALOG_NAME.to_string(),
schema: INFORMATION_SCHEMA_NAME.to_string(),
})?;
let register_table_req = RegisterTableRequest {
catalog: SYSTEM_CATALOG_NAME.to_string(),
schema: INFORMATION_SCHEMA_NAME.to_string(),
table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
table_id: SYSTEM_CATALOG_TABLE_ID,
table: self.system.information_schema.system.as_table_ref(),
};
self.catalogs.register_table(register_table_req).await?;
// Add numbers table for test // Add numbers table for test
let register_number_table_req = RegisterTableRequest { let table = Arc::new(NumbersTable::default());
catalog: DEFAULT_CATALOG_NAME.to_string(), default_schema.register_table_sync("numbers".to_string(), table)?;
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: NUMBERS_TABLE_NAME.to_string(),
table_id: NUMBERS_TABLE_ID,
table: NumbersTable::table(NUMBERS_TABLE_ID),
};
default_catalog.register_schema_sync(DEFAULT_SCHEMA_NAME.to_string(), default_schema)?;
self.catalogs self.catalogs
.register_table(register_number_table_req) .register_catalog_sync(DEFAULT_CATALOG_NAME.to_string(), default_catalog)?;
.await?;
Ok(()) Ok(())
} }
@@ -224,25 +207,30 @@ impl LocalCatalogManager {
for entry in entries { for entry in entries {
match entry { match entry {
Entry::Catalog(c) => { Entry::Catalog(c) => {
self.catalogs self.catalogs.register_catalog_if_absent(
.register_catalog_sync(c.catalog_name.clone())?; c.catalog_name.clone(),
Arc::new(MemoryCatalogProvider::new()),
);
info!("Register catalog: {}", c.catalog_name); info!("Register catalog: {}", c.catalog_name);
} }
Entry::Schema(s) => { Entry::Schema(s) => {
let req = RegisterSchemaRequest { self.catalogs
catalog: s.catalog_name.clone(), .catalog(&s.catalog_name)
schema: s.schema_name.clone(), .await?
}; .context(CatalogNotFoundSnafu {
let _ = self.catalogs.register_schema_sync(req)?; catalog_name: &s.catalog_name,
})?
.register_schema(
s.schema_name.clone(),
Arc::new(MemorySchemaProvider::new()),
)
.await?;
info!("Registered schema: {:?}", s); info!("Registered schema: {:?}", s);
} }
Entry::Table(t) => { Entry::Table(t) => {
max_table_id = max_table_id.max(t.table_id);
if t.is_deleted {
continue;
}
self.open_and_register_table(&t).await?; self.open_and_register_table(&t).await?;
info!("Registered table: {:?}", t); info!("Registered table: {:?}", t);
max_table_id = max_table_id.max(t.table_id);
} }
} }
} }
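A hypothetical, self-contained sketch of the replay step in the hunk above: persisted entries are applied in catalog → schema → table order, deleted tables are skipped, and the largest table id seen (including deleted tables) is kept for the id allocator. The names below are illustrative, not the crate's real types.

```rust
// Hypothetical stand-ins for the decoded system catalog entries.
enum Entry {
    Catalog { name: String },
    Schema { catalog: String, name: String },
    Table { id: u32, is_deleted: bool },
}

/// Replays entries and returns the largest table id encountered; deleted tables
/// still advance the max id before being skipped.
fn replay(entries: Vec<Entry>) -> u32 {
    let mut max_table_id = 0;
    for entry in entries {
        match entry {
            Entry::Catalog { name } => println!("register catalog {name}"),
            Entry::Schema { catalog, name } => println!("register schema {catalog}.{name}"),
            Entry::Table { id, is_deleted } => {
                max_table_id = max_table_id.max(id);
                if is_deleted {
                    continue; // tombstoned tables are not reopened
                }
                println!("open and register table {id}");
            }
        }
    }
    max_table_id
}

fn main() {
    let entries = vec![
        Entry::Catalog { name: "greptime".into() },
        Entry::Schema { catalog: "greptime".into(), name: "public".into() },
        Entry::Table { id: 1024, is_deleted: false },
        Entry::Table { id: 1025, is_deleted: true },
    ];
    assert_eq!(replay(entries), 1025);
}
```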
@@ -257,11 +245,23 @@ impl LocalCatalogManager {
} }
async fn open_and_register_table(&self, t: &TableEntry) -> Result<()> { async fn open_and_register_table(&self, t: &TableEntry) -> Result<()> {
self.check_catalog_schema_exist(&t.catalog_name, &t.schema_name) let catalog =
.await?; self.catalogs
.catalog(&t.catalog_name)
.await?
.context(CatalogNotFoundSnafu {
catalog_name: &t.catalog_name,
})?;
let schema = catalog
.schema(&t.schema_name)
.await?
.context(SchemaNotFoundSnafu {
catalog: &t.catalog_name,
schema: &t.schema_name,
})?;
let context = EngineContext {}; let context = EngineContext {};
let open_request = OpenTableRequest { let request = OpenTableRequest {
catalog_name: t.catalog_name.clone(), catalog_name: t.catalog_name.clone(),
schema_name: t.schema_name.clone(), schema_name: t.schema_name.clone(),
table_name: t.table_name.clone(), table_name: t.table_name.clone(),
@@ -275,8 +275,8 @@ impl LocalCatalogManager {
engine_name: &t.engine, engine_name: &t.engine,
})?; })?;
let table_ref = engine let option = engine
.open_table(&context, open_request) .open_table(&context, request)
.await .await
.with_context(|_| OpenTableSnafu { .with_context(|_| OpenTableSnafu {
table_info: format!( table_info: format!(
@@ -291,48 +291,7 @@ impl LocalCatalogManager {
), ),
})?; })?;
let register_request = RegisterTableRequest { schema.register_table(t.table_name.clone(), option).await?;
catalog: t.catalog_name.clone(),
schema: t.schema_name.clone(),
table_name: t.table_name.clone(),
table_id: t.table_id,
table: table_ref,
};
let _ = self.catalogs.register_table(register_request).await?;
Ok(())
}
async fn check_state(&self) -> Result<()> {
let started = self.init_lock.lock().await;
ensure!(
*started,
IllegalManagerStateSnafu {
msg: "Catalog manager not started",
}
);
Ok(())
}
async fn check_catalog_schema_exist(
&self,
catalog_name: &str,
schema_name: &str,
) -> Result<()> {
if !self.catalogs.catalog_exist(catalog_name).await? {
return CatalogNotFoundSnafu { catalog_name }.fail()?;
}
if !self
.catalogs
.schema_exist(catalog_name, schema_name)
.await?
{
return SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
}
.fail()?;
}
Ok(()) Ok(())
} }
} }
@@ -353,21 +312,34 @@ impl CatalogManager for LocalCatalogManager {
} }
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> { async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
self.check_state().await?; let started = self.init_lock.lock().await;
let catalog_name = request.catalog.clone(); ensure!(
let schema_name = request.schema.clone(); *started,
IllegalManagerStateSnafu {
msg: "Catalog manager not started",
}
);
self.check_catalog_schema_exist(&catalog_name, &schema_name) let catalog_name = &request.catalog;
.await?; let schema_name = &request.schema;
let catalog = self
.catalogs
.catalog(catalog_name)
.await?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(schema_name)
.await?
.with_context(|| SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?;
{ {
let _lock = self.register_lock.lock().await; let _lock = self.register_lock.lock().await;
if let Some(existing) = self if let Some(existing) = schema.table(&request.table_name).await? {
.catalogs
.table(&request.catalog, &request.schema, &request.table_name)
.await?
{
if existing.table_info().ident.table_id != request.table_id { if existing.table_info().ident.table_id != request.table_id {
error!( error!(
"Unexpected table register request: {:?}, existing: {:?}", "Unexpected table register request: {:?}, existing: {:?}",
@@ -376,8 +348,8 @@ impl CatalogManager for LocalCatalogManager {
); );
return TableExistsSnafu { return TableExistsSnafu {
table: format_full_table_name( table: format_full_table_name(
&catalog_name, catalog_name,
&schema_name, schema_name,
&request.table_name, &request.table_name,
), ),
} }
@@ -386,25 +358,24 @@ impl CatalogManager for LocalCatalogManager {
// Try to register table with same table id, just ignore. // Try to register table with same table id, just ignore.
Ok(false) Ok(false)
} else { } else {
// table does not exist
let engine = request.table.table_info().meta.engine.to_string(); let engine = request.table.table_info().meta.engine.to_string();
let table_name = request.table_name.clone(); // table does not exist
let table_id = request.table_id; self.system
let _ = self.catalogs.register_table(request).await?;
let _ = self
.system
.register_table( .register_table(
catalog_name.clone(), catalog_name.clone(),
schema_name.clone(), schema_name.clone(),
table_name, request.table_name.clone(),
table_id, request.table_id,
engine, engine,
) )
.await?; .await?;
schema
.register_table(request.table_name, request.table)
.await?;
increment_gauge!( increment_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT, crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0, 1.0,
&[crate::metrics::db_label(&catalog_name, &schema_name)], &[crate::metrics::db_label(catalog_name, schema_name)],
); );
Ok(true) Ok(true)
} }
@@ -412,27 +383,41 @@ impl CatalogManager for LocalCatalogManager {
} }
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> { async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
self.check_state().await?; let started = self.init_lock.lock().await;
ensure!(
*started,
IllegalManagerStateSnafu {
msg: "Catalog manager not started",
}
);
let catalog_name = &request.catalog; let catalog_name = &request.catalog;
let schema_name = &request.schema; let schema_name = &request.schema;
self.check_catalog_schema_exist(catalog_name, schema_name) let catalog = self
.await?; .catalogs
.catalog(catalog_name)
.await?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(schema_name)
.await?
.with_context(|| SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?;
let _lock = self.register_lock.lock().await;
ensure!( ensure!(
self.catalogs !schema.table_exist(&request.new_table_name).await?,
.table(catalog_name, schema_name, &request.new_table_name)
.await?
.is_none(),
TableExistsSnafu { TableExistsSnafu {
table: &request.new_table_name table: &request.new_table_name
} }
); );
let old_table = schema
let _lock = self.register_lock.lock().await; .table(&request.table_name)
let old_table = self
.catalogs
.table(catalog_name, schema_name, &request.table_name)
.await? .await?
.context(TableNotExistSnafu { .context(TableNotExistSnafu {
table: &request.table_name, table: &request.table_name,
@@ -440,8 +425,7 @@ impl CatalogManager for LocalCatalogManager {
let engine = old_table.table_info().meta.engine.to_string(); let engine = old_table.table_info().meta.engine.to_string();
// rename table in system catalog // rename table in system catalog
let _ = self self.system
.system
.register_table( .register_table(
catalog_name.clone(), catalog_name.clone(),
schema_name.clone(), schema_name.clone(),
@@ -451,11 +435,18 @@ impl CatalogManager for LocalCatalogManager {
) )
.await?; .await?;
self.catalogs.rename_table(request).await let renamed = schema
.rename_table(&request.table_name, request.new_table_name.clone())
.await
.is_ok();
Ok(renamed)
} }
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> { async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
self.check_state().await?; {
let started = *self.init_lock.lock().await;
ensure!(started, IllegalManagerStateSnafu { msg: "not started" });
}
{ {
let _ = self.register_lock.lock().await; let _ = self.register_lock.lock().await;
@@ -482,45 +473,52 @@ impl CatalogManager for LocalCatalogManager {
} }
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> { async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
self.check_state().await?; let started = self.init_lock.lock().await;
ensure!(
*started,
IllegalManagerStateSnafu {
msg: "Catalog manager not started",
}
);
let catalog_name = &request.catalog; let catalog_name = &request.catalog;
let schema_name = &request.schema; let schema_name = &request.schema;
if !self.catalogs.catalog_exist(catalog_name).await? { let catalog = self
return CatalogNotFoundSnafu { catalog_name }.fail()?; .catalogs
} .catalog(catalog_name)
.await?
.context(CatalogNotFoundSnafu { catalog_name })?;
{ {
let _lock = self.register_lock.lock().await; let _lock = self.register_lock.lock().await;
ensure!( ensure!(
!self catalog.schema(schema_name).await?.is_none(),
.catalogs
.schema_exist(catalog_name, schema_name)
.await?,
SchemaExistsSnafu { SchemaExistsSnafu {
schema: schema_name, schema: schema_name,
} }
); );
let _ = self self.system
.system .register_schema(request.catalog, schema_name.clone())
.register_schema(request.catalog.clone(), schema_name.clone()) .await?;
catalog
.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))
.await?; .await?;
self.catalogs.register_schema_sync(request)
}
}
async fn deregister_schema(&self, _request: DeregisterSchemaRequest) -> Result<bool> { Ok(true)
UnimplementedSnafu {
operation: "deregister schema",
} }
.fail()
} }
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> { async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
let catalog_name = request.create_table_request.catalog_name.clone(); let catalog_name = request.create_table_request.catalog_name.clone();
let schema_name = request.create_table_request.schema_name.clone(); let schema_name = request.create_table_request.schema_name.clone();
ensure!(
!*self.init_lock.lock().await,
IllegalManagerStateSnafu {
msg: "Catalog manager already started",
}
);
let mut sys_table_requests = self.system_table_requests.lock().await; let mut sys_table_requests = self.system_table_requests.lock().await;
sys_table_requests.push(request); sys_table_requests.push(request);
increment_gauge!( increment_gauge!(
@@ -531,8 +529,15 @@ impl CatalogManager for LocalCatalogManager {
Ok(()) Ok(())
} }
async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> { async fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
self.catalogs.schema_exist(catalog, schema).await self.catalogs
.catalog(catalog)
.await?
.context(CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.schema(schema)
.await
} }
async fn table( async fn table(
@@ -541,37 +546,39 @@ impl CatalogManager for LocalCatalogManager {
schema_name: &str, schema_name: &str,
table_name: &str, table_name: &str,
) -> Result<Option<TableRef>> { ) -> Result<Option<TableRef>> {
self.catalogs let catalog = self
.table(catalog_name, schema_name, table_name) .catalogs
.await .catalog(catalog_name)
.await?
.context(CatalogNotFoundSnafu { catalog_name })?;
let schema = catalog
.schema(schema_name)
.await?
.with_context(|| SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?;
schema.table(table_name).await
} }
async fn catalog_exist(&self, catalog: &str) -> Result<bool> { async fn catalog(&self, catalog: &str) -> Result<Option<CatalogProviderRef>> {
if catalog.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) { if catalog.eq_ignore_ascii_case(SYSTEM_CATALOG_NAME) {
Ok(true) Ok(Some(self.system.clone()))
} else { } else {
self.catalogs.catalog_exist(catalog).await self.catalogs.catalog(catalog).await
} }
} }
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
self.catalogs.table_exist(catalog, schema, table).await
}
async fn catalog_names(&self) -> Result<Vec<String>> { async fn catalog_names(&self) -> Result<Vec<String>> {
self.catalogs.catalog_names().await self.catalogs.catalog_names().await
} }
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> { async fn register_catalog(
self.catalogs.schema_names(catalog_name).await &self,
} name: String,
catalog: CatalogProviderRef,
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> { ) -> Result<Option<CatalogProviderRef>> {
self.catalogs.table_names(catalog_name, schema_name).await self.catalogs.register_catalog(name, catalog).await
}
async fn register_catalog(self: Arc<Self>, name: String) -> Result<bool> {
self.catalogs.clone().register_catalog(name).await
} }
fn as_any(&self) -> &dyn Any { fn as_any(&self) -> &dyn Any {
@@ -597,7 +604,6 @@ mod tests {
table_name: "T1".to_string(), table_name: "T1".to_string(),
table_id: 1, table_id: 1,
engine: MITO_ENGINE.to_string(), engine: MITO_ENGINE.to_string(),
is_deleted: false,
}), }),
Entry::Catalog(CatalogEntry { Entry::Catalog(CatalogEntry {
catalog_name: "C2".to_string(), catalog_name: "C2".to_string(),
@@ -619,7 +625,6 @@ mod tests {
table_name: "T2".to_string(), table_name: "T2".to_string(),
table_id: 2, table_id: 2,
engine: MITO_ENGINE.to_string(), engine: MITO_ENGINE.to_string(),
is_deleted: false,
}), }),
]; ];
let res = LocalCatalogManager::sort_entries(vec); let res = LocalCatalogManager::sort_entries(vec);

View File

@@ -16,35 +16,51 @@ use std::any::Any;
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock, Weak}; use std::sync::{Arc, RwLock};
use common_catalog::consts::{ use async_trait::async_trait;
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MIN_USER_TABLE_ID, use common_catalog::consts::MIN_USER_TABLE_ID;
}; use common_telemetry::error;
use metrics::{decrement_gauge, increment_gauge}; use metrics::{decrement_gauge, increment_gauge};
use snafu::OptionExt; use snafu::{ensure, OptionExt};
use table::metadata::TableId; use table::metadata::TableId;
use table::table::TableIdProvider; use table::table::TableIdProvider;
use table::TableRef; use table::TableRef;
use crate::error::{ use crate::error::{
CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, TableNotFoundSnafu, self, CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, TableNotFoundSnafu,
}; };
use crate::information_schema::InformationSchemaProvider; use crate::schema::SchemaProvider;
use crate::{ use crate::{
CatalogManager, DeregisterSchemaRequest, DeregisterTableRequest, RegisterSchemaRequest, CatalogManager, CatalogProvider, CatalogProviderRef, DeregisterTableRequest,
RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, RenameTableRequest,
SchemaProviderRef,
}; };
type SchemaEntries = HashMap<String, HashMap<String, TableRef>>;
/// Simple in-memory list of catalogs /// Simple in-memory list of catalogs
pub struct MemoryCatalogManager { pub struct MemoryCatalogManager {
/// Collection of catalogs containing schemas and ultimately Tables /// Collection of catalogs containing schemas and ultimately Tables
pub catalogs: RwLock<HashMap<String, SchemaEntries>>, pub catalogs: RwLock<HashMap<String, CatalogProviderRef>>,
pub table_id: AtomicU32, pub table_id: AtomicU32,
} }
impl Default for MemoryCatalogManager {
fn default() -> Self {
let manager = Self {
table_id: AtomicU32::new(MIN_USER_TABLE_ID),
catalogs: Default::default(),
};
let default_catalog = Arc::new(MemoryCatalogProvider::new());
manager
.register_catalog_sync("greptime".to_string(), default_catalog.clone())
.unwrap();
default_catalog
.register_schema_sync("public".to_string(), Arc::new(MemorySchemaProvider::new()))
.unwrap();
manager
}
}
#[async_trait::async_trait] #[async_trait::async_trait]
impl TableIdProvider for MemoryCatalogManager { impl TableIdProvider for MemoryCatalogManager {
async fn next_table_id(&self) -> table::error::Result<TableId> { async fn next_table_id(&self) -> table::error::Result<TableId> {
@@ -60,75 +76,81 @@ impl CatalogManager for MemoryCatalogManager {
} }
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> { async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
self.register_table_sync(request) let schema = self
} .catalog(&request.catalog)
.context(CatalogNotFoundSnafu {
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let schema = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog, catalog_name: &request.catalog,
})? })?
.get_mut(&request.schema) .schema(&request.schema)
.with_context(|| SchemaNotFoundSnafu { .await?
.context(SchemaNotFoundSnafu {
catalog: &request.catalog, catalog: &request.catalog,
schema: &request.schema, schema: &request.schema,
})?; })?;
increment_gauge!(
// check old and new table names
if !schema.contains_key(&request.table_name) {
return TableNotFoundSnafu {
table_info: request.table_name.to_string(),
}
.fail()?;
}
if schema.contains_key(&request.new_table_name) {
return TableExistsSnafu {
table: &request.new_table_name,
}
.fail();
}
let table = schema.remove(&request.table_name).unwrap();
let _ = schema.insert(request.new_table_name, table);
Ok(true)
}
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<()> {
self.deregister_table_sync(request)
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
self.register_schema_sync(request)
}
async fn deregister_schema(&self, request: DeregisterSchemaRequest) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap();
let schemas = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?;
let table_count = schemas
.remove(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?
.len();
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT, crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
table_count as f64,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT,
1.0, 1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)], &[crate::metrics::db_label(&request.catalog, &request.schema)],
); );
schema
.register_table(request.table_name, request.table)
.await
.map(|v| v.is_none())
}
async fn rename_table(&self, request: RenameTableRequest) -> Result<bool> {
let catalog = self
.catalog(&request.catalog)
.context(CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?;
let schema =
catalog
.schema(&request.schema)
.await?
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
Ok(schema
.rename_table(&request.table_name, request.new_table_name)
.await
.is_ok())
}
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
let schema = self
.catalog(&request.catalog)
.context(CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.schema(&request.schema)
.await?
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
schema
.deregister_table(&request.table_name)
.await
.map(|v| v.is_some())
}
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
let catalog = self
.catalog(&request.catalog)
.context(CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?;
catalog
.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))
.await?;
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT, 1.0);
Ok(true) Ok(true)
} }
@@ -137,8 +159,12 @@ impl CatalogManager for MemoryCatalogManager {
Ok(()) Ok(())
} }
async fn schema_exist(&self, catalog: &str, schema: &str) -> Result<bool> { async fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
self.schema_exist_sync(catalog, schema) if let Some(c) = self.catalog(catalog) {
c.schema(schema).await
} else {
Ok(None)
}
} }
async fn table( async fn table(
@@ -147,69 +173,27 @@ impl CatalogManager for MemoryCatalogManager {
schema: &str, schema: &str,
table_name: &str, table_name: &str,
) -> Result<Option<TableRef>> { ) -> Result<Option<TableRef>> {
let result = try { let Some(catalog) = self
self.catalogs .catalog(catalog) else { return Ok(None)};
.read() let Some(s) = catalog.schema(schema).await? else { return Ok(None) };
.unwrap() s.table(table_name).await
.get(catalog)?
.get(schema)?
.get(table_name)
.cloned()?
};
Ok(result)
} }
async fn catalog_exist(&self, catalog: &str) -> Result<bool> { async fn catalog(&self, catalog: &str) -> Result<Option<CatalogProviderRef>> {
self.catalog_exist_sync(catalog) Ok(self.catalogs.read().unwrap().get(catalog).cloned())
}
async fn table_exist(&self, catalog: &str, schema: &str, table: &str) -> Result<bool> {
let catalogs = self.catalogs.read().unwrap();
Ok(catalogs
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.get(schema)
.with_context(|| SchemaNotFoundSnafu { catalog, schema })?
.contains_key(table))
} }
async fn catalog_names(&self) -> Result<Vec<String>> { async fn catalog_names(&self) -> Result<Vec<String>> {
Ok(self.catalogs.read().unwrap().keys().cloned().collect()) Ok(self.catalogs.read().unwrap().keys().cloned().collect())
} }
async fn schema_names(&self, catalog_name: &str) -> Result<Vec<String>> { async fn register_catalog(
Ok(self &self,
.catalogs name: String,
.read() catalog: CatalogProviderRef,
.unwrap() ) -> Result<Option<CatalogProviderRef>> {
.get(catalog_name) increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT, 1.0);
.with_context(|| CatalogNotFoundSnafu { catalog_name })? self.register_catalog_sync(name, catalog)
.keys()
.cloned()
.collect())
}
async fn table_names(&self, catalog_name: &str, schema_name: &str) -> Result<Vec<String>> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog_name)
.with_context(|| CatalogNotFoundSnafu { catalog_name })?
.get(schema_name)
.with_context(|| SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?
.keys()
.cloned()
.collect())
}
async fn register_catalog(self: Arc<Self>, name: String) -> Result<bool> {
self.register_catalog_sync(name)
} }
fn as_any(&self) -> &dyn Any { fn as_any(&self) -> &dyn Any {
@@ -218,288 +202,313 @@ impl CatalogManager for MemoryCatalogManager {
} }
impl MemoryCatalogManager { impl MemoryCatalogManager {
/// Creates a manager with some default setups /// Registers a catalog and returns `None` if no catalog with the same name was already
/// (e.g. default catalog/schema and information schema) /// registered, or `Some` with the previously registered catalog.
pub fn with_default_setup() -> Arc<Self> { pub fn register_catalog_if_absent(
let manager = Arc::new(Self { &self,
table_id: AtomicU32::new(MIN_USER_TABLE_ID), name: String,
catalogs: Default::default(), catalog: CatalogProviderRef,
}); ) -> Option<CatalogProviderRef> {
// Safety: default catalog/schema is registered in order so no CatalogNotFound error will occur
manager
.register_catalog_sync(DEFAULT_CATALOG_NAME.to_string())
.unwrap();
manager
.register_schema_sync(RegisterSchemaRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
})
.unwrap();
manager
}
fn schema_exist_sync(&self, catalog: &str, schema: &str) -> Result<bool> {
Ok(self
.catalogs
.read()
.unwrap()
.get(catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: catalog,
})?
.contains_key(schema))
}
fn catalog_exist_sync(&self, catalog: &str) -> Result<bool> {
Ok(self.catalogs.read().unwrap().get(catalog).is_some())
}
/// Registers a catalog if it does not exist and returns false if the catalog already exists.
pub fn register_catalog_sync(self: &Arc<Self>, name: String) -> Result<bool> {
let mut catalogs = self.catalogs.write().unwrap(); let mut catalogs = self.catalogs.write().unwrap();
let entry = catalogs.entry(name);
match catalogs.entry(name.clone()) { match entry {
Entry::Vacant(e) => { Entry::Occupied(v) => Some(v.get().clone()),
let catalog = self.create_catalog_entry(name); Entry::Vacant(v) => {
e.insert(catalog); v.insert(catalog);
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_CATALOG_COUNT, 1.0); None
Ok(true)
} }
Entry::Occupied(_) => Ok(false),
} }
} }
pub fn deregister_table_sync(&self, request: DeregisterTableRequest) -> Result<()> { pub fn register_catalog_sync(
&self,
name: String,
catalog: CatalogProviderRef,
) -> Result<Option<CatalogProviderRef>> {
let mut catalogs = self.catalogs.write().unwrap(); let mut catalogs = self.catalogs.write().unwrap();
let schema = catalogs Ok(catalogs.insert(name, catalog))
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.get_mut(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
let result = schema.remove(&request.table_name);
if result.is_some() {
decrement_gauge!(
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT,
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
);
}
Ok(())
} }
/// Registers a schema if it does not exist. fn catalog(&self, catalog_name: &str) -> Option<CatalogProviderRef> {
/// It returns an error if the catalog does not exist, self.catalogs.read().unwrap().get(catalog_name).cloned()
/// and returns false if the schema exists. }
pub fn register_schema_sync(&self, request: RegisterSchemaRequest) -> Result<bool> { }
let mut catalogs = self.catalogs.write().unwrap();
let catalog = catalogs
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?;
match catalog.entry(request.schema) { impl Default for MemoryCatalogProvider {
Entry::Vacant(e) => { fn default() -> Self {
e.insert(HashMap::new()); Self::new()
increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT, 1.0); }
Ok(true) }
}
Entry::Occupied(_) => Ok(false), /// Simple in-memory implementation of a catalog.
pub struct MemoryCatalogProvider {
schemas: RwLock<HashMap<String, Arc<dyn SchemaProvider>>>,
}
impl MemoryCatalogProvider {
/// Instantiates a new MemoryCatalogProvider with an empty collection of schemas.
pub fn new() -> Self {
Self {
schemas: RwLock::new(HashMap::new()),
} }
} }
/// Registers a table and returns an error if the catalog or schema does not exist. pub fn schema_names_sync(&self) -> Result<Vec<String>> {
pub fn register_table_sync(&self, request: RegisterTableRequest) -> Result<bool> { let schemas = self.schemas.read().unwrap();
let mut catalogs = self.catalogs.write().unwrap(); Ok(schemas.keys().cloned().collect())
let schema = catalogs }
.get_mut(&request.catalog)
.with_context(|| CatalogNotFoundSnafu {
catalog_name: &request.catalog,
})?
.get_mut(&request.schema)
.with_context(|| SchemaNotFoundSnafu {
catalog: &request.catalog,
schema: &request.schema,
})?;
if schema.contains_key(&request.table_name) { pub fn register_schema_sync(
return TableExistsSnafu { &self,
table: &request.table_name, name: String,
} schema: SchemaProviderRef,
.fail(); ) -> Result<Option<SchemaProviderRef>> {
} let mut schemas = self.schemas.write().unwrap();
schema.insert(request.table_name, request.table); ensure!(
increment_gauge!( !schemas.contains_key(&name),
crate::metrics::METRIC_CATALOG_MANAGER_TABLE_COUNT, error::SchemaExistsSnafu { schema: &name }
1.0,
&[crate::metrics::db_label(&request.catalog, &request.schema)],
); );
Ok(true) increment_gauge!(crate::metrics::METRIC_CATALOG_MANAGER_SCHEMA_COUNT, 1.0);
Ok(schemas.insert(name, schema))
} }
fn create_catalog_entry(self: &Arc<Self>, catalog: String) -> SchemaEntries { pub fn schema_sync(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
let information_schema = InformationSchemaProvider::build( let schemas = self.schemas.read().unwrap();
catalog, Ok(schemas.get(name).cloned())
Arc::downgrade(self) as Weak<dyn CatalogManager>, }
); }
let mut catalog = HashMap::new();
catalog.insert(INFORMATION_SCHEMA_NAME.to_string(), information_schema); #[async_trait::async_trait]
catalog impl CatalogProvider for MemoryCatalogProvider {
fn as_any(&self) -> &dyn Any {
self
} }
#[cfg(any(test, feature = "testing"))] async fn schema_names(&self) -> Result<Vec<String>> {
pub fn new_with_table(table: TableRef) -> Arc<Self> { self.schema_names_sync()
let manager = Self::with_default_setup(); }
let catalog = &table.table_info().catalog_name;
let schema = &table.table_info().schema_name;
if !manager.catalog_exist_sync(catalog).unwrap() { async fn register_schema(
manager.register_catalog_sync(catalog.to_string()).unwrap(); &self,
name: String,
schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>> {
self.register_schema_sync(name, schema)
}
async fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>> {
self.schema_sync(name)
}
}
/// Simple in-memory implementation of a schema.
pub struct MemorySchemaProvider {
tables: RwLock<HashMap<String, TableRef>>,
}
impl MemorySchemaProvider {
/// Instantiates a new MemorySchemaProvider with an empty collection of tables.
pub fn new() -> Self {
Self {
tables: RwLock::new(HashMap::new()),
} }
}
if !manager.schema_exist_sync(catalog, schema).unwrap() { pub fn register_table_sync(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
manager let mut tables = self.tables.write().unwrap();
.register_schema_sync(RegisterSchemaRequest { if let Some(existing) = tables.get(name.as_str()) {
catalog: catalog.to_string(), // if table with the same name but different table id exists, then it's a fatal bug
schema: schema.to_string(), if existing.table_info().ident.table_id != table.table_info().ident.table_id {
}) error!(
.unwrap(); "Unexpected table register: {:?}, existing: {:?}",
table.table_info(),
existing.table_info()
);
return TableExistsSnafu { table: name }.fail()?;
}
Ok(Some(existing.clone()))
} else {
Ok(tables.insert(name, table))
} }
}
let request = RegisterTableRequest { pub fn rename_table_sync(&self, name: &str, new_name: String) -> Result<TableRef> {
catalog: catalog.to_string(), let mut tables = self.tables.write().unwrap();
schema: schema.to_string(), let Some(table) = tables.remove(name) else {
table_name: table.table_info().name.clone(), return TableNotFoundSnafu {
table_id: table.table_info().ident.table_id, table_info: name.to_string(),
table, }
.fail()?;
}; };
let _ = manager.register_table_sync(request).unwrap(); let e = match tables.entry(new_name) {
manager Entry::Vacant(e) => e,
Entry::Occupied(e) => {
return TableExistsSnafu { table: e.key() }.fail();
}
};
e.insert(table.clone());
Ok(table)
}
pub fn table_exist_sync(&self, name: &str) -> Result<bool> {
let tables = self.tables.read().unwrap();
Ok(tables.contains_key(name))
}
pub fn deregister_table_sync(&self, name: &str) -> Result<Option<TableRef>> {
let mut tables = self.tables.write().unwrap();
Ok(tables.remove(name))
}
}
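`rename_table_sync` above first removes the table under its old name and then claims the new name through the `HashMap` entry API, so an existing table is never silently overwritten. A std-only sketch of that shape (simplified: the removed value is just dropped on the error path):

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;

/// Renames a key without clobbering an existing entry at the destination.
fn rename<V>(map: &mut HashMap<String, V>, from: &str, to: String) -> Result<(), String> {
    let value = map
        .remove(from)
        .ok_or_else(|| format!("{from} not found"))?;
    match map.entry(to) {
        Entry::Vacant(e) => {
            e.insert(value);
            Ok(())
        }
        // Destination already taken: report the conflict (the removed value is dropped here).
        Entry::Occupied(e) => Err(format!("{} already exists", e.key())),
    }
}

fn main() {
    let mut tables = HashMap::from([("num".to_string(), 2333u32)]);
    rename(&mut tables, "num", "numbers_new".to_string()).unwrap();
    assert_eq!(tables.get("numbers_new"), Some(&2333));
}
```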
impl Default for MemorySchemaProvider {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl SchemaProvider for MemorySchemaProvider {
fn as_any(&self) -> &dyn Any {
self
}
async fn table_names(&self) -> Result<Vec<String>> {
let tables = self.tables.read().unwrap();
Ok(tables.keys().cloned().collect())
}
async fn table(&self, name: &str) -> Result<Option<TableRef>> {
let tables = self.tables.read().unwrap();
Ok(tables.get(name).cloned())
}
async fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
self.register_table_sync(name, table)
}
async fn rename_table(&self, name: &str, new_name: String) -> Result<TableRef> {
self.rename_table_sync(name, new_name)
}
async fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
self.deregister_table_sync(name)
}
async fn table_exist(&self, name: &str) -> Result<bool> {
self.table_exist_sync(name)
} }
} }
/// Create a memory catalog list that contains a numbers table for test /// Create a memory catalog list that contains a numbers table for test
pub fn new_memory_catalog_manager() -> Result<Arc<MemoryCatalogManager>> { pub fn new_memory_catalog_list() -> Result<Arc<MemoryCatalogManager>> {
Ok(MemoryCatalogManager::with_default_setup()) Ok(Arc::new(MemoryCatalogManager::default()))
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use common_catalog::consts::*; use common_catalog::consts::*;
use common_error::ext::ErrorExt; use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode; use common_error::prelude::StatusCode;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME}; use table::table::numbers::NumbersTable;
use super::*; use super::*;
#[tokio::test] #[tokio::test]
async fn test_new_memory_catalog_list() { async fn test_new_memory_catalog_list() {
let catalog_list = new_memory_catalog_manager().unwrap(); let catalog_list = new_memory_catalog_list().unwrap();
let default_catalog = CatalogManager::catalog(&*catalog_list, DEFAULT_CATALOG_NAME)
let register_request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: NUMBERS_TABLE_NAME.to_string(),
table_id: NUMBERS_TABLE_ID,
table: NumbersTable::table(NUMBERS_TABLE_ID),
};
let _ = catalog_list.register_table(register_request).await.unwrap();
let table = catalog_list
.table(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
NUMBERS_TABLE_NAME,
)
.await
.unwrap();
let _ = table.unwrap();
assert!(catalog_list
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "not_exists")
.await .await
.unwrap() .unwrap()
.is_none()); .unwrap();
let default_schema = default_catalog
.schema(DEFAULT_SCHEMA_NAME)
.await
.unwrap()
.unwrap();
default_schema
.register_table("numbers".to_string(), Arc::new(NumbersTable::default()))
.await
.unwrap();
let table = default_schema.table("numbers").await.unwrap();
assert!(table.is_some());
assert!(default_schema.table("not_exists").await.unwrap().is_none());
} }
#[tokio::test] #[tokio::test]
async fn test_mem_manager_rename_table() { async fn test_mem_provider() {
let catalog = MemoryCatalogManager::with_default_setup(); let provider = MemorySchemaProvider::new();
let table_name = "test_table"; let table_name = "numbers";
assert!(!catalog assert!(!provider.table_exist_sync(table_name).unwrap());
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name) provider.deregister_table_sync(table_name).unwrap();
.await let test_table = NumbersTable::default();
.unwrap()); // register table successfully
assert!(provider
.register_table_sync(table_name.to_string(), Arc::new(test_table))
.unwrap()
.is_none());
assert!(provider.table_exist_sync(table_name).unwrap());
let other_table = NumbersTable::new(12);
let result = provider.register_table_sync(table_name.to_string(), Arc::new(other_table));
let err = result.err().unwrap();
assert_eq!(StatusCode::TableAlreadyExists, err.status_code());
}
#[tokio::test]
async fn test_mem_provider_rename_table() {
let provider = MemorySchemaProvider::new();
let table_name = "num";
assert!(!provider.table_exist_sync(table_name).unwrap());
let test_table: TableRef = Arc::new(NumbersTable::default());
// register test table // register test table
let table_id = 2333; assert!(provider
let register_request = RegisterTableRequest { .register_table_sync(table_name.to_string(), test_table.clone())
catalog: DEFAULT_CATALOG_NAME.to_string(), .unwrap()
schema: DEFAULT_SCHEMA_NAME.to_string(), .is_none());
table_name: table_name.to_string(), assert!(provider.table_exist_sync(table_name).unwrap());
table_id,
table: NumbersTable::table(table_id),
};
assert!(catalog.register_table(register_request).await.unwrap());
assert!(catalog
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap());
// rename test table // rename test table
let new_table_name = "test_table_renamed"; let new_table_name = "numbers";
let rename_request = RenameTableRequest { provider
catalog: DEFAULT_CATALOG_NAME.to_string(), .rename_table_sync(table_name, new_table_name.to_string())
schema: DEFAULT_SCHEMA_NAME.to_string(), .unwrap();
table_name: table_name.to_string(),
new_table_name: new_table_name.to_string(),
table_id,
};
let _ = catalog.rename_table(rename_request).await.unwrap();
// test old table name not exist // test old table name not exist
assert!(!catalog assert!(!provider.table_exist_sync(table_name).unwrap());
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name) provider.deregister_table_sync(table_name).unwrap();
.await
.unwrap());
// test new table name exists // test new table name exists
assert!(catalog assert!(provider.table_exist_sync(new_table_name).unwrap());
.table_exist(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name) let registered_table = provider.table(new_table_name).await.unwrap().unwrap();
.await assert_eq!(
.unwrap()); registered_table.table_info().ident.table_id,
let registered_table = catalog test_table.table_info().ident.table_id
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name) );
.await
.unwrap()
.unwrap();
assert_eq!(registered_table.table_info().ident.table_id, table_id);
let dup_register_request = RegisterTableRequest { let other_table = Arc::new(NumbersTable::new(2));
catalog: DEFAULT_CATALOG_NAME.to_string(), let result = provider
schema: DEFAULT_SCHEMA_NAME.to_string(), .register_table(new_table_name.to_string(), other_table)
table_name: new_table_name.to_string(), .await;
table_id: table_id + 1,
table: NumbersTable::table(table_id + 1),
};
let result = catalog.register_table(dup_register_request).await;
let err = result.err().unwrap(); let err = result.err().unwrap();
assert_eq!(StatusCode::TableAlreadyExists, err.status_code()); assert_eq!(StatusCode::TableAlreadyExists, err.status_code());
} }
#[tokio::test] #[tokio::test]
async fn test_catalog_rename_table() { async fn test_catalog_rename_table() {
let catalog = MemoryCatalogManager::with_default_setup(); let catalog = MemoryCatalogManager::default();
let table_name = "num"; let schema = catalog
let table_id = 2333; .schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
let table = NumbersTable::table(table_id); .await
.unwrap()
.unwrap();
// register table // register table
let table_name = "num";
let table_id = 2333;
let table: TableRef = Arc::new(NumbersTable::new(table_id));
let register_table_req = RegisterTableRequest { let register_table_req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(), catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
@@ -508,11 +517,7 @@ mod tests {
table, table,
}; };
assert!(catalog.register_table(register_table_req).await.unwrap()); assert!(catalog.register_table(register_table_req).await.unwrap());
assert!(catalog assert!(schema.table_exist(table_name).await.unwrap());
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_some());
// rename table // rename table
let new_table_name = "numbers_new"; let new_table_name = "numbers_new";
@@ -524,16 +529,8 @@ mod tests {
table_id, table_id,
}; };
assert!(catalog.rename_table(rename_table_req).await.unwrap()); assert!(catalog.rename_table(rename_table_req).await.unwrap());
assert!(catalog assert!(!schema.table_exist(table_name).await.unwrap());
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name) assert!(schema.table_exist(new_table_name).await.unwrap());
.await
.unwrap()
.is_none());
assert!(catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
.await
.unwrap()
.is_some());
let registered_table = catalog let registered_table = catalog
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name) .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, new_table_name)
@@ -544,87 +541,52 @@ mod tests {
} }
#[test] #[test]
pub fn test_register_catalog_sync() { pub fn test_register_if_absent() {
let list = MemoryCatalogManager::with_default_setup(); let list = MemoryCatalogManager::default();
assert!(list assert!(list
.register_catalog_sync("test_catalog".to_string()) .register_catalog_if_absent(
.unwrap()); "test_catalog".to_string(),
assert!(!list Arc::new(MemoryCatalogProvider::new())
.register_catalog_sync("test_catalog".to_string()) )
.unwrap()); .is_none());
list.register_catalog_if_absent(
"test_catalog".to_string(),
Arc::new(MemoryCatalogProvider::new()),
)
.unwrap();
list.as_any()
.downcast_ref::<MemoryCatalogManager>()
.unwrap();
} }
#[tokio::test] #[tokio::test]
pub async fn test_catalog_deregister_table() { pub async fn test_catalog_deregister_table() {
let catalog = MemoryCatalogManager::with_default_setup(); let catalog = MemoryCatalogManager::default();
let table_name = "foo_table"; let schema = catalog
.schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
.await
.unwrap()
.unwrap();
let register_table_req = RegisterTableRequest { let register_table_req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(), catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(), table_name: "numbers".to_string(),
table_id: 2333, table_id: 2333,
table: NumbersTable::table(2333), table: Arc::new(NumbersTable::default()),
}; };
let _ = catalog.register_table(register_table_req).await.unwrap(); catalog.register_table(register_table_req).await.unwrap();
assert!(catalog assert!(schema.table_exist("numbers").await.unwrap());
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_some());
let deregister_table_req = DeregisterTableRequest { let deregister_table_req = DeregisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(), catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(), table_name: "numbers".to_string(),
}; };
catalog catalog
.deregister_table(deregister_table_req) .deregister_table(deregister_table_req)
.await .await
.unwrap(); .unwrap();
assert!(catalog assert!(!schema.table_exist("numbers").await.unwrap());
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, table_name)
.await
.unwrap()
.is_none());
}
#[tokio::test]
async fn test_catalog_deregister_schema() {
let catalog = MemoryCatalogManager::with_default_setup();
// Registers a catalog, a schema, and a table.
let catalog_name = "foo_catalog".to_string();
let schema_name = "foo_schema".to_string();
let table_name = "foo_table".to_string();
let schema = RegisterSchemaRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
};
let table = RegisterTableRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
table_name,
table_id: 0,
table: NumbersTable::table(0),
};
catalog
.clone()
.register_catalog(catalog_name.clone())
.await
.unwrap();
catalog.register_schema(schema).await.unwrap();
catalog.register_table(table).await.unwrap();
let request = DeregisterSchemaRequest {
catalog: catalog_name.clone(),
schema: schema_name.clone(),
};
assert!(catalog.deregister_schema(request).await.unwrap());
assert!(!catalog
.schema_exist(&catalog_name, &schema_name)
.await
.unwrap());
} }
} }

View File

@@ -12,10 +12,17 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use std::any::Any;
use std::fmt::Debug;
use std::pin::Pin;
use std::sync::Arc; use std::sync::Arc;
pub use client::{CachedMetaKvBackend, MetaKvBackend}; pub use client::{CachedMetaKvBackend, MetaKvBackend};
pub use manager::RemoteCatalogManager; use futures::Stream;
use futures_util::StreamExt;
pub use manager::{RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider};
use crate::error::Error;
mod client; mod client;
mod manager; mod manager;
@@ -24,9 +31,130 @@ mod manager;
pub mod mock; pub mod mock;
pub mod region_alive_keeper; pub mod region_alive_keeper;
#[derive(Debug, Clone)]
pub struct Kv(pub Vec<u8>, pub Vec<u8>);
pub type ValueIter<'a, E> = Pin<Box<dyn Stream<Item = Result<Kv, E>> + Send + 'a>>;
#[async_trait::async_trait]
pub trait KvBackend: Send + Sync {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b;
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error>;
/// Compares and sets the value of a key. `expect` is the expected current value; if the backend's
/// value associated with the key equals `expect`, the value is updated to `val`.
///
/// - If the compare-and-set operation successfully updates the value, this method returns `Ok(Ok(()))`.
/// - If the associated value does not equal `expect`, nothing is updated and `Ok(Err(current))` is
///   returned, where `current` is the value currently associated with the key, if any.
/// - If any error happens during the operation, an `Err(Error)` is returned.
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error>;
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error>;
async fn delete(&self, key: &[u8]) -> Result<(), Error> {
self.delete_range(key, &[]).await
}
/// The default `get` is implemented on top of the `range` method.
async fn get(&self, key: &[u8]) -> Result<Option<Kv>, Error> {
let mut iter = self.range(key);
while let Some(r) = iter.next().await {
let kv = r?;
if kv.0 == key {
return Ok(Some(kv));
}
}
return Ok(None);
}
/// MoveValue atomically renames the key to the given updated key.
async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<(), Error>;
fn as_any(&self) -> &dyn Any;
}
pub type KvBackendRef = Arc<dyn KvBackend>;
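A minimal, self-contained model of the `compare_and_set` contract documented above, with a plain `HashMap` standing in for the backend and a `String` standing in for the real error type:

```rust
use std::collections::HashMap;

/// Placeholder for the backend's real error type.
type Error = String;

/// Toy compare-and-set with the same return contract as the trait above:
/// `Ok(Ok(()))` on success, `Ok(Err(current))` when the expectation does not hold.
fn compare_and_set(
    store: &mut HashMap<Vec<u8>, Vec<u8>>,
    key: &[u8],
    expect: &[u8],
    val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
    let current = store.get(key).cloned();
    if current.as_deref() == Some(expect) {
        store.insert(key.to_vec(), val.to_vec());
        Ok(Ok(()))
    } else {
        // Expectation failed: hand back the value currently stored, if any.
        Ok(Err(current))
    }
}

fn main() {
    let mut store = HashMap::new();
    store.insert(b"k".to_vec(), b"1".to_vec());
    assert!(compare_and_set(&mut store, b"k", b"1", b"2").unwrap().is_ok());
    assert_eq!(
        compare_and_set(&mut store, b"k", b"1", b"3").unwrap(),
        Err(Some(b"2".to_vec()))
    );
}
```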
#[async_trait::async_trait] #[async_trait::async_trait]
pub trait KvCacheInvalidator: Send + Sync { pub trait KvCacheInvalidator: Send + Sync {
async fn invalidate_key(&self, key: &[u8]); async fn invalidate_key(&self, key: &[u8]);
} }
pub type KvCacheInvalidatorRef = Arc<dyn KvCacheInvalidator>; pub type KvCacheInvalidatorRef = Arc<dyn KvCacheInvalidator>;
#[cfg(test)]
mod tests {
use async_stream::stream;
use super::*;
struct MockKvBackend {}
#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
fn range<'a, 'b>(&'a self, _key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
Box::pin(stream!({
for i in 0..3 {
yield Ok(Kv(
i.to_string().as_bytes().to_vec(),
i.to_string().as_bytes().to_vec(),
))
}
}))
}
async fn set(&self, _key: &[u8], _val: &[u8]) -> Result<(), Error> {
unimplemented!()
}
async fn compare_and_set(
&self,
_key: &[u8],
_expect: &[u8],
_val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
unimplemented!()
}
async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<(), Error> {
unimplemented!()
}
async fn move_value(&self, _from_key: &[u8], _to_key: &[u8]) -> Result<(), Error> {
unimplemented!()
}
fn as_any(&self) -> &dyn Any {
self
}
}
#[tokio::test]
async fn test_get() {
let backend = MockKvBackend {};
let result = backend.get(0.to_string().as_bytes()).await;
assert_eq!(0.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(1.to_string().as_bytes()).await;
assert_eq!(1.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(2.to_string().as_bytes()).await;
assert_eq!(2.to_string().as_bytes(), result.unwrap().unwrap().0);
let result = backend.get(3.to_string().as_bytes()).await;
assert!(result.unwrap().is_none());
}
}
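The mock backend above builds its `ValueIter` with `async-stream`'s `stream!` macro. A standalone sketch of that pattern follows; the `async-stream` and `futures-util` crate names come from the diff, while `tokio` as the runtime and the generated keys/values are assumptions:

```rust
use async_stream::stream;
use futures_util::{Stream, StreamExt};

/// Same shape as the `Kv` pair yielded by the backends above.
#[derive(Debug, Clone)]
pub struct Kv(pub Vec<u8>, pub Vec<u8>);

/// Yields a few fabricated key-value pairs under `prefix` as an async stream.
fn range(prefix: &[u8]) -> impl Stream<Item = Kv> {
    let prefix = prefix.to_vec();
    stream! {
        for i in 0..3u32 {
            let mut key = prefix.clone();
            key.extend_from_slice(i.to_string().as_bytes());
            yield Kv(key, i.to_string().into_bytes());
        }
    }
}

#[tokio::main]
async fn main() {
    // Pin the stream so `StreamExt::next` can be called on it.
    let mut iter = Box::pin(range(b"table/"));
    while let Some(Kv(key, value)) = iter.next().await {
        println!("{} => {}", String::from_utf8_lossy(&key), String::from_utf8_lossy(&value));
    }
}
```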

View File

@@ -17,59 +17,54 @@ use std::fmt::Debug;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use common_error::ext::BoxedError; use async_stream::stream;
use common_meta::error::Error::{CacheNotGet, GetKvCache};
use common_meta::error::{CacheNotGetSnafu, Error, MetaSrvSnafu, Result};
use common_meta::kv_backend::{KvBackend, KvBackendRef, TxnService};
use common_meta::rpc::store::{ use common_meta::rpc::store::{
BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest, CompareAndPutRequest, DeleteRangeRequest, MoveValueRequest, PutRequest, RangeRequest,
BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest,
DeleteRangeResponse, MoveValueRequest, MoveValueResponse, PutRequest, PutResponse,
RangeRequest, RangeResponse,
}; };
use common_meta::rpc::KeyValue; use common_telemetry::{info, timer};
use common_telemetry::timer;
use meta_client::client::MetaClient; use meta_client::client::MetaClient;
use moka::future::{Cache, CacheBuilder}; use moka::future::{Cache, CacheBuilder};
use snafu::{OptionExt, ResultExt}; use snafu::ResultExt;
use super::KvCacheInvalidator; use super::KvCacheInvalidator;
use crate::error::{Error, GenericSnafu, MetaSrvSnafu, Result};
use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET}; use crate::metrics::{METRIC_CATALOG_KV_GET, METRIC_CATALOG_KV_REMOTE_GET};
use crate::remote::{Kv, KvBackend, KvBackendRef, ValueIter};
const CACHE_MAX_CAPACITY: u64 = 10000; const CACHE_MAX_CAPACITY: u64 = 10000;
const CACHE_TTL_SECOND: u64 = 10 * 60; const CACHE_TTL_SECOND: u64 = 10 * 60;
const CACHE_TTI_SECOND: u64 = 5 * 60; const CACHE_TTI_SECOND: u64 = 5 * 60;
pub type CacheBackendRef = Arc<Cache<Vec<u8>, KeyValue>>; pub type CacheBackendRef = Arc<Cache<Vec<u8>, Option<Kv>>>;
pub struct CachedMetaKvBackend { pub struct CachedMetaKvBackend {
kv_backend: KvBackendRef, kv_backend: KvBackendRef,
cache: CacheBackendRef, cache: CacheBackendRef,
name: String,
}
impl TxnService for CachedMetaKvBackend {
type Error = Error;
} }
#[async_trait::async_trait] #[async_trait::async_trait]
impl KvBackend for CachedMetaKvBackend { impl KvBackend for CachedMetaKvBackend {
fn name(&self) -> &str { fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
&self.name where
'a: 'b,
{
self.kv_backend.range(key)
} }
fn as_any(&self) -> &dyn Any { async fn get(&self, key: &[u8]) -> Result<Option<Kv>> {
self let _timer = timer!(METRIC_CATALOG_KV_GET);
let init = async {
let _timer = timer!(METRIC_CATALOG_KV_REMOTE_GET);
self.kv_backend.get(key).await
};
let schema_provider = self.cache.try_get_with_by_ref(key, init).await;
schema_provider.map_err(|e| GenericSnafu { msg: e.to_string() }.build())
} }
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> { async fn set(&self, key: &[u8], val: &[u8]) -> Result<()> {
self.kv_backend.range(req).await let ret = self.kv_backend.set(key, val).await;
}
async fn put(&self, req: PutRequest) -> Result<PutResponse> {
let key = &req.key.clone();
let ret = self.kv_backend.put(req).await;
if ret.is_ok() { if ret.is_ok() {
self.invalidate_key(key).await; self.invalidate_key(key).await;
@@ -78,32 +73,8 @@ impl KvBackend for CachedMetaKvBackend {
ret ret
} }
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> { async fn delete(&self, key: &[u8]) -> Result<()> {
let keys = req let ret = self.kv_backend.delete_range(key, &[]).await;
.kvs
.iter()
.map(|kv| kv.key().to_vec())
.collect::<Vec<_>>();
let resp = self.kv_backend.batch_put(req).await;
if resp.is_ok() {
for key in keys {
self.invalidate_key(&key).await;
}
}
resp
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
self.kv_backend.batch_get(req).await
}
async fn compare_and_put(&self, req: CompareAndPutRequest) -> Result<CompareAndPutResponse> {
let key = &req.key.clone();
let ret = self.kv_backend.compare_and_put(req).await;
if ret.is_ok() { if ret.is_ok() {
self.invalidate_key(key).await; self.invalidate_key(key).await;
@@ -112,51 +83,28 @@ impl KvBackend for CachedMetaKvBackend {
ret ret
} }
async fn delete_range(&self, mut req: DeleteRangeRequest) -> Result<DeleteRangeResponse> { async fn delete_range(&self, _key: &[u8], _end: &[u8]) -> Result<()> {
let prev_kv = req.prev_kv; // TODO(fys): implement it
unimplemented!()
req.prev_kv = true;
let resp = self.kv_backend.delete_range(req).await;
match resp {
Ok(mut resp) => {
for prev_kv in resp.prev_kvs.iter() {
self.invalidate_key(prev_kv.key()).await;
}
if !prev_kv {
resp.prev_kvs = vec![];
}
Ok(resp)
}
Err(e) => Err(e),
}
} }
async fn batch_delete(&self, mut req: BatchDeleteRequest) -> Result<BatchDeleteResponse> { async fn compare_and_set(
let prev_kv = req.prev_kv; &self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<std::result::Result<(), Option<Vec<u8>>>> {
let ret = self.kv_backend.compare_and_set(key, expect, val).await;
req.prev_kv = true; if ret.is_ok() {
let resp = self.kv_backend.batch_delete(req).await; self.invalidate_key(key).await;
match resp {
Ok(mut resp) => {
for prev_kv in resp.prev_kvs.iter() {
self.invalidate_key(prev_kv.key()).await;
}
if !prev_kv {
resp.prev_kvs = vec![];
}
Ok(resp)
}
Err(e) => Err(e),
} }
ret
} }
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> { async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<()> {
let from_key = &req.from_key.clone(); let ret = self.kv_backend.move_value(from_key, to_key).await;
let to_key = &req.to_key.clone();
let ret = self.kv_backend.move_value(req).await;
if ret.is_ok() { if ret.is_ok() {
self.invalidate_key(from_key).await; self.invalidate_key(from_key).await;
@@ -166,31 +114,8 @@ impl KvBackend for CachedMetaKvBackend {
ret ret
} }
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> { fn as_any(&self) -> &dyn Any {
let _timer = timer!(METRIC_CATALOG_KV_GET); self
let init = async {
let _timer = timer!(METRIC_CATALOG_KV_REMOTE_GET);
self.kv_backend.get(key).await.map(|val| {
val.with_context(|| CacheNotGetSnafu {
key: String::from_utf8_lossy(key),
})
})?
};
// currently moka doesn't have `optionally_try_get_with_by_ref`
// TODO(fys): change to moka method when available
// https://github.com/moka-rs/moka/issues/254
match self.cache.try_get_with_by_ref(key, init).await {
Ok(val) => Ok(Some(val)),
Err(e) => match e.as_ref() {
CacheNotGet { .. } => Ok(None),
_ => Err(e),
},
}
.map_err(|e| GetKvCache {
err_msg: e.to_string(),
})
}
}
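Both versions of `CachedMetaKvBackend` above follow the same rule: write to the backing store first, and invalidate the cached key only if the write succeeded, so a failed write never evicts a still-valid entry. A self-contained sketch of that write-then-invalidate pattern, using plain std types instead of the real moka cache and meta client (names here are illustrative only, not part of this changeset):

use std::collections::HashMap;
use std::sync::Mutex;

// Toy stand-ins for the real backend and cache; illustrative only.
struct CachedStore {
    backend: Mutex<HashMap<Vec<u8>, Vec<u8>>>,
    cache: Mutex<HashMap<Vec<u8>, Vec<u8>>>,
}

impl CachedStore {
    fn put(&self, key: &[u8], value: &[u8]) -> Result<(), String> {
        // 1. Mutate the backing store first.
        self.backend
            .lock()
            .unwrap()
            .insert(key.to_vec(), value.to_vec());
        // 2. Only after the write succeeds, drop the now-stale cached entry;
        //    the next read repopulates it from the backend.
        self.cache.lock().unwrap().remove(key);
        Ok(())
    }

    fn get(&self, key: &[u8]) -> Option<Vec<u8>> {
        if let Some(v) = self.cache.lock().unwrap().get(key).cloned() {
            return Some(v);
        }
        let v = self.backend.lock().unwrap().get(key).cloned()?;
        self.cache.lock().unwrap().insert(key.to_vec(), v.clone());
        Some(v)
    }
}

The real backend layers the same ordering over moka with TTL/TTI eviction; the write-then-invalidate sequence is the part both sides of the diff keep.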
@@ -203,8 +128,15 @@ impl KvCacheInvalidator for CachedMetaKvBackend {
impl CachedMetaKvBackend {
pub fn new(client: Arc<MetaClient>) -> Self {
let cache = Arc::new(
CacheBuilder::new(CACHE_MAX_CAPACITY)
.time_to_live(Duration::from_secs(CACHE_TTL_SECOND))
.time_to_idle(Duration::from_secs(CACHE_TTI_SECOND))
.build(),
);
let kv_backend = Arc::new(MetaKvBackend { client });
Self::wrap(kv_backend)
Self { kv_backend, cache }
}
pub fn wrap(kv_backend: KvBackendRef) -> Self {
@@ -215,12 +147,7 @@ impl CachedMetaKvBackend {
.build(),
);
let name = format!("CachedKvBackend({})", kv_backend.name()); Self { kv_backend, cache }
Self {
kv_backend,
cache,
name,
}
}
pub fn cache(&self) -> &CacheBackendRef {
@@ -233,97 +160,88 @@ pub struct MetaKvBackend {
pub client: Arc<MetaClient>,
}
impl TxnService for MetaKvBackend {
type Error = Error;
}
/// Implement the `KvBackend` trait for `MetaKvBackend` instead of opendal's `Accessor`, since
/// `MetaClient`'s range method can return both keys and values, which reduces IO overhead
/// compared to `Accessor`'s list and get methods.
#[async_trait::async_trait]
impl KvBackend for MetaKvBackend {
fn name(&self) -> &str { fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
"MetaKvBackend" where
'a: 'b,
{
let key = key.to_vec();
Box::pin(stream!({
let mut resp = self
.client
.range(RangeRequest::new().with_prefix(key))
.await
.context(MetaSrvSnafu)?;
let kvs = resp.take_kvs();
for mut kv in kvs.into_iter() {
yield Ok(Kv(kv.take_key(), kv.take_value()))
}
}))
} }
async fn range(&self, req: RangeRequest) -> Result<RangeResponse> { async fn get(&self, key: &[u8]) -> Result<Option<Kv>> {
self.client
.range(req)
.await
.map_err(BoxedError::new)
.context(MetaSrvSnafu)
}
async fn get(&self, key: &[u8]) -> Result<Option<KeyValue>> {
let mut response = self
.client
.range(RangeRequest::new().with_key(key))
.await
.map_err(BoxedError::new)
.context(MetaSrvSnafu)?; .context(MetaSrvSnafu)?;
Ok(response.take_kvs().get_mut(0).map(|kv| KeyValue { Ok(response
key: kv.take_key(), .take_kvs()
value: kv.take_value(), .get_mut(0)
})) .map(|kv| Kv(kv.take_key(), kv.take_value())))
} }
async fn batch_put(&self, req: BatchPutRequest) -> Result<BatchPutResponse> { async fn set(&self, key: &[u8], val: &[u8]) -> Result<()> {
self.client let req = PutRequest::new()
.batch_put(req) .with_key(key.to_vec())
.await .with_value(val.to_vec());
.map_err(BoxedError::new) let _ = self.client.put(req).await.context(MetaSrvSnafu)?;
.context(MetaSrvSnafu) Ok(())
} }
async fn put(&self, req: PutRequest) -> Result<PutResponse> { async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<()> {
self.client let req = DeleteRangeRequest::new().with_range(key.to_vec(), end.to_vec());
.put(req) let resp = self.client.delete_range(req).await.context(MetaSrvSnafu)?;
.await info!(
.map_err(BoxedError::new) "Delete range, key: {}, end: {}, deleted: {}",
.context(MetaSrvSnafu) String::from_utf8_lossy(key),
String::from_utf8_lossy(end),
resp.deleted()
);
Ok(())
} }
async fn delete_range(&self, req: DeleteRangeRequest) -> Result<DeleteRangeResponse> { async fn compare_and_set(
self.client
.delete_range(req)
.await
.map_err(BoxedError::new)
.context(MetaSrvSnafu)
}
async fn batch_delete(&self, req: BatchDeleteRequest) -> Result<BatchDeleteResponse> {
self.client
.batch_delete(req)
.await
.map_err(BoxedError::new)
.context(MetaSrvSnafu)
}
async fn batch_get(&self, req: BatchGetRequest) -> Result<BatchGetResponse> {
self.client
.batch_get(req)
.await
.map_err(BoxedError::new)
.context(MetaSrvSnafu)
}
async fn compare_and_put(
&self, &self,
request: CompareAndPutRequest, key: &[u8],
) -> Result<CompareAndPutResponse> { expect: &[u8],
self.client val: &[u8],
) -> Result<std::result::Result<(), Option<Vec<u8>>>> {
let request = CompareAndPutRequest::new()
.with_key(key.to_vec())
.with_expect(expect.to_vec())
.with_value(val.to_vec());
let mut response = self
.client
.compare_and_put(request)
.await
.map_err(BoxedError::new) .context(MetaSrvSnafu)?;
.context(MetaSrvSnafu) if response.is_success() {
Ok(Ok(()))
} else {
Ok(Err(response.take_prev_kv().map(|v| v.value().to_vec())))
}
} }
async fn move_value(&self, req: MoveValueRequest) -> Result<MoveValueResponse> { async fn move_value(&self, from_key: &[u8], to_key: &[u8]) -> Result<()> {
self.client let req = MoveValueRequest::new(from_key, to_key);
.move_value(req) self.client.move_value(req).await.context(MetaSrvSnafu)?;
.await Ok(())
.map_err(BoxedError::new)
.context(MetaSrvSnafu)
}
fn as_any(&self) -> &dyn Any {
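As the doc comment on `MetaKvBackend` notes, a range call that already carries the values saves one round trip per key compared with listing keys and then fetching each value. A rough, self-contained sketch of that difference (the `ToyStore` trait below is hypothetical, not the real `MetaClient` API):

/// Illustrative only: a store that can either list keys or range over key-value pairs.
trait ToyStore {
    fn list(&self, prefix: &[u8]) -> Vec<Vec<u8>>;
    fn get(&self, key: &[u8]) -> Option<Vec<u8>>;
    fn range(&self, prefix: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)>;
}

/// N + 1 requests: one list, then one get per returned key.
fn load_via_list_and_get(store: &dyn ToyStore, prefix: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)> {
    store
        .list(prefix)
        .into_iter()
        .filter_map(|k| store.get(&k).map(|v| (k, v)))
        .collect()
}

/// A single request: the range call already carries the values.
fn load_via_range(store: &dyn ToyStore, prefix: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)> {
    store.range(prefix)
}

With N keys under a prefix, the first helper issues N + 1 requests while the second issues one, which is the IO saving the comment refers to.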

File diff suppressed because it is too large.

View File

@@ -12,25 +12,162 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::any::Any;
use std::sync::{Arc, RwLock as StdRwLock};
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use std::sync::Arc;
use async_stream::stream;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_recordbatch::RecordBatch;
use common_telemetry::logging::info;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::StringVector;
use table::engine::{CloseTableResult, EngineContext, TableEngine};
use serde::Serializer;
use table::engine::{CloseTableResult, EngineContext, TableEngine, TableReference};
use table::metadata::TableId;
use table::requests::{
AlterTableRequest, CloseTableRequest, CreateTableRequest, DropTableRequest, OpenTableRequest,
TruncateTableRequest,
};
use table::test_util::MemTable;
use table::TableRef;
use tokio::sync::RwLock;
use crate::error::Error;
use crate::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use crate::remote::{Kv, KvBackend, ValueIter};
pub struct MockKvBackend {
map: RwLock<BTreeMap<Vec<u8>, Vec<u8>>>,
}
impl Default for MockKvBackend {
fn default() -> Self {
let mut map = BTreeMap::default();
let catalog_value = CatalogValue {}.as_bytes().unwrap();
let schema_value = SchemaValue {}.as_bytes().unwrap();
let default_catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
}
.to_string();
let default_schema_key = SchemaKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
}
.to_string();
// create default catalog and schema
map.insert(default_catalog_key.into(), catalog_value);
map.insert(default_schema_key.into(), schema_value);
let map = RwLock::new(map);
Self { map }
}
}
impl Display for MockKvBackend {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
futures::executor::block_on(async {
let map = self.map.read().await;
for (k, v) in map.iter() {
f.serialize_str(&String::from_utf8_lossy(k))?;
f.serialize_str(" -> ")?;
f.serialize_str(&String::from_utf8_lossy(v))?;
f.serialize_str("\n")?;
}
Ok(())
})
}
}
#[async_trait::async_trait]
impl KvBackend for MockKvBackend {
fn range<'a, 'b>(&'a self, key: &[u8]) -> ValueIter<'b, Error>
where
'a: 'b,
{
let prefix = key.to_vec();
let prefix_string = String::from_utf8_lossy(&prefix).to_string();
Box::pin(stream!({
let maps = self.map.read().await.clone();
for (k, v) in maps.range(prefix.clone()..) {
let key_string = String::from_utf8_lossy(k).to_string();
let matches = key_string.starts_with(&prefix_string);
if matches {
yield Ok(Kv(k.clone(), v.clone()))
} else {
info!("Stream finished");
return;
}
}
}))
}
async fn set(&self, key: &[u8], val: &[u8]) -> Result<(), Error> {
let mut map = self.map.write().await;
map.insert(key.to_vec(), val.to_vec());
Ok(())
}
async fn compare_and_set(
&self,
key: &[u8],
expect: &[u8],
val: &[u8],
) -> Result<Result<(), Option<Vec<u8>>>, Error> {
let mut map = self.map.write().await;
let existing = map.entry(key.to_vec());
match existing {
Entry::Vacant(e) => {
if expect.is_empty() {
e.insert(val.to_vec());
Ok(Ok(()))
} else {
Ok(Err(None))
}
}
Entry::Occupied(mut existing) => {
if existing.get() == expect {
existing.insert(val.to_vec());
Ok(Ok(()))
} else {
Ok(Err(Some(existing.get().clone())))
}
}
}
}
async fn delete_range(&self, key: &[u8], end: &[u8]) -> Result<(), Error> {
let mut map = self.map.write().await;
if end.is_empty() {
let _ = map.remove(key);
} else {
let start = key.to_vec();
let end = end.to_vec();
let range = start..end;
map.retain(|k, _| !range.contains(k));
}
Ok(())
}
async fn move_value(&self, _from_key: &[u8], _to_key: &[u8]) -> Result<(), Error> {
unimplemented!()
}
fn as_any(&self) -> &dyn Any {
self
}
}
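The mock's `compare_and_set` gives CAS the usual semantics: an empty `expect` means "only succeed if the key is absent", and a mismatch hands back the current value so the caller can retry or give up. A standalone sketch of the same entry-based logic with a small usage check (a hypothetical helper, not part of this changeset):

use std::collections::btree_map::Entry;
use std::collections::BTreeMap;

/// Empty `expect` means "insert only if absent"; on mismatch the current value is returned.
fn compare_and_set(
    map: &mut BTreeMap<Vec<u8>, Vec<u8>>,
    key: &[u8],
    expect: &[u8],
    val: &[u8],
) -> Result<(), Option<Vec<u8>>> {
    match map.entry(key.to_vec()) {
        Entry::Vacant(e) => {
            if expect.is_empty() {
                e.insert(val.to_vec());
                Ok(())
            } else {
                Err(None)
            }
        }
        Entry::Occupied(mut e) => {
            if e.get().as_slice() == expect {
                e.insert(val.to_vec());
                Ok(())
            } else {
                Err(Some(e.get().clone()))
            }
        }
    }
}

fn main() {
    let mut map = BTreeMap::new();
    // First writer wins when both expect the key to be absent.
    assert_eq!(compare_and_set(&mut map, b"k", b"", b"v1"), Ok(()));
    // The loser observes the value it raced against and can decide what to do next.
    assert_eq!(
        compare_and_set(&mut map, b"k", b"", b"v2"),
        Err(Some(b"v1".to_vec()))
    );
}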
#[derive(Default)]
pub struct MockTableEngine {
tables: StdRwLock<HashMap<TableId, TableRef>>,
tables: RwLock<HashMap<String, TableRef>>,
}
#[async_trait::async_trait]
@@ -45,8 +182,21 @@ impl TableEngine for MockTableEngine {
_ctx: &EngineContext,
request: CreateTableRequest,
) -> table::Result<TableRef> {
let table_id = request.id; let table_name = request.table_name.clone();
let catalog_name = request.catalog_name.clone();
let schema_name = request.schema_name.clone();
let table_full_name =
TableReference::full(&catalog_name, &schema_name, &table_name).to_string();
let default_table_id = "0".to_owned();
let table_id = TableId::from_str(
request
.table_options
.extra_options
.get("table_id")
.unwrap_or(&default_table_id),
)
.unwrap();
let schema = Arc::new(Schema::new(vec![ColumnSchema::new( let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
"name", "name",
ConcreteDataType::string_datatype(), ConcreteDataType::string_datatype(),
@@ -56,16 +206,16 @@ impl TableEngine for MockTableEngine {
let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _]; let data = vec![Arc::new(StringVector::from(vec!["a", "b", "c"])) as _];
let record_batch = RecordBatch::new(schema, data).unwrap(); let record_batch = RecordBatch::new(schema, data).unwrap();
let table: TableRef = Arc::new(MemTable::new_with_catalog( let table: TableRef = Arc::new(MemTable::new_with_catalog(
&request.table_name, &table_name,
record_batch, record_batch,
table_id, table_id,
request.catalog_name, catalog_name,
request.schema_name, schema_name,
vec![0], vec![0],
)) as Arc<_>; )) as Arc<_>;
let mut tables = self.tables.write().unwrap(); let mut tables = self.tables.write().await;
let _ = tables.insert(table_id, table.clone() as TableRef); tables.insert(table_full_name, table.clone() as TableRef);
Ok(table) Ok(table)
} }
@@ -74,7 +224,7 @@ impl TableEngine for MockTableEngine {
_ctx: &EngineContext, _ctx: &EngineContext,
request: OpenTableRequest, request: OpenTableRequest,
) -> table::Result<Option<TableRef>> { ) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().unwrap().get(&request.table_id).cloned()) Ok(self.tables.read().await.get(&request.table_name).cloned())
} }
async fn alter_table( async fn alter_table(
@@ -88,13 +238,25 @@ impl TableEngine for MockTableEngine {
fn get_table( fn get_table(
&self, &self,
_ctx: &EngineContext, _ctx: &EngineContext,
table_id: TableId, table_ref: &TableReference,
) -> table::Result<Option<TableRef>> { ) -> table::Result<Option<TableRef>> {
Ok(self.tables.read().unwrap().get(&table_id).cloned()) futures::executor::block_on(async {
Ok(self
.tables
.read()
.await
.get(&table_ref.to_string())
.cloned())
})
} }
fn table_exists(&self, _ctx: &EngineContext, table_id: TableId) -> bool { fn table_exists(&self, _ctx: &EngineContext, table_ref: &TableReference) -> bool {
self.tables.read().unwrap().contains_key(&table_id) futures::executor::block_on(async {
self.tables
.read()
.await
.contains_key(&table_ref.to_string())
})
} }
async fn drop_table( async fn drop_table(
@@ -110,19 +272,15 @@ impl TableEngine for MockTableEngine {
_ctx: &EngineContext, _ctx: &EngineContext,
request: CloseTableRequest, request: CloseTableRequest,
) -> table::Result<CloseTableResult> { ) -> table::Result<CloseTableResult> {
let _ = self.tables.write().unwrap().remove(&request.table_id); let _ = self
.tables
.write()
.await
.remove(&request.table_ref().to_string());
Ok(CloseTableResult::Released(vec![])) Ok(CloseTableResult::Released(vec![]))
} }
async fn close(&self) -> table::Result<()> { async fn close(&self) -> table::Result<()> {
Ok(()) Ok(())
} }
async fn truncate_table(
&self,
_ctx: &EngineContext,
_request: TruncateTableRequest,
) -> table::Result<bool> {
Ok(true)
}
} }

View File

@@ -29,7 +29,6 @@ use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionNumber; use store_api::storage::RegionNumber;
use table::engine::manager::TableEngineManagerRef; use table::engine::manager::TableEngineManagerRef;
use table::engine::{CloseTableResult, EngineContext, TableEngineRef}; use table::engine::{CloseTableResult, EngineContext, TableEngineRef};
use table::metadata::TableId;
use table::requests::CloseTableRequest; use table::requests::CloseTableRequest;
use table::TableRef; use table::TableRef;
use tokio::sync::{mpsc, oneshot, Mutex}; use tokio::sync::{mpsc, oneshot, Mutex};
@@ -37,13 +36,11 @@ use tokio::task::JoinHandle;
use tokio::time::{Duration, Instant}; use tokio::time::{Duration, Instant};
use crate::error::{Result, TableEngineNotFoundSnafu}; use crate::error::{Result, TableEngineNotFoundSnafu};
use crate::local::MemoryCatalogManager;
use crate::DeregisterTableRequest;
/// [RegionAliveKeepers] manages all [RegionAliveKeeper] in a scope of tables.
pub struct RegionAliveKeepers {
table_engine_manager: TableEngineManagerRef,
keepers: Arc<Mutex<HashMap<TableId, Arc<RegionAliveKeeper>>>>,
keepers: Arc<Mutex<HashMap<TableIdent, Arc<RegionAliveKeeper>>>>,
heartbeat_interval_millis: u64,
started: AtomicBool,
@@ -68,18 +65,12 @@ impl RegionAliveKeepers {
} }
} }
pub async fn find_keeper(&self, table_id: TableId) -> Option<Arc<RegionAliveKeeper>> { pub async fn find_keeper(&self, table_ident: &TableIdent) -> Option<Arc<RegionAliveKeeper>> {
self.keepers.lock().await.get(&table_id).cloned() self.keepers.lock().await.get(table_ident).cloned()
} }
pub async fn register_table( pub async fn register_table(&self, table_ident: TableIdent, table: TableRef) -> Result<()> {
&self, let keeper = self.find_keeper(&table_ident).await;
table_ident: TableIdent,
table: TableRef,
catalog_manager: Arc<MemoryCatalogManager>,
) -> Result<()> {
let table_id = table_ident.table_id;
let keeper = self.find_keeper(table_id).await;
if keeper.is_some() { if keeper.is_some() {
return Ok(()); return Ok(());
} }
@@ -93,7 +84,6 @@ impl RegionAliveKeepers {
let keeper = Arc::new(RegionAliveKeeper::new( let keeper = Arc::new(RegionAliveKeeper::new(
table_engine, table_engine,
catalog_manager,
table_ident.clone(), table_ident.clone(),
self.heartbeat_interval_millis, self.heartbeat_interval_millis,
)); ));
@@ -102,7 +92,7 @@ impl RegionAliveKeepers {
} }
let mut keepers = self.keepers.lock().await; let mut keepers = self.keepers.lock().await;
let _ = keepers.insert(table_id, keeper.clone()); keepers.insert(table_ident.clone(), keeper.clone());
if self.started.load(Ordering::Relaxed) { if self.started.load(Ordering::Relaxed) {
keeper.start().await; keeper.start().await;
@@ -118,16 +108,15 @@ impl RegionAliveKeepers {
&self, &self,
table_ident: &TableIdent, table_ident: &TableIdent,
) -> Option<Arc<RegionAliveKeeper>> { ) -> Option<Arc<RegionAliveKeeper>> {
let table_id = table_ident.table_id; self.keepers.lock().await.remove(table_ident).map(|x| {
self.keepers.lock().await.remove(&table_id).map(|x| {
info!("Deregister RegionAliveKeeper for table {table_ident}"); info!("Deregister RegionAliveKeeper for table {table_ident}");
x x
}) })
} }
pub async fn register_region(&self, region_ident: &RegionIdent) { pub async fn register_region(&self, region_ident: &RegionIdent) {
let table_id = region_ident.table_ident.table_id; let table_ident = &region_ident.table_ident;
let Some(keeper) = self.find_keeper(table_id).await else { let Some(keeper) = self.find_keeper(table_ident).await else {
// Alive keeper could be affected by lagging msg, just warn and ignore. // Alive keeper could be affected by lagging msg, just warn and ignore.
warn!("Alive keeper for region {region_ident} is not found!"); warn!("Alive keeper for region {region_ident} is not found!");
return; return;
@@ -136,8 +125,8 @@ impl RegionAliveKeepers {
} }
pub async fn deregister_region(&self, region_ident: &RegionIdent) { pub async fn deregister_region(&self, region_ident: &RegionIdent) {
let table_id = region_ident.table_ident.table_id; let table_ident = &region_ident.table_ident;
let Some(keeper) = self.find_keeper(table_id).await else { let Some(keeper) = self.find_keeper(table_ident).await else {
// Alive keeper could be affected by lagging msg, just warn and ignore. // Alive keeper could be affected by lagging msg, just warn and ignore.
warn!("Alive keeper for region {region_ident} is not found!"); warn!("Alive keeper for region {region_ident} is not found!");
return; return;
@@ -189,8 +178,7 @@ impl HeartbeatResponseHandler for RegionAliveKeepers {
} }
}; };
let table_id = table_ident.table_id; let Some(keeper) = self.keepers.lock().await.get(&table_ident).cloned() else {
let Some(keeper) = self.keepers.lock().await.get(&table_id).cloned() else {
// Alive keeper could be affected by lagging msg, just warn and ignore. // Alive keeper could be affected by lagging msg, just warn and ignore.
warn!("Alive keeper for table {table_ident} is not found!"); warn!("Alive keeper for table {table_ident} is not found!");
continue; continue;
@@ -211,7 +199,6 @@ impl HeartbeatResponseHandler for RegionAliveKeepers {
/// Datanode, it will "extend" the region's "lease", with a deadline for [RegionAliveKeeper] to
/// count down.
pub struct RegionAliveKeeper {
catalog_manager: Arc<MemoryCatalogManager>,
table_engine: TableEngineRef,
table_ident: TableIdent,
countdown_task_handles: Arc<Mutex<HashMap<RegionNumber, Arc<CountdownTaskHandle>>>>,
@@ -222,12 +209,10 @@ pub struct RegionAliveKeeper {
impl RegionAliveKeeper { impl RegionAliveKeeper {
fn new( fn new(
table_engine: TableEngineRef, table_engine: TableEngineRef,
catalog_manager: Arc<MemoryCatalogManager>,
table_ident: TableIdent, table_ident: TableIdent,
heartbeat_interval_millis: u64, heartbeat_interval_millis: u64,
) -> Self { ) -> Self {
Self { Self {
catalog_manager,
table_engine, table_engine,
table_ident, table_ident,
countdown_task_handles: Arc::new(Mutex::new(HashMap::new())), countdown_task_handles: Arc::new(Mutex::new(HashMap::new())),
@@ -252,36 +237,18 @@ impl RegionAliveKeeper {
let countdown_task_handles = Arc::downgrade(&self.countdown_task_handles); let countdown_task_handles = Arc::downgrade(&self.countdown_task_handles);
let on_task_finished = async move { let on_task_finished = async move {
if let Some(x) = countdown_task_handles.upgrade() { if let Some(x) = countdown_task_handles.upgrade() {
let _ = x.lock().await.remove(&region); x.lock().await.remove(&region);
} // Else the countdown task handles map could be dropped because the keeper is dropped. } // Else the countdown task handles map could be dropped because the keeper is dropped.
}; };
let catalog_manager = self.catalog_manager.clone();
let ident = self.table_ident.clone();
let handle = Arc::new(CountdownTaskHandle::new( let handle = Arc::new(CountdownTaskHandle::new(
self.table_engine.clone(), self.table_engine.clone(),
self.table_ident.clone(), self.table_ident.clone(),
region, region,
move |result: Option<CloseTableResult>| { || on_task_finished,
if matches!(result, Some(CloseTableResult::Released(_))) {
let result = catalog_manager.deregister_table_sync(DeregisterTableRequest {
catalog: ident.catalog.to_string(),
schema: ident.schema.to_string(),
table_name: ident.table.to_string(),
});
info!(
"Deregister table: {} after countdown task finished, result: {result:?}",
ident.table_id
);
} else {
debug!("Countdown task returns: {result:?}");
}
on_task_finished
},
)); ));
let mut handles = self.countdown_task_handles.lock().await; let mut handles = self.countdown_task_handles.lock().await;
let _ = handles.insert(region, handle.clone()); handles.insert(region, handle.clone());
if self.started.load(Ordering::Relaxed) { if self.started.load(Ordering::Relaxed) {
handle.start(self.heartbeat_interval_millis).await; handle.start(self.heartbeat_interval_millis).await;
@@ -345,10 +312,6 @@ impl RegionAliveKeeper {
}
deadline
}
pub fn table_ident(&self) -> &TableIdent {
&self.table_ident
}
} }
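The keeper described above is essentially a resettable countdown: every heartbeat pushes the deadline forward, and only when the deadline lapses with no extension does the region get closed. A minimal sketch of that loop, assuming a tokio runtime (the `LeaseCommand` channel and `close_region` callback are hypothetical; the real task also reports the close result back to the keeper):

use tokio::sync::mpsc;
use tokio::time::{sleep_until, Duration, Instant};

/// Commands the countdown task understands: push the deadline forward, or stop.
enum LeaseCommand {
    ExtendUntil(Instant),
    Stop,
}

/// Each heartbeat extends the deadline; if the deadline passes with no
/// extension, the "close region" action fires and the task exits.
async fn countdown_loop(mut rx: mpsc::Receiver<LeaseCommand>, mut close_region: impl FnMut()) {
    // Effectively "never", until the first heartbeat arms the countdown.
    let mut deadline = Instant::now() + Duration::from_secs(86400 * 365 * 30);
    loop {
        tokio::select! {
            cmd = rx.recv() => match cmd {
                Some(LeaseCommand::ExtendUntil(new_deadline)) => deadline = new_deadline,
                Some(LeaseCommand::Stop) | None => break,
            },
            _ = sleep_until(deadline) => {
                close_region();
                break;
            }
        }
    }
}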
#[derive(Debug)] #[derive(Debug)]
@@ -376,7 +339,7 @@ impl CountdownTaskHandle {
table_engine: TableEngineRef, table_engine: TableEngineRef,
table_ident: TableIdent, table_ident: TableIdent,
region: RegionNumber, region: RegionNumber,
on_task_finished: impl FnOnce(Option<CloseTableResult>) -> Fut + Send + 'static, on_task_finished: impl FnOnce() -> Fut + Send + 'static,
) -> Self ) -> Self
where where
Fut: Future<Output = ()> + Send, Fut: Future<Output = ()> + Send,
@@ -390,8 +353,8 @@ impl CountdownTaskHandle {
rx, rx,
}; };
let handler = common_runtime::spawn_bg(async move { let handler = common_runtime::spawn_bg(async move {
let result = countdown_task.run().await; countdown_task.run().await;
on_task_finished(result).await; on_task_finished().await;
}); });
Self { Self {
@@ -443,8 +406,7 @@ struct CountdownTask {
} }
impl CountdownTask { impl CountdownTask {
// returns true if async fn run(&mut self) {
async fn run(&mut self) -> Option<CloseTableResult> {
// 30 years. See `Instant::far_future`. // 30 years. See `Instant::far_future`.
let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30); let far_future = Instant::now() + Duration::from_secs(86400 * 365 * 30);
@@ -498,11 +460,10 @@ impl CountdownTask {
"Region {region} of table {table_ident} is closed, result: {result:?}. \ "Region {region} of table {table_ident} is closed, result: {result:?}. \
RegionAliveKeeper out.", RegionAliveKeeper out.",
); );
return Some(result); break;
} }
} }
} }
None
} }
async fn close_region(&self) -> CloseTableResult { async fn close_region(&self) -> CloseTableResult {
@@ -514,7 +475,6 @@ impl CountdownTask {
catalog_name: table_ident.catalog.clone(), catalog_name: table_ident.catalog.clone(),
schema_name: table_ident.schema.clone(), schema_name: table_ident.schema.clone(),
table_name: table_ident.table.clone(), table_name: table_ident.table.clone(),
table_id: table_ident.table_id,
region_numbers: vec![region], region_numbers: vec![region],
flush: true, flush: true,
}; };
@@ -539,7 +499,7 @@ mod test {
use common_meta::heartbeat::mailbox::HeartbeatMailbox; use common_meta::heartbeat::mailbox::HeartbeatMailbox;
use datatypes::schema::RawSchema; use datatypes::schema::RawSchema;
use table::engine::manager::MemoryTableEngineManager; use table::engine::manager::MemoryTableEngineManager;
use table::engine::TableEngine; use table::engine::{TableEngine, TableReference};
use table::requests::{CreateTableRequest, TableOptions}; use table::requests::{CreateTableRequest, TableOptions};
use table::test_util::EmptyTable; use table::test_util::EmptyTable;
@@ -578,16 +538,11 @@ mod test {
table_options: TableOptions::default(), table_options: TableOptions::default(),
engine: "MockTableEngine".to_string(), engine: "MockTableEngine".to_string(),
})); }));
let catalog_manager = MemoryCatalogManager::new_with_table(table.clone());
keepers keepers
.register_table(table_ident.clone(), table, catalog_manager) .register_table(table_ident.clone(), table)
.await .await
.unwrap(); .unwrap();
assert!(keepers assert!(keepers.keepers.lock().await.contains_key(&table_ident));
.keepers
.lock()
.await
.contains_key(&table_ident.table_id));
(table_ident, keepers) (table_ident, keepers)
} }
@@ -642,7 +597,7 @@ mod test {
.keepers .keepers
.lock() .lock()
.await .await
.get(&table_ident.table_id) .get(&table_ident)
.cloned() .cloned()
.unwrap(); .unwrap();
@@ -689,7 +644,7 @@ mod test {
}) })
.await; .await;
let mut regions = keepers let mut regions = keepers
.find_keeper(table_ident.table_id) .find_keeper(&table_ident)
.await .await
.unwrap() .unwrap()
.countdown_task_handles .countdown_task_handles
@@ -716,13 +671,12 @@ mod test {
table_id: 1024, table_id: 1024,
engine: "mito".to_string(), engine: "mito".to_string(),
}; };
let catalog_manager = MemoryCatalogManager::with_default_setup(); let keeper = RegionAliveKeeper::new(table_engine, table_ident, 1000);
let keeper = RegionAliveKeeper::new(table_engine, catalog_manager, table_ident, 1000);
let region = 1; let region = 1;
assert!(keeper.find_handle(&region).await.is_none()); assert!(keeper.find_handle(&region).await.is_none());
keeper.register_region(region).await; keeper.register_region(region).await;
let _ = keeper.find_handle(&region).await.unwrap(); assert!(keeper.find_handle(&region).await.is_some());
let ten_seconds_later = || Instant::now() + Duration::from_secs(10); let ten_seconds_later = || Instant::now() + Duration::from_secs(10);
@@ -760,12 +714,12 @@ mod test {
table_engine.clone(), table_engine.clone(),
table_ident.clone(), table_ident.clone(),
1, 1,
|_| async move { finished_clone.store(true, Ordering::Relaxed) }, || async move { finished_clone.store(true, Ordering::Relaxed) },
); );
let tx = handle.tx.clone(); let tx = handle.tx.clone();
// assert countdown task is running // assert countdown task is running
tx.send(CountdownCommand::Start(5000)).await.unwrap(); assert!(tx.send(CountdownCommand::Start(5000)).await.is_ok());
assert!(!finished.load(Ordering::Relaxed)); assert!(!finished.load(Ordering::Relaxed));
drop(handle); drop(handle);
@@ -782,7 +736,7 @@ mod test {
let finished = Arc::new(AtomicBool::new(false)); let finished = Arc::new(AtomicBool::new(false));
let finished_clone = finished.clone(); let finished_clone = finished.clone();
let handle = CountdownTaskHandle::new(table_engine, table_ident, 1, |_| async move { let handle = CountdownTaskHandle::new(table_engine, table_ident, 1, || async move {
finished_clone.store(true, Ordering::Relaxed) finished_clone.store(true, Ordering::Relaxed)
}); });
handle.tx.send(CountdownCommand::Start(100)).await.unwrap(); handle.tx.send(CountdownCommand::Start(100)).await.unwrap();
@@ -797,9 +751,8 @@ mod test {
let catalog = "my_catalog"; let catalog = "my_catalog";
let schema = "my_schema"; let schema = "my_schema";
let table = "my_table"; let table = "my_table";
let table_id = 1;
let request = CreateTableRequest { let request = CreateTableRequest {
id: table_id, id: 1,
catalog_name: catalog.to_string(), catalog_name: catalog.to_string(),
schema_name: schema.to_string(), schema_name: schema.to_string(),
table_name: table.to_string(), table_name: table.to_string(),
@@ -815,15 +768,16 @@ mod test {
table_options: TableOptions::default(), table_options: TableOptions::default(),
engine: "mito".to_string(), engine: "mito".to_string(),
}; };
let table_ref = TableReference::full(catalog, schema, table);
let table_engine = Arc::new(MockTableEngine::default()); let table_engine = Arc::new(MockTableEngine::default());
let _ = table_engine.create_table(ctx, request).await.unwrap(); table_engine.create_table(ctx, request).await.unwrap();
let table_ident = TableIdent { let table_ident = TableIdent {
catalog: catalog.to_string(), catalog: catalog.to_string(),
schema: schema.to_string(), schema: schema.to_string(),
table: table.to_string(), table: table.to_string(),
table_id, table_id: 1024,
engine: "mito".to_string(), engine: "mito".to_string(),
}; };
let (tx, rx) = mpsc::channel(10); let (tx, rx) = mpsc::channel(10);
@@ -833,7 +787,7 @@ mod test {
region: 1, region: 1,
rx, rx,
}; };
let _handle = common_runtime::spawn_bg(async move { common_runtime::spawn_bg(async move {
task.run().await; task.run().await;
}); });
@@ -859,9 +813,9 @@ mod test {
.unwrap(); .unwrap();
// assert the table is closed after deadline is reached
assert!(table_engine.table_exists(ctx, table_id));
assert!(table_engine.table_exists(ctx, &table_ref));
// spare some time (2s) for the task to close the table
tokio::time::sleep(Duration::from_millis(2000)).await;
assert!(!table_engine.table_exists(ctx, table_id));
assert!(!table_engine.table_exists(ctx, &table_ref));
} }
} }

src/catalog/src/schema.rs (new file, 69 lines)
View File

@@ -0,0 +1,69 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::sync::Arc;
use async_trait::async_trait;
use table::TableRef;
use crate::error::{NotSupportedSnafu, Result};
/// Represents a schema, comprising a number of named tables.
#[async_trait]
pub trait SchemaProvider: Sync + Send {
/// Returns the schema provider as [`Any`](std::any::Any)
/// so that it can be downcast to a specific implementation.
fn as_any(&self) -> &dyn Any;
/// Retrieves the list of available table names in this schema.
async fn table_names(&self) -> Result<Vec<String>>;
/// Retrieves a specific table from the schema by name, provided it exists.
async fn table(&self, name: &str) -> Result<Option<TableRef>>;
/// If supported by the implementation, adds a new table to this schema.
/// If a table with the same name already exists, it returns a "Table already exists" error.
async fn register_table(&self, name: String, _table: TableRef) -> Result<Option<TableRef>> {
NotSupportedSnafu {
op: format!("register_table({name}, <table>)"),
}
.fail()
}
/// If supported by the implementation, renames an existing table in this schema and returns it.
/// If no table with that name exists, it returns a "Table not found" error.
async fn rename_table(&self, name: &str, new_name: String) -> Result<TableRef> {
NotSupportedSnafu {
op: format!("rename_table({name}, {new_name})"),
}
.fail()
}
/// If supported by the implementation, removes an existing table from this schema and returns it.
/// If no table of that name exists, returns Ok(None).
async fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
NotSupportedSnafu {
op: format!("deregister_table({name})"),
}
.fail()
}
/// If supported by the implementation, checks whether a table with the given name exists in the
/// schema provider. Returns false if there is no matching table in the schema provider;
/// otherwise, returns true.
async fn table_exist(&self, name: &str) -> Result<bool>;
}
pub type SchemaProviderRef = Arc<dyn SchemaProvider>;
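For a feel of how the trait above is typically satisfied, here is a minimal in-memory provider with the same surface; the crate's `Result`/`Error` and the `async_trait` plumbing are replaced with plain std types so the sketch stands alone (names are illustrative, not the crate's real implementation):

use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

// Stand-in for table::TableRef so the sketch compiles on its own.
type TableRef = Arc<str>;

/// An in-memory schema with the same surface as `SchemaProvider`,
/// minus the crate-specific error type and async plumbing.
#[derive(Default)]
struct MemorySchemaProvider {
    tables: RwLock<HashMap<String, TableRef>>,
}

impl MemorySchemaProvider {
    fn table_names(&self) -> Vec<String> {
        self.tables.read().unwrap().keys().cloned().collect()
    }

    fn table(&self, name: &str) -> Option<TableRef> {
        self.tables.read().unwrap().get(name).cloned()
    }

    fn register_table(&self, name: String, table: TableRef) -> Result<(), String> {
        match self.tables.write().unwrap().entry(name) {
            Entry::Occupied(e) => Err(format!("Table already exists: {}", e.key())),
            Entry::Vacant(e) => {
                e.insert(table);
                Ok(())
            }
        }
    }

    fn deregister_table(&self, name: &str) -> Option<TableRef> {
        self.tables.write().unwrap().remove(name)
    }

    fn table_exist(&self, name: &str) -> bool {
        self.tables.read().unwrap().contains_key(name)
    }
}

A real implementation would additionally go through `async_trait` and return the crate's `Result`, using the `NotSupportedSnafu` defaults shown above for the optional operations.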

View File

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
@@ -19,24 +20,27 @@ use common_catalog::consts::{
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MITO_ENGINE, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MITO_ENGINE,
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME, SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
}; };
use common_query::logical_plan::Expr;
use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
use common_recordbatch::SendableRecordBatchStream; use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::{debug, warn}; use common_telemetry::debug;
use common_time::util; use common_time::util;
use datatypes::prelude::{ConcreteDataType, ScalarVector, VectorRef}; use datatypes::prelude::{ConcreteDataType, ScalarVector, VectorRef};
use datatypes::schema::{ColumnSchema, RawSchema}; use datatypes::schema::{ColumnSchema, RawSchema, SchemaRef};
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector}; use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt}; use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::ScanRequest; use store_api::storage::ScanRequest;
use table::engine::{EngineContext, TableEngineRef}; use table::engine::{EngineContext, TableEngineRef};
use table::metadata::TableId; use table::metadata::{TableId, TableInfoRef};
use table::requests::{CreateTableRequest, InsertRequest, OpenTableRequest, TableOptions}; use table::requests::{
use table::TableRef; CreateTableRequest, DeleteRequest, InsertRequest, OpenTableRequest, TableOptions,
};
use table::{Result as TableResult, Table, TableRef};
use crate::error::{ use crate::error::{
self, CreateSystemCatalogSnafu, DeregisterTableSnafu, EmptyValueSnafu, Error, self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
InsertCatalogRecordSnafu, InvalidEntryTypeSnafu, InvalidKeySnafu, OpenSystemCatalogSnafu, OpenSystemCatalogSnafu, Result, ValueDeserializeSnafu,
Result, ValueDeserializeSnafu,
}; };
use crate::DeregisterTableRequest; use crate::DeregisterTableRequest;
@@ -46,6 +50,47 @@ pub const VALUE_INDEX: usize = 3;
pub struct SystemCatalogTable(TableRef); pub struct SystemCatalogTable(TableRef);
#[async_trait::async_trait]
impl Table for SystemCatalogTable {
fn as_any(&self) -> &dyn Any {
self
}
fn schema(&self) -> SchemaRef {
self.0.schema()
}
async fn scan(
&self,
projection: Option<&Vec<usize>>,
filters: &[Expr],
limit: Option<usize>,
) -> table::Result<PhysicalPlanRef> {
self.0.scan(projection, filters, limit).await
}
async fn scan_to_stream(&self, request: ScanRequest) -> TableResult<SendableRecordBatchStream> {
self.0.scan_to_stream(request).await
}
/// Insert values into table.
async fn insert(&self, request: InsertRequest) -> TableResult<usize> {
self.0.insert(request).await
}
fn table_info(&self) -> TableInfoRef {
self.0.table_info()
}
async fn delete(&self, request: DeleteRequest) -> TableResult<usize> {
self.0.delete(request).await
}
fn statistics(&self) -> Option<table::stats::TableStatistics> {
self.0.statistics()
}
}
impl SystemCatalogTable { impl SystemCatalogTable {
pub async fn new(engine: TableEngineRef) -> Result<Self> { pub async fn new(engine: TableEngineRef) -> Result<Self> {
let request = OpenTableRequest { let request = OpenTableRequest {
@@ -88,75 +133,19 @@ impl SystemCatalogTable {
} }
} }
pub async fn register_table(
&self,
catalog: String,
schema: String,
table_name: String,
table_id: TableId,
engine: String,
) -> Result<usize> {
let insert_request =
build_table_insert_request(catalog, schema, table_name, table_id, engine);
self.0
.insert(insert_request)
.await
.context(InsertCatalogRecordSnafu)
}
pub(crate) async fn deregister_table(
&self,
request: &DeregisterTableRequest,
table_id: TableId,
) -> Result<()> {
let deletion_request = build_table_deletion_request(request, table_id);
self.0
.insert(deletion_request)
.await
.map(|x| {
if x != 1 {
let table = common_catalog::format_full_table_name(
&request.catalog,
&request.schema,
&request.table_name
);
warn!("Failed to delete table record from information_schema, unexpected returned result: {x}, table: {table}");
}
})
.with_context(|_| DeregisterTableSnafu {
request: request.clone(),
})
}
pub async fn register_schema(&self, catalog: String, schema: String) -> Result<usize> {
let insert_request = build_schema_insert_request(catalog, schema);
self.0
.insert(insert_request)
.await
.context(InsertCatalogRecordSnafu)
}
/// Create a stream of all entries inside system catalog table /// Create a stream of all entries inside system catalog table
pub async fn records(&self) -> Result<SendableRecordBatchStream> { pub async fn records(&self) -> Result<SendableRecordBatchStream> {
let full_projection = None; let full_projection = None;
let scan_req = ScanRequest { let ctx = SessionContext::new();
sequence: None, let scan = self
projection: full_projection, .scan(full_projection, &[], None)
filters: vec![],
output_ordering: None,
limit: None,
};
let stream = self
.0
.scan_to_stream(scan_req)
.await .await
.context(error::SystemCatalogTableScanSnafu)?; .context(error::SystemCatalogTableScanSnafu)?;
let stream = scan
.execute(0, ctx.task_ctx())
.context(error::SystemCatalogTableScanExecSnafu)?;
Ok(stream) Ok(stream)
} }
pub fn as_table_ref(&self) -> TableRef {
self.0.clone()
}
} }
/// Build system catalog table schema. /// Build system catalog table schema.
@@ -222,50 +211,38 @@ pub fn build_table_insert_request(
build_insert_request( build_insert_request(
EntryType::Table, EntryType::Table,
entry_key.as_bytes(), entry_key.as_bytes(),
serde_json::to_string(&TableEntryValue { serde_json::to_string(&TableEntryValue { table_name, engine })
table_name, .unwrap()
engine, .as_bytes(),
is_deleted: false,
})
.unwrap()
.as_bytes(),
) )
} }
pub(crate) fn build_table_deletion_request( pub(crate) fn build_table_deletion_request(
request: &DeregisterTableRequest, request: &DeregisterTableRequest,
table_id: TableId, table_id: TableId,
) -> InsertRequest { ) -> DeleteRequest {
let entry_key = format_table_entry_key(&request.catalog, &request.schema, table_id); let table_key = format_table_entry_key(&request.catalog, &request.schema, table_id);
build_insert_request( DeleteRequest {
EntryType::Table, key_column_values: build_primary_key_columns(EntryType::Table, table_key.as_bytes()),
entry_key.as_bytes(), }
serde_json::to_string(&TableEntryValue {
table_name: "".to_string(),
engine: "".to_string(),
is_deleted: true,
})
.unwrap()
.as_bytes(),
)
} }
fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<String, VectorRef> { fn build_primary_key_columns(entry_type: EntryType, key: &[u8]) -> HashMap<String, VectorRef> {
HashMap::from([ let mut m = HashMap::with_capacity(3);
( m.insert(
"entry_type".to_string(), "entry_type".to_string(),
Arc::new(UInt8Vector::from_slice([entry_type as u8])) as VectorRef, Arc::new(UInt8Vector::from_slice([entry_type as u8])) as _,
), );
( m.insert(
"key".to_string(), "key".to_string(),
Arc::new(BinaryVector::from_slice(&[key])) as VectorRef, Arc::new(BinaryVector::from_slice(&[key])) as _,
), );
( // Timestamp in key part is intentionally left to 0
"timestamp".to_string(), m.insert(
// Timestamp in key part is intentionally left to 0 "timestamp".to_string(),
Arc::new(TimestampMillisecondVector::from_slice([0])) as VectorRef, Arc::new(TimestampMillisecondVector::from_slice([0])) as _,
), );
]) m
} }
pub fn build_schema_insert_request(catalog_name: String, schema_name: String) -> InsertRequest { pub fn build_schema_insert_request(catalog_name: String, schema_name: String) -> InsertRequest {
@@ -283,20 +260,20 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
let primary_key_columns = build_primary_key_columns(entry_type, key); let primary_key_columns = build_primary_key_columns(entry_type, key);
let mut columns_values = HashMap::with_capacity(6); let mut columns_values = HashMap::with_capacity(6);
columns_values.extend(primary_key_columns); columns_values.extend(primary_key_columns.into_iter());
let _ = columns_values.insert( columns_values.insert(
"value".to_string(), "value".to_string(),
Arc::new(BinaryVector::from_slice(&[value])) as _, Arc::new(BinaryVector::from_slice(&[value])) as _,
); );
let now = util::current_time_millis(); let now = util::current_time_millis();
let _ = columns_values.insert( columns_values.insert(
"gmt_created".to_string(), "gmt_created".to_string(),
Arc::new(TimestampMillisecondVector::from_slice([now])) as _, Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
); );
let _ = columns_values.insert( columns_values.insert(
"gmt_modified".to_string(), "gmt_modified".to_string(),
Arc::new(TimestampMillisecondVector::from_slice([now])) as _, Arc::new(TimestampMillisecondVector::from_slice([now])) as _,
); );
@@ -366,7 +343,6 @@ pub fn decode_system_catalog(
table_name: table_meta.table_name, table_name: table_meta.table_name,
table_id, table_id,
engine: table_meta.engine, engine: table_meta.engine,
is_deleted: table_meta.is_deleted,
})) }))
} }
} }
@@ -423,7 +399,6 @@ pub struct TableEntry {
pub table_name: String, pub table_name: String,
pub table_id: TableId, pub table_id: TableId,
pub engine: String, pub engine: String,
pub is_deleted: bool,
} }
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
@@ -432,19 +407,12 @@ pub struct TableEntryValue {
#[serde(default = "mito_engine")] #[serde(default = "mito_engine")]
pub engine: String, pub engine: String,
#[serde(default = "not_deleted")]
pub is_deleted: bool,
} }
fn mito_engine() -> String { fn mito_engine() -> String {
MITO_ENGINE.to_string() MITO_ENGINE.to_string()
} }
fn not_deleted() -> bool {
false
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use common_recordbatch::RecordBatches; use common_recordbatch::RecordBatches;
@@ -514,13 +482,14 @@ mod tests {
} }
#[test] #[test]
#[should_panic]
pub fn test_decode_mismatch() { pub fn test_decode_mismatch() {
assert!(decode_system_catalog( decode_system_catalog(
Some(EntryType::Table as u8), Some(EntryType::Table as u8),
Some("some_catalog.some_schema.42".as_bytes()), Some("some_catalog.some_schema.42".as_bytes()),
None, None,
) )
.is_err()); .unwrap();
} }
#[test] #[test]
@@ -535,14 +504,14 @@ mod tests {
let dir = create_temp_dir("system-table-test"); let dir = create_temp_dir("system-table-test");
let store_dir = dir.path().to_string_lossy(); let store_dir = dir.path().to_string_lossy();
let mut builder = object_store::services::Fs::default(); let mut builder = object_store::services::Fs::default();
let _ = builder.root(&store_dir); builder.root(&store_dir);
let object_store = ObjectStore::new(builder).unwrap().finish(); let object_store = ObjectStore::new(builder).unwrap().finish();
let noop_compaction_scheduler = Arc::new(NoopCompactionScheduler::default()); let noop_compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
let table_engine = Arc::new(MitoEngine::new( let table_engine = Arc::new(MitoEngine::new(
EngineConfig::default(), EngineConfig::default(),
EngineImpl::new( EngineImpl::new(
StorageEngineConfig::default(), StorageEngineConfig::default(),
Arc::new(NoopLogStore), Arc::new(NoopLogStore::default()),
object_store.clone(), object_store.clone(),
noop_compaction_scheduler, noop_compaction_scheduler,
) )
@@ -556,14 +525,14 @@ mod tests {
async fn test_system_table_type() { async fn test_system_table_type() {
let (_dir, table_engine) = prepare_table_engine().await; let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap(); let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
assert_eq!(Base, system_table.as_table_ref().table_type()); assert_eq!(Base, system_table.table_type());
} }
#[tokio::test] #[tokio::test]
async fn test_system_table_info() { async fn test_system_table_info() {
let (_dir, table_engine) = prepare_table_engine().await; let (_dir, table_engine) = prepare_table_engine().await;
let system_table = SystemCatalogTable::new(table_engine).await.unwrap(); let system_table = SystemCatalogTable::new(table_engine).await.unwrap();
let info = system_table.as_table_ref().table_info(); let info = system_table.table_info();
assert_eq!(TableType::Base, info.table_type); assert_eq!(TableType::Base, info.table_type);
assert_eq!(SYSTEM_CATALOG_TABLE_NAME, info.name); assert_eq!(SYSTEM_CATALOG_TABLE_NAME, info.name);
assert_eq!(SYSTEM_CATALOG_TABLE_ID, info.ident.table_id); assert_eq!(SYSTEM_CATALOG_TABLE_ID, info.ident.table_id);
@@ -576,16 +545,14 @@ mod tests {
let (_, table_engine) = prepare_table_engine().await; let (_, table_engine) = prepare_table_engine().await;
let catalog_table = SystemCatalogTable::new(table_engine).await.unwrap(); let catalog_table = SystemCatalogTable::new(table_engine).await.unwrap();
let result = catalog_table let table_insertion = build_table_insert_request(
.register_table( DEFAULT_CATALOG_NAME.to_string(),
DEFAULT_CATALOG_NAME.to_string(), DEFAULT_SCHEMA_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string(), "my_table".to_string(),
"my_table".to_string(), 1,
1, MITO_ENGINE.to_string(),
MITO_ENGINE.to_string(), );
) let result = catalog_table.insert(table_insertion).await.unwrap();
.await
.unwrap();
assert_eq!(result, 1); assert_eq!(result, 1);
let records = catalog_table.records().await.unwrap(); let records = catalog_table.records().await.unwrap();
@@ -595,15 +562,9 @@ mod tests {
assert_eq!(batch.num_rows(), 1); assert_eq!(batch.num_rows(), 1);
let row = batch.rows().next().unwrap(); let row = batch.rows().next().unwrap();
let Value::UInt8(entry_type) = row[0] else {
unreachable!()
};
let Value::UInt8(entry_type) = row[0] else { unreachable!() };
let Value::Binary(key) = row[1].clone() else {
unreachable!()
};
let Value::Binary(key) = row[1].clone() else { unreachable!() };
let Value::Binary(value) = row[3].clone() else {
unreachable!()
};
let Value::Binary(value) = row[3].clone() else { unreachable!() };
let entry = decode_system_catalog(Some(entry_type), Some(&*key), Some(&*value)).unwrap(); let entry = decode_system_catalog(Some(entry_type), Some(&*key), Some(&*value)).unwrap();
let expected = Entry::Table(TableEntry { let expected = Entry::Table(TableEntry {
catalog_name: DEFAULT_CATALOG_NAME.to_string(), catalog_name: DEFAULT_CATALOG_NAME.to_string(),
@@ -611,24 +572,22 @@ mod tests {
table_name: "my_table".to_string(), table_name: "my_table".to_string(),
table_id: 1, table_id: 1,
engine: MITO_ENGINE.to_string(), engine: MITO_ENGINE.to_string(),
is_deleted: false,
}); });
assert_eq!(entry, expected); assert_eq!(entry, expected);
catalog_table let table_deletion = build_table_deletion_request(
.deregister_table( &DeregisterTableRequest {
&DeregisterTableRequest { catalog: DEFAULT_CATALOG_NAME.to_string(),
catalog: DEFAULT_CATALOG_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(), table_name: "my_table".to_string(),
table_name: "my_table".to_string(), },
}, 1,
1, );
) let result = catalog_table.delete(table_deletion).await.unwrap();
.await assert_eq!(result, 1);
.unwrap();
let records = catalog_table.records().await.unwrap(); let records = catalog_table.records().await.unwrap();
let batches = RecordBatches::try_collect(records).await.unwrap().take(); let batches = RecordBatches::try_collect(records).await.unwrap().take();
assert_eq!(batches.len(), 1); assert_eq!(batches.len(), 0);
} }
} }

View File

@@ -24,7 +24,10 @@ use session::context::QueryContext;
use snafu::{ensure, OptionExt}; use snafu::{ensure, OptionExt};
use table::table::adapter::DfTableProviderAdapter; use table::table::adapter::DfTableProviderAdapter;
use crate::error::{QueryAccessDeniedSnafu, Result, TableNotExistSnafu}; use crate::error::{
CatalogNotFoundSnafu, QueryAccessDeniedSnafu, Result, SchemaNotFoundSnafu, TableNotExistSnafu,
};
use crate::information_schema::InformationSchemaProvider;
use crate::CatalogManagerRef; use crate::CatalogManagerRef;
pub struct DfTableSourceProvider { pub struct DfTableSourceProvider {
@@ -45,8 +48,8 @@ impl DfTableSourceProvider {
catalog_manager, catalog_manager,
disallow_cross_schema_query, disallow_cross_schema_query,
resolved_tables: HashMap::new(), resolved_tables: HashMap::new(),
default_catalog: query_ctx.current_catalog().to_owned(), default_catalog: query_ctx.current_catalog(),
default_schema: query_ctx.current_schema().to_owned(), default_schema: query_ctx.current_schema(),
} }
} }
@@ -101,18 +104,41 @@ impl DfTableSourceProvider {
let schema_name = table_ref.schema.as_ref(); let schema_name = table_ref.schema.as_ref();
let table_name = table_ref.table.as_ref(); let table_name = table_ref.table.as_ref();
let table = self let schema = if schema_name != INFORMATION_SCHEMA_NAME {
.catalog_manager let catalog = self
.table(catalog_name, schema_name, table_name) .catalog_manager
.catalog(catalog_name)
.await?
.context(CatalogNotFoundSnafu { catalog_name })?;
catalog
.schema(schema_name)
.await?
.context(SchemaNotFoundSnafu {
catalog: catalog_name,
schema: schema_name,
})?
} else {
let catalog_provider = self
.catalog_manager
.catalog(catalog_name)
.await?
.context(CatalogNotFoundSnafu { catalog_name })?;
Arc::new(InformationSchemaProvider::new(
catalog_name.to_string(),
catalog_provider,
))
};
let table = schema
.table(table_name)
.await? .await?
.with_context(|| TableNotExistSnafu { .with_context(|| TableNotExistSnafu {
table: format_full_table_name(catalog_name, schema_name, table_name), table: format_full_table_name(catalog_name, schema_name, table_name),
})?; })?;
let provider = DfTableProviderAdapter::new(table); let table = DfTableProviderAdapter::new(table);
let source = provider_as_source(Arc::new(provider)); let table = provider_as_source(Arc::new(table));
let _ = self.resolved_tables.insert(resolved_name, source.clone()); self.resolved_tables.insert(resolved_name, table.clone());
Ok(source) Ok(table)
} }
} }
@@ -130,20 +156,20 @@ mod tests {
let query_ctx = &QueryContext::with("greptime", "public"); let query_ctx = &QueryContext::with("greptime", "public");
let table_provider = let table_provider =
DfTableSourceProvider::new(MemoryCatalogManager::with_default_setup(), true, query_ctx); DfTableSourceProvider::new(Arc::new(MemoryCatalogManager::default()), true, query_ctx);
let table_ref = TableReference::Bare { let table_ref = TableReference::Bare {
table: Cow::Borrowed("table_name"), table: Cow::Borrowed("table_name"),
}; };
let result = table_provider.resolve_table_ref(table_ref); let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap(); assert!(result.is_ok());
let table_ref = TableReference::Partial { let table_ref = TableReference::Partial {
schema: Cow::Borrowed("public"), schema: Cow::Borrowed("public"),
table: Cow::Borrowed("table_name"), table: Cow::Borrowed("table_name"),
}; };
let result = table_provider.resolve_table_ref(table_ref); let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap(); assert!(result.is_ok());
let table_ref = TableReference::Partial { let table_ref = TableReference::Partial {
schema: Cow::Borrowed("wrong_schema"), schema: Cow::Borrowed("wrong_schema"),
@@ -158,7 +184,7 @@ mod tests {
table: Cow::Borrowed("table_name"), table: Cow::Borrowed("table_name"),
}; };
let result = table_provider.resolve_table_ref(table_ref); let result = table_provider.resolve_table_ref(table_ref);
let _ = result.unwrap(); assert!(result.is_ok());
let table_ref = TableReference::Full { let table_ref = TableReference::Full {
catalog: Cow::Borrowed("wrong_catalog"), catalog: Cow::Borrowed("wrong_catalog"),
@@ -172,14 +198,14 @@ mod tests {
schema: Cow::Borrowed("information_schema"), schema: Cow::Borrowed("information_schema"),
table: Cow::Borrowed("columns"), table: Cow::Borrowed("columns"),
}; };
let _ = table_provider.resolve_table_ref(table_ref).unwrap(); assert!(table_provider.resolve_table_ref(table_ref).is_ok());
let table_ref = TableReference::Full { let table_ref = TableReference::Full {
catalog: Cow::Borrowed("greptime"), catalog: Cow::Borrowed("greptime"),
schema: Cow::Borrowed("information_schema"), schema: Cow::Borrowed("information_schema"),
table: Cow::Borrowed("columns"), table: Cow::Borrowed("columns"),
}; };
let _ = table_provider.resolve_table_ref(table_ref).unwrap(); assert!(table_provider.resolve_table_ref(table_ref).is_ok());
let table_ref = TableReference::Full { let table_ref = TableReference::Full {
catalog: Cow::Borrowed("dummy"), catalog: Cow::Borrowed("dummy"),

View File

@@ -14,17 +14,50 @@
// The `tables` table in the system catalog keeps a record of all tables created by users.
use std::any::Any;
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait;
use common_catalog::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
use common_telemetry::logging;
use snafu::ResultExt;
use table::metadata::TableId; use table::metadata::TableId;
use table::{Table, TableRef};
use crate::system::SystemCatalogTable; use crate::error::{self, Error, InsertCatalogRecordSnafu, Result as CatalogResult};
use crate::DeregisterTableRequest; use crate::system::{
build_schema_insert_request, build_table_deletion_request, build_table_insert_request,
SystemCatalogTable,
};
use crate::{CatalogProvider, DeregisterTableRequest, SchemaProvider, SchemaProviderRef};
pub struct InformationSchema { pub struct InformationSchema {
pub system: Arc<SystemCatalogTable>, pub system: Arc<SystemCatalogTable>,
} }
#[async_trait]
impl SchemaProvider for InformationSchema {
fn as_any(&self) -> &dyn Any {
self
}
async fn table_names(&self) -> Result<Vec<String>, Error> {
Ok(vec![SYSTEM_CATALOG_TABLE_NAME.to_string()])
}
async fn table(&self, name: &str) -> Result<Option<TableRef>, Error> {
if name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME) {
Ok(Some(self.system.clone()))
} else {
Ok(None)
}
}
async fn table_exist(&self, name: &str) -> Result<bool, Error> {
Ok(name.eq_ignore_ascii_case(SYSTEM_CATALOG_TABLE_NAME))
}
}
pub struct SystemCatalog { pub struct SystemCatalog {
pub information_schema: Arc<InformationSchema>, pub information_schema: Arc<InformationSchema>,
} }
@@ -47,21 +80,36 @@ impl SystemCatalog {
table_id: TableId, table_id: TableId,
engine: String, engine: String,
) -> crate::error::Result<usize> { ) -> crate::error::Result<usize> {
let request = build_table_insert_request(catalog, schema, table_name, table_id, engine);
self.information_schema self.information_schema
.system .system
.register_table(catalog, schema, table_name, table_id, engine) .insert(request)
.await .await
.context(InsertCatalogRecordSnafu)
} }
pub(crate) async fn deregister_table( pub(crate) async fn deregister_table(
&self, &self,
request: &DeregisterTableRequest, request: &DeregisterTableRequest,
table_id: TableId, table_id: TableId,
) -> crate::error::Result<()> { ) -> CatalogResult<()> {
self.information_schema self.information_schema
.system .system
.deregister_table(request, table_id) .delete(build_table_deletion_request(request, table_id))
.await .await
.map(|x| {
if x != 1 {
let table = common_catalog::format_full_table_name(
&request.catalog,
&request.schema,
&request.table_name
);
logging::warn!("Failed to delete table record from information_schema, unexpected returned result: {x}, table: {table}");
}
})
.with_context(|_| error::DeregisterTableSnafu {
request: request.clone(),
})
} }
pub async fn register_schema( pub async fn register_schema(
@@ -69,9 +117,38 @@ impl SystemCatalog {
catalog: String, catalog: String,
schema: String, schema: String,
) -> crate::error::Result<usize> { ) -> crate::error::Result<usize> {
let request = build_schema_insert_request(catalog, schema);
self.information_schema self.information_schema
.system .system
.register_schema(catalog, schema) .insert(request)
.await .await
.context(InsertCatalogRecordSnafu)
}
}
#[async_trait::async_trait]
impl CatalogProvider for SystemCatalog {
fn as_any(&self) -> &dyn Any {
self
}
async fn schema_names(&self) -> Result<Vec<String>, Error> {
Ok(vec![INFORMATION_SCHEMA_NAME.to_string()])
}
async fn register_schema(
&self,
_name: String,
_schema: SchemaProviderRef,
) -> Result<Option<SchemaProviderRef>, Error> {
panic!("System catalog does not support registering schema!")
}
async fn schema(&self, name: &str) -> Result<Option<Arc<dyn SchemaProvider>>, Error> {
if name.eq_ignore_ascii_case(INFORMATION_SCHEMA_NAME) {
Ok(Some(self.information_schema.clone()))
} else {
Ok(None)
}
} }
} }
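The lookup behavior of the `InformationSchema` provider defined in this file is deliberately narrow: only the system catalog table name resolves, and the comparison is case-insensitive. Below is a hedged, test-style sketch of that behavior; it is not part of this change, and the `new_system_table()` constructor it calls is hypothetical.

#[tokio::test]
async fn information_schema_exposes_only_the_system_catalog_table() {
    // Hypothetical helper: obtain a `SystemCatalogTable` suitable for testing.
    let info_schema = InformationSchema {
        system: Arc::new(new_system_table().await),
    };
    // Only SYSTEM_CATALOG_TABLE_NAME resolves, regardless of letter case...
    assert!(info_schema
        .table(SYSTEM_CATALOG_TABLE_NAME)
        .await
        .unwrap()
        .is_some());
    assert!(info_schema
        .table_exist(&SYSTEM_CATALOG_TABLE_NAME.to_uppercase())
        .await
        .unwrap());
    // ...anything else yields None.
    assert!(info_schema.table("no_such_table").await.unwrap().is_none());
}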

View File

@@ -24,6 +24,7 @@ mod tests {
use mito::config::EngineConfig; use mito::config::EngineConfig;
use table::engine::manager::MemoryTableEngineManager; use table::engine::manager::MemoryTableEngineManager;
use table::table::numbers::NumbersTable; use table::table::numbers::NumbersTable;
use table::TableRef;
use tokio::sync::Mutex; use tokio::sync::Mutex;
async fn create_local_catalog_manager( async fn create_local_catalog_manager(
@@ -48,12 +49,13 @@ mod tests {
// register table // register table
let table_name = "test_table"; let table_name = "test_table";
let table_id = 42; let table_id = 42;
let table = Arc::new(NumbersTable::new(table_id));
let request = RegisterTableRequest { let request = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(), catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: table_name.to_string(), table_name: table_name.to_string(),
table_id, table_id,
table: NumbersTable::table(table_id), table: table.clone(),
}; };
assert!(catalog_manager.register_table(request).await.unwrap()); assert!(catalog_manager.register_table(request).await.unwrap());
@@ -87,7 +89,7 @@ mod tests {
schema: DEFAULT_SCHEMA_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(), table_name: "test_table".to_string(),
table_id: 42, table_id: 42,
table: NumbersTable::table(42), table: Arc::new(NumbersTable::new(42)),
}; };
assert!(catalog_manager assert!(catalog_manager
.register_table(request.clone()) .register_table(request.clone())
@@ -103,7 +105,7 @@ mod tests {
schema: DEFAULT_SCHEMA_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(), table_name: "test_table".to_string(),
table_id: 43, table_id: 43,
table: NumbersTable::table(43), table: Arc::new(NumbersTable::new(43)),
}) })
.await .await
.unwrap_err(); .unwrap_err();
@@ -122,7 +124,7 @@ mod tests {
rt.block_on(async { create_local_catalog_manager().await.unwrap() }); rt.block_on(async { create_local_catalog_manager().await.unwrap() });
let catalog_manager = Arc::new(catalog_manager); let catalog_manager = Arc::new(catalog_manager);
let succeed = Arc::new(Mutex::new(None)); let succeed: Arc<Mutex<Option<TableRef>>> = Arc::new(Mutex::new(None));
let mut handles = Vec::with_capacity(8); let mut handles = Vec::with_capacity(8);
for i in 0..8 { for i in 0..8 {
@@ -130,21 +132,20 @@ mod tests {
let succeed = succeed.clone(); let succeed = succeed.clone();
let handle = rt.spawn(async move { let handle = rt.spawn(async move {
let table_id = 42 + i; let table_id = 42 + i;
let table = NumbersTable::table(table_id); let table = Arc::new(NumbersTable::new(table_id));
let table_info = table.table_info();
let req = RegisterTableRequest { let req = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(), catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(), schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "test_table".to_string(), table_name: "test_table".to_string(),
table_id, table_id,
table, table: table.clone(),
}; };
match catalog.register_table(req).await { match catalog.register_table(req).await {
Ok(res) => { Ok(res) => {
if res { if res {
let mut succeed = succeed.lock().await; let mut succeed = succeed.lock().await;
info!("Successfully registered table: {}", table_id); info!("Successfully registered table: {}", table_id);
*succeed = Some(table_info); *succeed = Some(table);
} }
} }
Err(_) => { Err(_) => {
@@ -160,7 +161,7 @@ mod tests {
handle.await.unwrap(); handle.await.unwrap();
} }
let guard = succeed.lock().await; let guard = succeed.lock().await;
let table_info = guard.as_ref().unwrap(); let table = guard.as_ref().unwrap();
let table_registered = catalog_manager let table_registered = catalog_manager
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table") .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
.await .await
@@ -168,7 +169,7 @@ mod tests {
.unwrap(); .unwrap();
assert_eq!( assert_eq!(
table_registered.table_info().ident.table_id, table_registered.table_info().ident.table_id,
table_info.ident.table_id table.table_info().ident.table_id
); );
}); });
} }

View File

@@ -21,21 +21,18 @@ mod tests {
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use catalog::remote::mock::MockTableEngine; use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
use catalog::remote::mock::{MockKvBackend, MockTableEngine};
use catalog::remote::region_alive_keeper::RegionAliveKeepers; use catalog::remote::region_alive_keeper::RegionAliveKeepers;
use catalog::remote::{CachedMetaKvBackend, RemoteCatalogManager}; use catalog::remote::{
use catalog::{CatalogManager, RegisterSchemaRequest, RegisterTableRequest}; CachedMetaKvBackend, KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider,
use common_catalog::consts::{ RemoteSchemaProvider,
DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, INFORMATION_SCHEMA_NAME, MITO_ENGINE,
}; };
use common_meta::helper::CatalogValue; use catalog::{CatalogManager, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MITO_ENGINE};
use common_meta::ident::TableIdent; use common_meta::ident::TableIdent;
use common_meta::key::catalog_name::CatalogNameKey;
use common_meta::key::TableMetadataManager;
use common_meta::kv_backend::memory::MemoryKvBackend;
use common_meta::kv_backend::KvBackend;
use common_meta::rpc::store::{CompareAndPutRequest, PutRequest};
use datatypes::schema::RawSchema; use datatypes::schema::RawSchema;
use futures_util::StreamExt;
use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef}; use table::engine::manager::{MemoryTableEngineManager, TableEngineManagerRef};
use table::engine::{EngineContext, TableEngineRef}; use table::engine::{EngineContext, TableEngineRef};
use table::requests::CreateTableRequest; use table::requests::CreateTableRequest;
@@ -43,6 +40,7 @@ mod tests {
use tokio::time::Instant; use tokio::time::Instant;
struct TestingComponents { struct TestingComponents {
kv_backend: KvBackendRef,
catalog_manager: Arc<RemoteCatalogManager>, catalog_manager: Arc<RemoteCatalogManager>,
table_engine_manager: TableEngineManagerRef, table_engine_manager: TableEngineManagerRef,
region_alive_keepers: Arc<RegionAliveKeepers>, region_alive_keepers: Arc<RegionAliveKeepers>,
@@ -55,59 +53,95 @@ mod tests {
} }
#[tokio::test] #[tokio::test]
async fn test_cached_backend() { async fn test_backend() {
let backend = CachedMetaKvBackend::wrap(Arc::new(MemoryKvBackend::default())); common_telemetry::init_default_ut_logging();
let backend = MockKvBackend::default();
let default_catalog_key = CatalogNameKey::new(DEFAULT_CATALOG_NAME).to_string(); let default_catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
let req = PutRequest::new() }
.with_key(default_catalog_key.as_bytes()) .to_string();
.with_value(CatalogValue.as_bytes().unwrap());
backend.put(req).await.unwrap();
let ret = backend.get(b"__catalog_name/greptime").await.unwrap();
let _ = ret.unwrap();
let req = CompareAndPutRequest::new()
.with_key(b"__catalog_name/greptime".to_vec())
.with_expect(CatalogValue.as_bytes().unwrap())
.with_value(b"123".to_vec());
let _ = backend.compare_and_put(req).await.unwrap();
let ret = backend.get(b"__catalog_name/greptime").await.unwrap();
assert_eq!(b"123", ret.as_ref().unwrap().value.as_slice());
let req = PutRequest::new()
.with_key(b"__catalog_name/greptime".to_vec())
.with_value(b"1234".to_vec());
let _ = backend.put(req).await;
let ret = backend.get(b"__catalog_name/greptime").await.unwrap();
assert_eq!(b"1234", ret.unwrap().value.as_slice());
backend backend
.delete(b"__catalog_name/greptime", false) .set(
default_catalog_key.as_bytes(),
&CatalogValue {}.as_bytes().unwrap(),
)
.await .await
.unwrap(); .unwrap();
let ret = backend.get(b"__catalog_name/greptime").await.unwrap(); let schema_key = SchemaKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
}
.to_string();
backend
.set(schema_key.as_bytes(), &SchemaValue {}.as_bytes().unwrap())
.await
.unwrap();
let mut iter = backend.range("__c-".as_bytes());
let mut res = HashSet::new();
while let Some(r) = iter.next().await {
let kv = r.unwrap();
res.insert(String::from_utf8_lossy(&kv.0).to_string());
}
assert_eq!(
vec!["__c-greptime".to_string()],
res.into_iter().collect::<Vec<_>>()
);
}
#[tokio::test]
async fn test_cached_backend() {
common_telemetry::init_default_ut_logging();
let backend = CachedMetaKvBackend::wrap(Arc::new(MockKvBackend::default()));
let default_catalog_key = CatalogKey {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
}
.to_string();
backend
.set(
default_catalog_key.as_bytes(),
&CatalogValue {}.as_bytes().unwrap(),
)
.await
.unwrap();
let ret = backend.get(b"__c-greptime").await.unwrap();
assert!(ret.is_some());
let _ = backend
.compare_and_set(
b"__c-greptime",
&CatalogValue {}.as_bytes().unwrap(),
b"123",
)
.await
.unwrap();
let ret = backend.get(b"__c-greptime").await.unwrap();
assert!(ret.is_some());
assert_eq!(&b"123"[..], &(ret.as_ref().unwrap().1));
let _ = backend.set(b"__c-greptime", b"1234").await;
let ret = backend.get(b"__c-greptime").await.unwrap();
assert!(ret.is_some());
assert_eq!(&b"1234"[..], &(ret.as_ref().unwrap().1));
backend.delete(b"__c-greptime").await.unwrap();
let ret = backend.get(b"__c-greptime").await.unwrap();
assert!(ret.is_none()); assert!(ret.is_none());
} }
async fn prepare_components(node_id: u64) -> TestingComponents { async fn prepare_components(node_id: u64) -> TestingComponents {
let backend = Arc::new(MemoryKvBackend::default()); let cached_backend = Arc::new(CachedMetaKvBackend::wrap(
Arc::new(MockKvBackend::default()),
let req = PutRequest::new() ));
.with_key(b"__catalog_name/greptime".to_vec())
.with_value(b"".to_vec());
backend.put(req).await.unwrap();
let req = PutRequest::new()
.with_key(b"__schema_name/greptime-public".to_vec())
.with_value(b"".to_vec());
backend.put(req).await.unwrap();
let cached_backend = Arc::new(CachedMetaKvBackend::wrap(backend));
let table_engine = Arc::new(MockTableEngine::default()); let table_engine = Arc::new(MockTableEngine::default());
let engine_manager = Arc::new(MemoryTableEngineManager::alias( let engine_manager = Arc::new(MemoryTableEngineManager::alias(
@@ -120,12 +154,13 @@ mod tests {
let catalog_manager = RemoteCatalogManager::new( let catalog_manager = RemoteCatalogManager::new(
engine_manager.clone(), engine_manager.clone(),
node_id, node_id,
cached_backend.clone(),
region_alive_keepers.clone(), region_alive_keepers.clone(),
Arc::new(TableMetadataManager::new(cached_backend)),
); );
catalog_manager.start().await.unwrap(); catalog_manager.start().await.unwrap();
TestingComponents { TestingComponents {
kv_backend: cached_backend,
catalog_manager: Arc::new(catalog_manager), catalog_manager: Arc::new(catalog_manager),
table_engine_manager: engine_manager, table_engine_manager: engine_manager,
region_alive_keepers, region_alive_keepers,
@@ -144,17 +179,14 @@ mod tests {
catalog_manager.catalog_names().await.unwrap() catalog_manager.catalog_names().await.unwrap()
); );
let mut schema_names = catalog_manager let default_catalog = catalog_manager
.schema_names(DEFAULT_CATALOG_NAME) .catalog(DEFAULT_CATALOG_NAME)
.await .await
.unwrap()
.unwrap(); .unwrap();
schema_names.sort_unstable();
assert_eq!( assert_eq!(
vec![ vec![DEFAULT_SCHEMA_NAME.to_string()],
INFORMATION_SCHEMA_NAME.to_string(), default_catalog.schema_names().await.unwrap()
DEFAULT_SCHEMA_NAME.to_string()
],
schema_names
); );
} }
@@ -210,20 +242,23 @@ mod tests {
async fn test_register_table() { async fn test_register_table() {
let node_id = 42; let node_id = 42;
let components = prepare_components(node_id).await; let components = prepare_components(node_id).await;
let mut schema_names = components let catalog_manager = &components.catalog_manager;
.catalog_manager let default_catalog = catalog_manager
.schema_names(DEFAULT_CATALOG_NAME) .catalog(DEFAULT_CATALOG_NAME)
.await .await
.unwrap()
.unwrap(); .unwrap();
schema_names.sort_unstable();
assert_eq!( assert_eq!(
vec![ vec![DEFAULT_SCHEMA_NAME.to_string()],
INFORMATION_SCHEMA_NAME.to_string(), default_catalog.schema_names().await.unwrap()
DEFAULT_SCHEMA_NAME.to_string(),
],
schema_names
); );
let default_schema = default_catalog
.schema(DEFAULT_SCHEMA_NAME)
.await
.unwrap()
.unwrap();
// register a new table with a nonexistent catalog // register a new table with a nonexistent catalog
let catalog_name = DEFAULT_CATALOG_NAME.to_string(); let catalog_name = DEFAULT_CATALOG_NAME.to_string();
let schema_name = DEFAULT_SCHEMA_NAME.to_string(); let schema_name = DEFAULT_SCHEMA_NAME.to_string();
@@ -258,18 +293,10 @@ mod tests {
table_id, table_id,
table, table,
}; };
assert!(components assert!(catalog_manager.register_table(reg_req).await.unwrap());
.catalog_manager
.register_table(reg_req)
.await
.unwrap());
assert_eq!( assert_eq!(
vec![table_name], vec![table_name],
components default_schema.table_names().await.unwrap()
.catalog_manager
.table_names(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
.await
.unwrap()
); );
} }
@@ -277,23 +304,29 @@ mod tests {
async fn test_register_catalog_schema_table() { async fn test_register_catalog_schema_table() {
let node_id = 42; let node_id = 42;
let components = prepare_components(node_id).await; let components = prepare_components(node_id).await;
let backend = &components.kv_backend;
let catalog_manager = components.catalog_manager.clone();
let engine_manager = components.table_engine_manager.clone();
let catalog_name = "test_catalog".to_string(); let catalog_name = "test_catalog".to_string();
let schema_name = "nonexistent_schema".to_string(); let schema_name = "nonexistent_schema".to_string();
let catalog = Arc::new(RemoteCatalogProvider::new(
catalog_name.clone(),
backend.clone(),
engine_manager.clone(),
node_id,
components.region_alive_keepers.clone(),
));
// register catalog to catalog manager // register catalog to catalog manager
assert!(components CatalogManager::register_catalog(&*catalog_manager, catalog_name.clone(), catalog)
.catalog_manager
.clone()
.register_catalog(catalog_name.clone())
.await .await
.is_ok()); .unwrap();
assert_eq!( assert_eq!(
HashSet::<String>::from_iter(vec![ HashSet::<String>::from_iter(
DEFAULT_CATALOG_NAME.to_string(), vec![DEFAULT_CATALOG_NAME.to_string(), catalog_name.clone()].into_iter()
catalog_name.clone() ),
]), HashSet::from_iter(catalog_manager.catalog_names().await.unwrap().into_iter())
HashSet::from_iter(components.catalog_manager.catalog_names().await.unwrap())
); );
let table_to_register = components let table_to_register = components
@@ -326,34 +359,38 @@ mod tests {
}; };
// this register will fail since schema does not exist yet // this register will fail since schema does not exist yet
assert_matches!( assert_matches!(
components catalog_manager
.catalog_manager
.register_table(reg_req.clone()) .register_table(reg_req.clone())
.await .await
.unwrap_err(), .unwrap_err(),
catalog::error::Error::SchemaNotFound { .. } catalog::error::Error::SchemaNotFound { .. }
); );
let register_schema_request = RegisterSchemaRequest { let new_catalog = catalog_manager
catalog: catalog_name.to_string(), .catalog(&catalog_name)
schema: schema_name.to_string(),
};
assert!(components
.catalog_manager
.register_schema(register_schema_request)
.await .await
.expect("Register schema should not fail")); .unwrap()
assert!(components .expect("catalog should exist since it's already registered");
.catalog_manager let schema = Arc::new(RemoteSchemaProvider::new(
.register_table(reg_req) catalog_name.clone(),
schema_name.clone(),
node_id,
engine_manager,
backend.clone(),
components.region_alive_keepers.clone(),
));
let prev = new_catalog
.register_schema(schema_name.clone(), schema.clone())
.await .await
.unwrap()); .expect("Register schema should not fail");
assert!(prev.is_none());
assert!(catalog_manager.register_table(reg_req).await.unwrap());
assert_eq!( assert_eq!(
HashSet::from([schema_name.clone(), INFORMATION_SCHEMA_NAME.to_string()]), HashSet::from([schema_name.clone()]),
components new_catalog
.catalog_manager .schema_names()
.schema_names(&catalog_name)
.await .await
.unwrap() .unwrap()
.into_iter() .into_iter()
@@ -396,7 +433,7 @@ mod tests {
assert!(catalog_manager.register_table(request).await.unwrap()); assert!(catalog_manager.register_table(request).await.unwrap());
let keeper = region_alive_keepers let keeper = region_alive_keepers
.find_keeper(table_before.table_id) .find_keeper(&table_before)
.await .await
.unwrap(); .unwrap();
let deadline = keeper.deadline(0).await.unwrap(); let deadline = keeper.deadline(0).await.unwrap();
@@ -435,7 +472,7 @@ mod tests {
assert!(catalog_manager.register_table(request).await.unwrap()); assert!(catalog_manager.register_table(request).await.unwrap());
let keeper = region_alive_keepers let keeper = region_alive_keepers
.find_keeper(table_after.table_id) .find_keeper(&table_after)
.await .await
.unwrap(); .unwrap();
let deadline = keeper.deadline(0).await.unwrap(); let deadline = keeper.deadline(0).await.unwrap();
@@ -443,7 +480,7 @@ mod tests {
assert!(deadline <= Instant::now() + Duration::from_secs(20)); assert!(deadline <= Instant::now() + Duration::from_secs(20));
let keeper = region_alive_keepers let keeper = region_alive_keepers
.find_keeper(table_before.table_id) .find_keeper(&table_before)
.await .await
.unwrap(); .unwrap();
let deadline = keeper.deadline(0).await.unwrap(); let deadline = keeper.deadline(0).await.unwrap();

View File

@@ -8,21 +8,21 @@ license.workspace = true
testing = [] testing = []
[dependencies] [dependencies]
api = { workspace = true } api = { path = "../api" }
arrow-flight.workspace = true arrow-flight.workspace = true
async-stream.workspace = true async-stream.workspace = true
common-base = { workspace = true } common-base = { path = "../common/base" }
common-catalog = { workspace = true } common-catalog = { path = "../common/catalog" }
common-error = { workspace = true } common-error = { path = "../common/error" }
common-grpc = { workspace = true } common-grpc = { path = "../common/grpc" }
common-meta = { workspace = true } common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { workspace = true } common-query = { path = "../common/query" }
common-recordbatch = { workspace = true } common-recordbatch = { path = "../common/recordbatch" }
common-telemetry = { workspace = true } common-time = { path = "../common/time" }
common-time = { workspace = true } common-meta = { path = "../common/meta" }
common-telemetry = { path = "../common/telemetry" }
datafusion.workspace = true datafusion.workspace = true
datatypes = { workspace = true } datatypes = { path = "../datatypes" }
derive_builder.workspace = true
enum_dispatch = "0.3" enum_dispatch = "0.3"
futures-util.workspace = true futures-util.workspace = true
moka = { version = "0.9", features = ["future"] } moka = { version = "0.9", features = ["future"] }
@@ -35,13 +35,11 @@ tokio.workspace = true
tonic.workspace = true tonic.workspace = true
[dev-dependencies] [dev-dependencies]
common-grpc-expr = { workspace = true } datanode = { path = "../datanode" }
datanode = { workspace = true } substrait = { path = "../common/substrait" }
derive-new = "0.5"
prost.workspace = true
substrait = { workspace = true }
tracing = "0.1" tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] }
prost.workspace = true
[dev-dependencies.substrait_proto] [dev-dependencies.substrait_proto]
package = "substrait" package = "substrait"

View File

@@ -73,7 +73,7 @@ async fn run() {
let logical = mock_logical_plan(); let logical = mock_logical_plan();
event!(Level::INFO, "plan size: {:#?}", logical.len()); event!(Level::INFO, "plan size: {:#?}", logical.len());
let result = db.logical_plan(logical, None).await.unwrap(); let result = db.logical_plan(logical).await.unwrap();
event!(Level::INFO, "result: {:#?}", result); event!(Level::INFO, "result: {:#?}", result);
} }

View File

@@ -1,182 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::*;
use client::{Client, Database, DEFAULT_SCHEMA_NAME};
use derive_new::new;
use tracing::{error, info};
fn main() {
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
.unwrap();
run();
}
#[tokio::main]
async fn run() {
let greptimedb_endpoint =
std::env::var("GREPTIMEDB_ENDPOINT").unwrap_or_else(|_| "localhost:4001".to_owned());
let greptimedb_dbname =
std::env::var("GREPTIMEDB_DBNAME").unwrap_or_else(|_| DEFAULT_SCHEMA_NAME.to_owned());
let grpc_client = Client::with_urls(vec![&greptimedb_endpoint]);
let client = Database::new_with_dbname(greptimedb_dbname, grpc_client);
let stream_inserter = client.streaming_inserter().unwrap();
if let Err(e) = stream_inserter
.insert(vec![to_insert_request(weather_records_1())])
.await
{
error!("Error: {e}");
}
if let Err(e) = stream_inserter
.insert(vec![to_insert_request(weather_records_2())])
.await
{
error!("Error: {e}");
}
let result = stream_inserter.finish().await;
match result {
Ok(rows) => {
info!("Rows written: {rows}");
}
Err(e) => {
error!("Error: {e}");
}
};
}
#[derive(new)]
struct WeatherRecord {
timestamp_millis: i64,
collector: String,
temperature: f32,
humidity: i32,
}
fn weather_records_1() -> Vec<WeatherRecord> {
vec![
WeatherRecord::new(1686109527000, "c1".to_owned(), 26.4, 15),
WeatherRecord::new(1686023127000, "c1".to_owned(), 29.3, 20),
WeatherRecord::new(1685936727000, "c1".to_owned(), 31.8, 13),
WeatherRecord::new(1686109527000, "c2".to_owned(), 20.4, 67),
WeatherRecord::new(1686023127000, "c2".to_owned(), 18.0, 74),
WeatherRecord::new(1685936727000, "c2".to_owned(), 19.2, 81),
]
}
fn weather_records_2() -> Vec<WeatherRecord> {
vec![
WeatherRecord::new(1686109527001, "c3".to_owned(), 26.4, 15),
WeatherRecord::new(1686023127002, "c3".to_owned(), 29.3, 20),
WeatherRecord::new(1685936727003, "c3".to_owned(), 31.8, 13),
WeatherRecord::new(1686109527004, "c4".to_owned(), 20.4, 67),
WeatherRecord::new(1686023127005, "c4".to_owned(), 18.0, 74),
WeatherRecord::new(1685936727006, "c4".to_owned(), 19.2, 81),
]
}
/// This function generates some random data and bundles them into an
/// `InsertRequest`.
///
/// Data structure:
///
/// - `ts`: a timestamp column
/// - `collector`: a tag column
/// - `temperature`: a value field of f32
/// - `humidity`: a value field of i32
///
fn to_insert_request(records: Vec<WeatherRecord>) -> InsertRequest {
// convert records into columns
let rows = records.len();
// transpose records into columns
let (timestamp_millis, collectors, temp, humidity) = records.into_iter().fold(
(
Vec::with_capacity(rows),
Vec::with_capacity(rows),
Vec::with_capacity(rows),
Vec::with_capacity(rows),
),
|mut acc, rec| {
acc.0.push(rec.timestamp_millis);
acc.1.push(rec.collector);
acc.2.push(rec.temperature);
acc.3.push(rec.humidity);
acc
},
);
let columns = vec![
// timestamp column: `ts`
Column {
column_name: "ts".to_owned(),
values: Some(column::Values {
ts_millisecond_values: timestamp_millis,
..Default::default()
}),
semantic_type: SemanticType::Timestamp as i32,
datatype: ColumnDataType::TimestampMillisecond as i32,
..Default::default()
},
// tag column: collectors
Column {
column_name: "collector".to_owned(),
values: Some(column::Values {
string_values: collectors.into_iter().collect(),
..Default::default()
}),
semantic_type: SemanticType::Tag as i32,
datatype: ColumnDataType::String as i32,
..Default::default()
},
// field column: temperature
Column {
column_name: "temperature".to_owned(),
values: Some(column::Values {
f32_values: temp,
..Default::default()
}),
semantic_type: SemanticType::Field as i32,
datatype: ColumnDataType::Float32 as i32,
..Default::default()
},
// field column: humidity
Column {
column_name: "humidity".to_owned(),
values: Some(column::Values {
i32_values: humidity,
..Default::default()
}),
semantic_type: SemanticType::Field as i32,
datatype: ColumnDataType::Int32 as i32,
..Default::default()
},
];
InsertRequest {
table_name: "weather_demo".to_owned(),
columns,
row_count: rows as u32,
..Default::default()
}
}
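A small sketch (not part of this change) of the transposition that `to_insert_request` performs, checking the column layout described in the doc comment above; it assumes the `WeatherRecord` type from this removed example.

#[test]
fn weather_records_transpose_into_columns() {
    let request = to_insert_request(vec![
        WeatherRecord::new(1686109527000, "c1".to_owned(), 26.4, 15),
        WeatherRecord::new(1686023127000, "c2".to_owned(), 18.0, 74),
    ]);
    // Two records become two rows...
    assert_eq!(request.row_count, 2);
    // ...spread over the four columns built above: ts, collector, temperature, humidity.
    let names: Vec<_> = request
        .columns
        .iter()
        .map(|c| c.column_name.as_str())
        .collect();
    assert_eq!(names, ["ts", "collector", "temperature", "humidity"]);
}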

View File

@@ -17,7 +17,6 @@ use std::sync::Arc;
use api::v1::greptime_database_client::GreptimeDatabaseClient; use api::v1::greptime_database_client::GreptimeDatabaseClient;
use api::v1::health_check_client::HealthCheckClient; use api::v1::health_check_client::HealthCheckClient;
use api::v1::prometheus_gateway_client::PrometheusGatewayClient; use api::v1::prometheus_gateway_client::PrometheusGatewayClient;
use api::v1::region::region_client::RegionClient as PbRegionClient;
use api::v1::HealthCheckRequest; use api::v1::HealthCheckRequest;
use arrow_flight::flight_service_client::FlightServiceClient; use arrow_flight::flight_service_client::FlightServiceClient;
use common_grpc::channel_manager::ChannelManager; use common_grpc::channel_manager::ChannelManager;
@@ -83,6 +82,11 @@ impl Client {
Default::default() Default::default()
} }
pub fn with_manager(channel_manager: ChannelManager) -> Self {
let inner = Arc::new(Inner::with_manager(channel_manager));
Self { inner }
}
pub fn with_urls<U, A>(urls: A) -> Self pub fn with_urls<U, A>(urls: A) -> Self
where where
U: AsRef<str>, U: AsRef<str>,
@@ -153,11 +157,6 @@ impl Client {
}) })
} }
pub(crate) fn raw_region_client(&self) -> Result<PbRegionClient<Channel>> {
let (_, channel) = self.find_channel()?;
Ok(PbRegionClient::new(channel))
}
pub fn make_prometheus_gateway_client(&self) -> Result<PrometheusGatewayClient<Channel>> { pub fn make_prometheus_gateway_client(&self) -> Result<PrometheusGatewayClient<Channel>> {
let (_, channel) = self.find_channel()?; let (_, channel) = self.find_channel()?;
Ok(PrometheusGatewayClient::new(channel)) Ok(PrometheusGatewayClient::new(channel))
@@ -166,7 +165,7 @@ impl Client {
pub async fn health_check(&self) -> Result<()> { pub async fn health_check(&self) -> Result<()> {
let (_, channel) = self.find_channel()?; let (_, channel) = self.find_channel()?;
let mut client = HealthCheckClient::new(channel); let mut client = HealthCheckClient::new(channel);
let _ = client.health_check(HealthCheckRequest {}).await?; client.health_check(HealthCheckRequest {}).await?;
Ok(()) Ok(())
} }
} }

View File

@@ -13,10 +13,12 @@
// limitations under the License. // limitations under the License.
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Formatter};
use std::sync::{Arc, Mutex};
use std::time::Duration; use std::time::Duration;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager}; use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::peer::Peer; use common_meta::peer::Peer;
use common_telemetry::info;
use moka::future::{Cache, CacheBuilder}; use moka::future::{Cache, CacheBuilder};
use crate::Client; use crate::Client;
@@ -24,11 +26,21 @@ use crate::Client;
pub struct DatanodeClients { pub struct DatanodeClients {
channel_manager: ChannelManager, channel_manager: ChannelManager,
clients: Cache<Peer, Client>, clients: Cache<Peer, Client>,
started: Arc<Mutex<bool>>,
} }
impl Default for DatanodeClients { impl Default for DatanodeClients {
fn default() -> Self { fn default() -> Self {
Self::new(ChannelConfig::new()) let config = ChannelConfig::new().timeout(Duration::from_secs(8));
Self {
channel_manager: ChannelManager::with_config(config),
clients: CacheBuilder::new(1024)
.time_to_live(Duration::from_secs(30 * 60))
.time_to_idle(Duration::from_secs(5 * 60))
.build(),
started: Arc::new(Mutex::new(false)),
}
} }
} }
@@ -41,14 +53,16 @@ impl Debug for DatanodeClients {
} }
impl DatanodeClients { impl DatanodeClients {
pub fn new(config: ChannelConfig) -> Self { pub fn start(&self) {
Self { let mut started = self.started.lock().unwrap();
channel_manager: ChannelManager::with_config(config), if *started {
clients: CacheBuilder::new(1024) return;
.time_to_live(Duration::from_secs(30 * 60))
.time_to_idle(Duration::from_secs(5 * 60))
.build(),
} }
self.channel_manager.start_channel_recycle();
info!("Datanode clients manager is started!");
*started = true;
} }
pub async fn get_client(&self, datanode: &Peer) -> Client { pub async fn get_client(&self, datanode: &Peer) -> Client {

View File

@@ -17,24 +17,26 @@ use api::v1::ddl_request::Expr as DdlExpr;
use api::v1::greptime_request::Request; use api::v1::greptime_request::Request;
use api::v1::query_request::Query; use api::v1::query_request::Query;
use api::v1::{ use api::v1::{
AlterExpr, AuthHeader, CompactTableExpr, CreateTableExpr, DdlRequest, DeleteRequests, greptime_response, AffectedRows, AlterExpr, AuthHeader, CreateTableExpr, DdlRequest,
DropTableExpr, FlushTableExpr, GreptimeRequest, InsertRequests, PromRangeQuery, QueryRequest, DeleteRequest, DropTableExpr, FlushTableExpr, GreptimeRequest, InsertRequests, PromRangeQuery,
RequestHeader, RowInsertRequests, TruncateTableExpr, QueryRequest, RequestHeader,
}; };
use arrow_flight::Ticket; use arrow_flight::{FlightData, Ticket};
use async_stream::stream; use common_error::prelude::*;
use common_error::ext::{BoxedError, ErrorExt}; use common_grpc::flight::{flight_messages_to_recordbatches, FlightDecoder, FlightMessage};
use common_grpc::flight::{FlightDecoder, FlightMessage};
use common_query::Output; use common_query::Output;
use common_recordbatch::error::ExternalSnafu;
use common_recordbatch::RecordBatchStreamAdaptor;
use common_telemetry::{logging, timer}; use common_telemetry::{logging, timer};
use futures_util::StreamExt; use futures_util::{TryFutureExt, TryStreamExt};
use prost::Message; use prost::Message;
use snafu::{ensure, ResultExt}; use snafu::{ensure, ResultExt};
use tokio::sync::mpsc::Sender;
use tokio::sync::{mpsc, OnceCell};
use tokio_stream::wrappers::ReceiverStream;
use crate::error::{ConvertFlightDataSnafu, Error, IllegalFlightMessagesSnafu, ServerSnafu}; use crate::error::{
use crate::{error, from_grpc_response, metrics, Client, Result, StreamInserter}; ConvertFlightDataSnafu, IllegalDatabaseResponseSnafu, IllegalFlightMessagesSnafu,
};
use crate::{error, metrics, Client, Result};
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default)]
pub struct Database { pub struct Database {
@@ -48,6 +50,7 @@ pub struct Database {
dbname: String, dbname: String,
client: Client, client: Client,
streaming_client: OnceCell<Sender<GreptimeRequest>>,
ctx: FlightContext, ctx: FlightContext,
} }
@@ -59,6 +62,7 @@ impl Database {
schema: schema.into(), schema: schema.into(),
dbname: "".to_string(), dbname: "".to_string(),
client, client,
streaming_client: OnceCell::new(),
ctx: FlightContext::default(), ctx: FlightContext::default(),
} }
} }
@@ -76,6 +80,7 @@ impl Database {
schema: "".to_string(), schema: "".to_string(),
dbname: dbname.into(), dbname: dbname.into(),
client, client,
streaming_client: OnceCell::new(),
ctx: FlightContext::default(), ctx: FlightContext::default(),
} }
} }
@@ -115,81 +120,76 @@ impl Database {
self.handle(Request::Inserts(requests)).await self.handle(Request::Inserts(requests)).await
} }
pub async fn row_insert(&self, requests: RowInsertRequests) -> Result<u32> { pub async fn insert_to_stream(&self, requests: InsertRequests) -> Result<()> {
let _timer = timer!(metrics::METRIC_GRPC_INSERT); let streaming_client = self
self.handle(Request::RowInserts(requests)).await .streaming_client
.get_or_try_init(|| self.client_stream())
.await?;
let request = self.to_rpc_request(Request::Inserts(requests));
streaming_client.send(request).await.map_err(|e| {
error::ClientStreamingSnafu {
err_msg: e.to_string(),
}
.build()
})
} }
pub fn streaming_inserter(&self) -> Result<StreamInserter> { pub async fn delete(&self, request: DeleteRequest) -> Result<u32> {
self.streaming_inserter_with_channel_size(65536)
}
pub fn streaming_inserter_with_channel_size(
&self,
channel_size: usize,
) -> Result<StreamInserter> {
let client = self.client.make_database_client()?.inner;
let stream_inserter = StreamInserter::new(
client,
self.dbname().to_string(),
self.ctx.auth_header.clone(),
channel_size,
);
Ok(stream_inserter)
}
pub async fn delete(&self, request: DeleteRequests) -> Result<u32> {
let _timer = timer!(metrics::METRIC_GRPC_DELETE); let _timer = timer!(metrics::METRIC_GRPC_DELETE);
self.handle(Request::Deletes(request)).await self.handle(Request::Delete(request)).await
} }
async fn handle(&self, request: Request) -> Result<u32> { async fn handle(&self, request: Request) -> Result<u32> {
let mut client = self.client.make_database_client()?.inner; let mut client = self.client.make_database_client()?.inner;
let request = self.to_rpc_request(request, None); let request = self.to_rpc_request(request);
let response = client.handle(request).await?.into_inner(); let response = client
from_grpc_response(response) .handle(request)
.await?
.into_inner()
.response
.context(IllegalDatabaseResponseSnafu {
err_msg: "GreptimeResponse is empty",
})?;
let greptime_response::Response::AffectedRows(AffectedRows { value }) = response;
Ok(value)
} }
#[inline] #[inline]
fn to_rpc_request(&self, request: Request, trace_id: Option<u64>) -> GreptimeRequest { fn to_rpc_request(&self, request: Request) -> GreptimeRequest {
GreptimeRequest { GreptimeRequest {
header: Some(RequestHeader { header: Some(RequestHeader {
catalog: self.catalog.clone(), catalog: self.catalog.clone(),
schema: self.schema.clone(), schema: self.schema.clone(),
authorization: self.ctx.auth_header.clone(), authorization: self.ctx.auth_header.clone(),
dbname: self.dbname.clone(), dbname: self.dbname.clone(),
trace_id,
span_id: None,
}), }),
request: Some(request), request: Some(request),
} }
} }
async fn client_stream(&self) -> Result<Sender<GreptimeRequest>> {
let mut client = self.client.make_database_client()?.inner;
let (sender, receiver) = mpsc::channel::<GreptimeRequest>(65536);
let receiver = ReceiverStream::new(receiver);
client.handle_requests(receiver).await?;
Ok(sender)
}
pub async fn sql(&self, sql: &str) -> Result<Output> { pub async fn sql(&self, sql: &str) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_SQL); let _timer = timer!(metrics::METRIC_GRPC_SQL);
self.do_get( self.do_get(Request::Query(QueryRequest {
Request::Query(QueryRequest { query: Some(Query::Sql(sql.to_string())),
query: Some(Query::Sql(sql.to_string())), }))
}),
None,
)
.await .await
} }
pub async fn logical_plan( pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<Output> {
&self,
logical_plan: Vec<u8>,
trace_id: Option<u64>,
) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_LOGICAL_PLAN); let _timer = timer!(metrics::METRIC_GRPC_LOGICAL_PLAN);
self.do_get( self.do_get(Request::Query(QueryRequest {
Request::Query(QueryRequest { query: Some(Query::LogicalPlan(logical_plan)),
query: Some(Query::LogicalPlan(logical_plan)), }))
}),
trace_id,
)
.await .await
} }
@@ -201,171 +201,108 @@ impl Database {
step: &str, step: &str,
) -> Result<Output> { ) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_PROMQL_RANGE_QUERY); let _timer = timer!(metrics::METRIC_GRPC_PROMQL_RANGE_QUERY);
self.do_get( self.do_get(Request::Query(QueryRequest {
Request::Query(QueryRequest { query: Some(Query::PromRangeQuery(PromRangeQuery {
query: Some(Query::PromRangeQuery(PromRangeQuery { query: promql.to_string(),
query: promql.to_string(), start: start.to_string(),
start: start.to_string(), end: end.to_string(),
end: end.to_string(), step: step.to_string(),
step: step.to_string(), })),
})), }))
}),
None,
)
.await .await
} }
pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> { pub async fn create(&self, expr: CreateTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_CREATE_TABLE); let _timer = timer!(metrics::METRIC_GRPC_CREATE_TABLE);
self.do_get( self.do_get(Request::Ddl(DdlRequest {
Request::Ddl(DdlRequest { expr: Some(DdlExpr::CreateTable(expr)),
expr: Some(DdlExpr::CreateTable(expr)), }))
}),
None,
)
.await .await
} }
pub async fn alter(&self, expr: AlterExpr) -> Result<Output> { pub async fn alter(&self, expr: AlterExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_ALTER); let _timer = timer!(metrics::METRIC_GRPC_ALTER);
self.do_get( self.do_get(Request::Ddl(DdlRequest {
Request::Ddl(DdlRequest { expr: Some(DdlExpr::Alter(expr)),
expr: Some(DdlExpr::Alter(expr)), }))
}),
None,
)
.await .await
} }
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> { pub async fn drop_table(&self, expr: DropTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_DROP_TABLE); let _timer = timer!(metrics::METRIC_GRPC_DROP_TABLE);
self.do_get( self.do_get(Request::Ddl(DdlRequest {
Request::Ddl(DdlRequest { expr: Some(DdlExpr::DropTable(expr)),
expr: Some(DdlExpr::DropTable(expr)), }))
}),
None,
)
.await .await
} }
pub async fn flush_table(&self, expr: FlushTableExpr) -> Result<Output> { pub async fn flush_table(&self, expr: FlushTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_FLUSH_TABLE); let _timer = timer!(metrics::METRIC_GRPC_FLUSH_TABLE);
self.do_get( self.do_get(Request::Ddl(DdlRequest {
Request::Ddl(DdlRequest { expr: Some(DdlExpr::FlushTable(expr)),
expr: Some(DdlExpr::FlushTable(expr)), }))
}),
None,
)
.await .await
} }
pub async fn compact_table(&self, expr: CompactTableExpr) -> Result<Output> { async fn do_get(&self, request: Request) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_COMPACT_TABLE);
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::CompactTable(expr)),
}),
None,
)
.await
}
pub async fn truncate_table(&self, expr: TruncateTableExpr) -> Result<Output> {
let _timer = timer!(metrics::METRIC_GRPC_TRUNCATE_TABLE);
self.do_get(
Request::Ddl(DdlRequest {
expr: Some(DdlExpr::TruncateTable(expr)),
}),
None,
)
.await
}
async fn do_get(&self, request: Request, trace_id: Option<u64>) -> Result<Output> {
// FIXME(paomian): some labels should be added for metrics // FIXME(paomian): some labels should be added for metrics
let _timer = timer!(metrics::METRIC_GRPC_DO_GET); let _timer = timer!(metrics::METRIC_GRPC_DO_GET);
let request = self.to_rpc_request(request, trace_id); let request = self.to_rpc_request(request);
let request = Ticket { let request = Ticket {
ticket: request.encode_to_vec().into(), ticket: request.encode_to_vec().into(),
}; };
let mut client = self.client.make_flight_client()?; let mut client = self.client.make_flight_client()?;
let response = client.mut_inner().do_get(request).await.map_err(|e| { let flight_data: Vec<FlightData> = client
let tonic_code = e.code(); .mut_inner()
let e: error::Error = e.into(); .do_get(request)
let code = e.status_code(); .and_then(|response| response.into_inner().try_collect())
let msg = e.to_string(); .await
let error = Error::FlightGet { .map_err(|e| {
tonic_code, let tonic_code = e.code();
addr: client.addr().to_string(), let e: error::Error = e.into();
source: BoxedError::new(ServerSnafu { code, msg }.build()), let code = e.status_code();
}; let msg = e.to_string();
logging::error!( error::ServerSnafu { code, msg }
"Failed to do Flight get, addr: {}, code: {}, source: {}", .fail::<()>()
client.addr(), .map_err(BoxedError::new)
tonic_code, .context(error::FlightGetSnafu {
error tonic_code,
addr: client.addr(),
})
.map_err(|error| {
logging::error!(
"Failed to do Flight get, addr: {}, code: {}, source: {}",
client.addr(),
tonic_code,
error
);
error
})
.unwrap_err()
})?;
let decoder = &mut FlightDecoder::default();
let flight_messages = flight_data
.into_iter()
.map(|x| decoder.try_decode(x).context(ConvertFlightDataSnafu))
.collect::<Result<Vec<_>>>()?;
let output = if let Some(FlightMessage::AffectedRows(rows)) = flight_messages.get(0) {
ensure!(
flight_messages.len() == 1,
IllegalFlightMessagesSnafu {
reason: "Expect 'AffectedRows' Flight messages to be one and only!"
}
); );
error Output::AffectedRows(*rows)
})?; } else {
let recordbatches = flight_messages_to_recordbatches(flight_messages)
let flight_data_stream = response.into_inner(); .context(ConvertFlightDataSnafu)?;
let mut decoder = FlightDecoder::default(); Output::RecordBatches(recordbatches)
let mut flight_message_stream = flight_data_stream.map(move |flight_data| {
flight_data
.map_err(Error::from)
.and_then(|data| decoder.try_decode(data).context(ConvertFlightDataSnafu))
});
let Some(first_flight_message) = flight_message_stream.next().await else {
return IllegalFlightMessagesSnafu {
reason: "Expect the response not to be empty",
}
.fail();
}; };
Ok(output)
let first_flight_message = first_flight_message?;
match first_flight_message {
FlightMessage::AffectedRows(rows) => {
ensure!(
flight_message_stream.next().await.is_none(),
IllegalFlightMessagesSnafu {
reason: "Expect 'AffectedRows' Flight messages to be the one and the only!"
}
);
Ok(Output::AffectedRows(rows))
}
FlightMessage::Recordbatch(_) => IllegalFlightMessagesSnafu {
reason: "The first flight message cannot be a RecordBatch message",
}
.fail(),
FlightMessage::Schema(schema) => {
let stream = Box::pin(stream!({
while let Some(flight_message) = flight_message_stream.next().await {
let flight_message = flight_message
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let FlightMessage::Recordbatch(record_batch) = flight_message else {
yield IllegalFlightMessagesSnafu {reason: "A Schema message must be succeeded exclusively by a set of RecordBatch messages"}
.fail()
.map_err(BoxedError::new)
.context(ExternalSnafu);
break;
};
yield Ok(record_batch);
}
}));
let record_batch_stream = RecordBatchStreamAdaptor {
schema,
stream,
output_ordering: None,
};
Ok(Output::Stream(Box::pin(record_batch_stream)))
}
}
} }
} }
@@ -376,11 +313,106 @@ pub struct FlightContext {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;
use api::v1::auth_header::AuthScheme; use api::v1::auth_header::AuthScheme;
use api::v1::{AuthHeader, Basic}; use api::v1::{AuthHeader, Basic, Column};
use common_grpc::select::{null_mask, values};
use common_grpc_expr::column_to_vector;
use datatypes::prelude::{Vector, VectorRef};
use datatypes::vectors::{
BinaryVector, BooleanVector, DateTimeVector, DateVector, Float32Vector, Float64Vector,
Int16Vector, Int32Vector, Int64Vector, Int8Vector, StringVector, UInt16Vector,
UInt32Vector, UInt64Vector, UInt8Vector,
};
use crate::database::FlightContext; use crate::database::FlightContext;
#[test]
fn test_column_to_vector() {
let mut column = create_test_column(Arc::new(BooleanVector::from(vec![true])));
column.datatype = -100;
let result = column_to_vector(&column, 1);
assert!(result.is_err());
assert_eq!(
result.unwrap_err().to_string(),
"Column datatype error, source: Unknown proto column datatype: -100"
);
macro_rules! test_with_vector {
($vector: expr) => {
let vector = Arc::new($vector);
let column = create_test_column(vector.clone());
let result = column_to_vector(&column, vector.len() as u32).unwrap();
assert_eq!(result, vector as VectorRef);
};
}
test_with_vector!(BooleanVector::from(vec![Some(true), None, Some(false)]));
test_with_vector!(Int8Vector::from(vec![Some(i8::MIN), None, Some(i8::MAX)]));
test_with_vector!(Int16Vector::from(vec![
Some(i16::MIN),
None,
Some(i16::MAX)
]));
test_with_vector!(Int32Vector::from(vec![
Some(i32::MIN),
None,
Some(i32::MAX)
]));
test_with_vector!(Int64Vector::from(vec![
Some(i64::MIN),
None,
Some(i64::MAX)
]));
test_with_vector!(UInt8Vector::from(vec![Some(u8::MIN), None, Some(u8::MAX)]));
test_with_vector!(UInt16Vector::from(vec![
Some(u16::MIN),
None,
Some(u16::MAX)
]));
test_with_vector!(UInt32Vector::from(vec![
Some(u32::MIN),
None,
Some(u32::MAX)
]));
test_with_vector!(UInt64Vector::from(vec![
Some(u64::MIN),
None,
Some(u64::MAX)
]));
test_with_vector!(Float32Vector::from(vec![
Some(f32::MIN),
None,
Some(f32::MAX)
]));
test_with_vector!(Float64Vector::from(vec![
Some(f64::MIN),
None,
Some(f64::MAX)
]));
test_with_vector!(BinaryVector::from(vec![
Some(b"".to_vec()),
None,
Some(b"hello".to_vec())
]));
test_with_vector!(StringVector::from(vec![Some(""), None, Some("foo"),]));
test_with_vector!(DateVector::from(vec![Some(1), None, Some(3)]));
test_with_vector!(DateTimeVector::from(vec![Some(4), None, Some(6)]));
}
fn create_test_column(vector: VectorRef) -> Column {
let wrapper: ColumnDataTypeWrapper = vector.data_type().try_into().unwrap();
Column {
column_name: "test".to_string(),
semantic_type: 1,
values: Some(values(&[vector.clone()]).unwrap()),
null_mask: null_mask(&[vector.clone()], vector.len()),
datatype: wrapper.datatype() as i32,
}
}
#[test] #[test]
fn test_flight_ctx() { fn test_flight_ctx() {
let mut ctx = FlightContext::default(); let mut ctx = FlightContext::default();

View File

@@ -13,11 +13,10 @@
// limitations under the License. // limitations under the License.
use std::any::Any; use std::any::Any;
use std::str::FromStr;
use common_error::ext::{BoxedError, ErrorExt}; use common_error::prelude::*;
use common_error::status_code::StatusCode; use snafu::Location;
use common_error::{GREPTIME_ERROR_CODE, GREPTIME_ERROR_MSG};
use snafu::{Location, Snafu};
use tonic::{Code, Status}; use tonic::{Code, Status};
#[derive(Debug, Snafu)] #[derive(Debug, Snafu)]
@@ -106,18 +105,11 @@ impl From<Status> for Error {
.and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok()) .and_then(|v| String::from_utf8(v.as_bytes().to_vec()).ok())
} }
let code = get_metadata_value(&e, GREPTIME_ERROR_CODE) let code = get_metadata_value(&e, INNER_ERROR_CODE)
.and_then(|s| { .and_then(|s| StatusCode::from_str(&s).ok())
if let Ok(code) = s.parse::<u32>() {
StatusCode::from_u32(code)
} else {
None
}
})
.unwrap_or(StatusCode::Unknown); .unwrap_or(StatusCode::Unknown);
let msg = let msg = get_metadata_value(&e, INNER_ERROR_MSG).unwrap_or(e.to_string());
get_metadata_value(&e, GREPTIME_ERROR_MSG).unwrap_or_else(|| e.message().to_string());
Self::Server { code, msg } Self::Server { code, msg }
} }

View File

@@ -15,49 +15,13 @@
mod client; mod client;
pub mod client_manager; pub mod client_manager;
mod database; mod database;
pub mod error; mod error;
pub mod load_balance; pub mod load_balance;
mod metrics; mod metrics;
pub mod region;
mod stream_insert;
pub use api; pub use api;
use api::v1::greptime_response::Response;
use api::v1::{AffectedRows, GreptimeResponse};
pub use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; pub use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::status_code::StatusCode;
use snafu::OptionExt;
pub use self::client::Client; pub use self::client::Client;
pub use self::database::Database; pub use self::database::Database;
pub use self::error::{Error, Result}; pub use self::error::{Error, Result};
pub use self::stream_insert::StreamInserter;
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};
pub fn from_grpc_response(response: GreptimeResponse) -> Result<u32> {
let header = response.header.context(IllegalDatabaseResponseSnafu {
err_msg: "missing header",
})?;
let status = header.status.context(IllegalDatabaseResponseSnafu {
err_msg: "missing status",
})?;
if StatusCode::is_success(status.status_code) {
let res = response.response.context(IllegalDatabaseResponseSnafu {
err_msg: "missing response",
})?;
match res {
Response::AffectedRows(AffectedRows { value }) => Ok(value),
}
} else {
let status_code =
StatusCode::from_u32(status.status_code).context(IllegalDatabaseResponseSnafu {
err_msg: format!("invalid status: {:?}", status),
})?;
ServerSnafu {
code: status_code,
msg: status.err_msg,
}
.fail()
}
}

View File

@@ -60,7 +60,7 @@ mod tests {
let random = Random; let random = Random;
for _ in 0..100 { for _ in 0..100 {
let peer = random.get_peer(&peers).unwrap(); let peer = random.get_peer(&peers).unwrap();
assert!(all.contains(peer)); all.contains(peer);
} }
} }
} }

View File

@@ -22,7 +22,4 @@ pub const METRIC_GRPC_LOGICAL_PLAN: &str = "grpc.logical_plan";
pub const METRIC_GRPC_ALTER: &str = "grpc.alter"; pub const METRIC_GRPC_ALTER: &str = "grpc.alter";
pub const METRIC_GRPC_DROP_TABLE: &str = "grpc.drop_table"; pub const METRIC_GRPC_DROP_TABLE: &str = "grpc.drop_table";
pub const METRIC_GRPC_FLUSH_TABLE: &str = "grpc.flush_table"; pub const METRIC_GRPC_FLUSH_TABLE: &str = "grpc.flush_table";
pub const METRIC_GRPC_COMPACT_TABLE: &str = "grpc.compact_table";
pub const METRIC_GRPC_TRUNCATE_TABLE: &str = "grpc.truncate_table";
pub const METRIC_GRPC_DO_GET: &str = "grpc.do_get"; pub const METRIC_GRPC_DO_GET: &str = "grpc.do_get";
pub(crate) const METRIC_REGION_REQUEST_GRPC: &str = "grpc.region_request";

View File

@@ -1,146 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::region::{region_request, RegionRequest, RegionRequestHeader, RegionResponse};
use api::v1::ResponseHeader;
use common_error::status_code::StatusCode;
use common_telemetry::timer;
use snafu::OptionExt;
use crate::error::{IllegalDatabaseResponseSnafu, Result, ServerSnafu};
use crate::{metrics, Client};
type AffectedRows = u64;
#[derive(Debug)]
pub struct RegionRequester {
trace_id: Option<u64>,
span_id: Option<u64>,
client: Client,
}
impl RegionRequester {
pub fn new(client: Client) -> Self {
// TODO(LFC): Pass in trace_id and span_id from some context when we have it.
Self {
trace_id: None,
span_id: None,
client,
}
}
pub async fn handle(self, request: region_request::Body) -> Result<AffectedRows> {
let request_type = request.as_ref().to_string();
let request = RegionRequest {
header: Some(RegionRequestHeader {
trace_id: self.trace_id,
span_id: self.span_id,
}),
body: Some(request),
};
let _timer = timer!(
metrics::METRIC_REGION_REQUEST_GRPC,
&[("request_type", request_type)]
);
let mut client = self.client.raw_region_client()?;
let RegionResponse {
header,
affected_rows,
} = client.handle(request).await?.into_inner();
check_response_header(header)?;
Ok(affected_rows)
}
}
fn check_response_header(header: Option<ResponseHeader>) -> Result<()> {
let status = header
.and_then(|header| header.status)
.context(IllegalDatabaseResponseSnafu {
err_msg: "either response header or status is missing",
})?;
if StatusCode::is_success(status.status_code) {
Ok(())
} else {
let code =
StatusCode::from_u32(status.status_code).context(IllegalDatabaseResponseSnafu {
err_msg: format!("unknown server status: {:?}", status),
})?;
ServerSnafu {
code,
msg: status.err_msg,
}
.fail()
}
}
#[cfg(test)]
mod test {
use api::v1::Status as PbStatus;
use super::*;
use crate::Error::{IllegalDatabaseResponse, Server};
#[test]
fn test_check_response_header() {
let result = check_response_header(None);
assert!(matches!(
result.unwrap_err(),
IllegalDatabaseResponse { .. }
));
let result = check_response_header(Some(ResponseHeader { status: None }));
assert!(matches!(
result.unwrap_err(),
IllegalDatabaseResponse { .. }
));
let result = check_response_header(Some(ResponseHeader {
status: Some(PbStatus {
status_code: StatusCode::Success as u32,
err_msg: "".to_string(),
}),
}));
assert!(result.is_ok());
let result = check_response_header(Some(ResponseHeader {
status: Some(PbStatus {
status_code: u32::MAX,
err_msg: "".to_string(),
}),
}));
assert!(matches!(
result.unwrap_err(),
IllegalDatabaseResponse { .. }
));
let result = check_response_header(Some(ResponseHeader {
status: Some(PbStatus {
status_code: StatusCode::Internal as u32,
err_msg: "blabla".to_string(),
}),
}));
let Server { code, msg } = result.unwrap_err() else {
unreachable!()
};
assert_eq!(code, StatusCode::Internal);
assert_eq!(msg, "blabla");
}
}

View File

@@ -1,118 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::greptime_database_client::GreptimeDatabaseClient;
use api::v1::greptime_request::Request;
use api::v1::{
AuthHeader, GreptimeRequest, GreptimeResponse, InsertRequest, InsertRequests, RequestHeader,
RowInsertRequest, RowInsertRequests,
};
use tokio::sync::mpsc;
use tokio::task::JoinHandle;
use tokio_stream::wrappers::ReceiverStream;
use tonic::transport::Channel;
use tonic::{Response, Status};
use crate::error::{self, Result};
use crate::from_grpc_response;
/// A structure that provides some methods for streaming data insertion.
///
/// [`StreamInserter`] cannot be constructed via the `StreamInserter::new` method.
/// You can use the following way to obtain [`StreamInserter`].
///
/// ```ignore
/// let grpc_client = Client::with_urls(vec!["127.0.0.1:4002"]);
/// let client = Database::new_with_dbname("db_name", grpc_client);
/// let stream_inserter = client.streaming_inserter().unwrap();
/// ```
///
/// If you want to see a concrete usage example, please see
/// [stream_ingest.rs](https://github.com/GreptimeTeam/greptimedb/blob/develop/src/client/examples/stream_ingest.rs).
pub struct StreamInserter {
sender: mpsc::Sender<GreptimeRequest>,
auth_header: Option<AuthHeader>,
dbname: String,
join: JoinHandle<std::result::Result<Response<GreptimeResponse>, Status>>,
}
impl StreamInserter {
pub(crate) fn new(
mut client: GreptimeDatabaseClient<Channel>,
dbname: String,
auth_header: Option<AuthHeader>,
channel_size: usize,
) -> StreamInserter {
let (send, recv) = tokio::sync::mpsc::channel(channel_size);
let join: JoinHandle<std::result::Result<Response<GreptimeResponse>, Status>> =
tokio::spawn(async move {
let recv_stream = ReceiverStream::new(recv);
client.handle_requests(recv_stream).await
});
StreamInserter {
sender: send,
auth_header,
dbname,
join,
}
}
pub async fn insert(&self, requests: Vec<InsertRequest>) -> Result<()> {
let inserts = InsertRequests { inserts: requests };
let request = self.to_rpc_request(Request::Inserts(inserts));
self.sender.send(request).await.map_err(|e| {
error::ClientStreamingSnafu {
err_msg: e.to_string(),
}
.build()
})
}
pub async fn row_insert(&self, requests: Vec<RowInsertRequest>) -> Result<()> {
let inserts = RowInsertRequests { inserts: requests };
let request = self.to_rpc_request(Request::RowInserts(inserts));
self.sender.send(request).await.map_err(|e| {
error::ClientStreamingSnafu {
err_msg: e.to_string(),
}
.build()
})
}
pub async fn finish(self) -> Result<u32> {
drop(self.sender);
let response = self.join.await.unwrap()?;
let response = response.into_inner();
from_grpc_response(response)
}
fn to_rpc_request(&self, request: Request) -> GreptimeRequest {
GreptimeRequest {
header: Some(RequestHeader {
authorization: self.auth_header.clone(),
dbname: self.dbname.clone(),
..Default::default()
}),
request: Some(request),
}
}
}
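For reference, a condensed usage sketch of the `StreamInserter` shown above (not part of this change). It reuses the `to_insert_request` and `weather_records_1` helpers from the removed stream_ingest example earlier in this compare, and the endpoint address is an assumption.

use client::{Client, Database, DEFAULT_SCHEMA_NAME};

#[tokio::main]
async fn main() {
    // Assumed endpoint; adjust to your deployment.
    let grpc_client = Client::with_urls(vec!["localhost:4001"]);
    let db = Database::new_with_dbname(DEFAULT_SCHEMA_NAME, grpc_client);

    // `streaming_inserter()` opens a long-lived request stream to the server.
    let inserter = db.streaming_inserter().unwrap();

    // Each `insert` call pushes one batch onto the stream without waiting for
    // a per-batch response.
    inserter
        .insert(vec![to_insert_request(weather_records_1())])
        .await
        .unwrap();

    // `finish` drops the sender, which ends the stream, and returns the total
    // affected row count reported by the server.
    let rows = inserter.finish().await.unwrap();
    println!("rows written: {rows}");
}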

View File

@@ -10,61 +10,46 @@ name = "greptime"
path = "src/bin/greptime.rs" path = "src/bin/greptime.rs"
[features] [features]
default = ["metrics-process"]
tokio-console = ["common-telemetry/tokio-console"] tokio-console = ["common-telemetry/tokio-console"]
metrics-process = ["servers/metrics-process"]
[dependencies] [dependencies]
anymap = "1.0.0-beta.2" anymap = "1.0.0-beta.2"
async-trait.workspace = true catalog = { path = "../catalog" }
auth.workspace = true
catalog = { workspace = true }
chrono.workspace = true
clap = { version = "3.1", features = ["derive"] } clap = { version = "3.1", features = ["derive"] }
client = { workspace = true } client = { path = "../client" }
common-base = { workspace = true } common-base = { path = "../common/base" }
common-error = { workspace = true } common-error = { path = "../common/error" }
common-meta = { workspace = true } common-query = { path = "../common/query" }
common-query = { workspace = true } common-recordbatch = { path = "../common/recordbatch" }
common-recordbatch = { workspace = true } common-telemetry = { path = "../common/telemetry", features = [
common-telemetry = { workspace = true, features = [
"deadlock_detection", "deadlock_detection",
] } ] }
config = "0.13" config = "0.13"
datanode = { workspace = true } datanode = { path = "../datanode" }
datatypes = { workspace = true }
either = "1.8" either = "1.8"
etcd-client.workspace = true frontend = { path = "../frontend" }
frontend = { workspace = true }
futures.workspace = true futures.workspace = true
meta-client = { workspace = true } meta-client = { path = "../meta-client" }
meta-srv = { workspace = true } meta-srv = { path = "../meta-srv" }
metrics.workspace = true metrics.workspace = true
nu-ansi-term = "0.46" nu-ansi-term = "0.46"
partition = { workspace = true } partition = { path = "../partition" }
prost.workspace = true query = { path = "../query" }
query = { workspace = true }
rand.workspace = true
rustyline = "10.1" rustyline = "10.1"
serde.workspace = true serde.workspace = true
servers = { workspace = true } servers = { path = "../servers" }
session = { workspace = true } session = { path = "../session" }
snafu.workspace = true snafu.workspace = true
substrait = { workspace = true } substrait = { path = "../common/substrait" }
table = { workspace = true } tikv-jemallocator = "0.5"
tokio.workspace = true tokio.workspace = true
[target.'cfg(not(windows))'.dependencies]
tikv-jemallocator = "0.5"
[dev-dependencies] [dev-dependencies]
common-test-util = { workspace = true } common-test-util = { path = "../common/test-util" }
serde.workspace = true
temp-env = "0.3"
toml.workspace = true
[target.'cfg(not(windows))'.dev-dependencies]
rexpect = "0.5" rexpect = "0.5"
temp-env = "0.3"
serde.workspace = true
toml = "0.5"
[build-dependencies] [build-dependencies]
common-version = { workspace = true } build-data = "0.1.4"

View File

@@ -12,6 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+const DEFAULT_VALUE: &str = "unknown";
+
 fn main() {
-    common_version::setup_git_versions();
+    println!(
+        "cargo:rustc-env=GIT_COMMIT={}",
+        build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
+    );
+    println!(
+        "cargo:rustc-env=GIT_COMMIT_SHORT={}",
+        build_data::get_git_commit_short().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
+    );
+    println!(
+        "cargo:rustc-env=GIT_BRANCH={}",
+        build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
+    );
+    println!(
+        "cargo:rustc-env=GIT_DIRTY={}",
+        build_data::get_git_dirty().map_or(DEFAULT_VALUE.to_string(), |v| v.to_string())
+    );
 }
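
The `cargo:rustc-env` lines in the v0.3.1 build script only export the git metadata; a consumer elsewhere in the crate would read it at compile time roughly as sketched below. The constant and function names here are illustrative and not taken from this diff.

```rust
// Baked in at compile time by build.rs via `cargo:rustc-env=...`.
// The "unknown" fallback in the build script means these env!() calls still
// succeed when building outside a git checkout.
pub const GIT_COMMIT: &str = env!("GIT_COMMIT");
pub const GIT_BRANCH: &str = env!("GIT_BRANCH");

// Illustrative helper combining the exported values into one display string.
pub fn build_info() -> String {
    format!(
        "{} ({}, dirty: {})",
        env!("GIT_COMMIT_SHORT"),
        GIT_BRANCH,
        env!("GIT_DIRTY")
    )
}
```
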

View File

@@ -187,7 +187,6 @@ fn log_env_flags() {
     }
 }
 
-#[cfg(not(windows))]
 #[global_allocator]
 static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

View File

@@ -12,40 +12,24 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-mod bench;
 mod cmd;
 mod helper;
 mod repl;
-// TODO(weny): Removes it
-#[allow(deprecated)]
-mod upgrade;
 
-use async_trait::async_trait;
-use bench::BenchTableMetadataCommand;
 use clap::Parser;
 use common_telemetry::logging::LoggingOptions;
 pub use repl::Repl;
-use upgrade::UpgradeCommand;
 
 use crate::error::Result;
 use crate::options::{Options, TopLevelOptions};
 
-#[async_trait]
-pub trait Tool {
-    async fn do_work(&self) -> Result<()>;
-}
-
-pub enum Instance {
-    Repl(Repl),
-    Tool(Box<dyn Tool>),
+pub struct Instance {
+    repl: Repl,
 }
 
 impl Instance {
     pub async fn start(&mut self) -> Result<()> {
-        match self {
-            Instance::Repl(repl) => repl.run().await,
-            Instance::Tool(tool) => tool.do_work().await,
-        }
+        self.repl.run().await
     }
 
     pub async fn stop(&self) -> Result<()> {
@@ -79,16 +63,12 @@ impl Command {
 #[derive(Parser)]
 enum SubCommand {
     Attach(AttachCommand),
-    Upgrade(UpgradeCommand),
-    Bench(BenchTableMetadataCommand),
 }
 
 impl SubCommand {
     async fn build(self) -> Result<Instance> {
         match self {
             SubCommand::Attach(cmd) => cmd.build().await,
-            SubCommand::Upgrade(cmd) => cmd.build().await,
-            SubCommand::Bench(cmd) => cmd.build().await,
         }
     }
 }
@@ -106,7 +86,7 @@ pub(crate) struct AttachCommand {
 impl AttachCommand {
     async fn build(self) -> Result<Instance> {
         let repl = Repl::try_new(&self).await?;
-        Ok(Instance::Repl(repl))
+        Ok(Instance { repl })
     }
 }

View File

@@ -1,165 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::future::Future;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use clap::Parser;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use common_meta::table_name::TableName;
use common_telemetry::info;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use meta_srv::service::store::etcd::EtcdStore;
use meta_srv::service::store::kv::KvBackendAdapter;
use rand::Rng;
use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType};
use self::metadata::TableMetadataBencher;
use crate::cli::{Instance, Tool};
use crate::error::Result;
mod metadata;
async fn bench_self_recorded<F, Fut>(desc: &str, f: F, count: u32)
where
F: Fn(u32) -> Fut,
Fut: Future<Output = Duration>,
{
let mut total = Duration::default();
for i in 1..=count {
total += f(i).await;
}
let cost = total.as_millis() as f64 / count as f64;
info!("{desc}, average operation cost: {cost:.2} ms");
}
#[derive(Debug, Default, Parser)]
pub struct BenchTableMetadataCommand {
#[clap(long)]
etcd_addr: String,
#[clap(long)]
count: u32,
}
impl BenchTableMetadataCommand {
pub async fn build(&self) -> Result<Instance> {
let etcd_store = EtcdStore::with_endpoints([&self.etcd_addr]).await.unwrap();
let table_metadata_manager = Arc::new(TableMetadataManager::new(KvBackendAdapter::wrap(
etcd_store,
)));
let tool = BenchTableMetadata {
table_metadata_manager,
count: self.count,
};
Ok(Instance::Tool(Box::new(tool)))
}
}
struct BenchTableMetadata {
table_metadata_manager: TableMetadataManagerRef,
count: u32,
}
#[async_trait]
impl Tool for BenchTableMetadata {
async fn do_work(&self) -> Result<()> {
let bencher = TableMetadataBencher::new(self.table_metadata_manager.clone(), self.count);
bencher.bench_create().await;
bencher.bench_get().await;
bencher.bench_rename().await;
bencher.bench_delete().await;
Ok(())
}
}
fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
let columns = 100;
let mut column_schemas = Vec::with_capacity(columns);
column_schemas.push(
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
true,
)
.with_time_index(true),
);
for i in 1..columns {
let column_name = format!("my_column_{i}");
column_schemas.push(ColumnSchema::new(
column_name,
ConcreteDataType::string_datatype(),
true,
));
}
let meta = RawTableMeta {
schema: RawSchema::new(column_schemas),
engine: "mito".to_string(),
created_on: chrono::DateTime::default(),
primary_key_indices: vec![],
next_column_id: columns as u32 + 1,
engine_options: Default::default(),
value_indices: vec![],
options: Default::default(),
region_numbers: (1..=100).collect(),
partition_key_indices: vec![],
};
RawTableInfo {
ident: TableIdent {
table_id,
version: 1,
},
name: table_name.table_name,
desc: Some("blah".to_string()),
catalog_name: table_name.catalog_name,
schema_name: table_name.schema_name,
meta,
table_type: TableType::Base,
}
}
fn create_region_routes() -> Vec<RegionRoute> {
let mut regions = Vec::with_capacity(100);
let mut rng = rand::thread_rng();
for region_id in 0..64u64 {
regions.push(RegionRoute {
region: Region {
id: region_id.into(),
name: String::new(),
partition: None,
attrs: BTreeMap::new(),
},
leader_peer: Some(Peer {
id: rng.gen_range(0..10),
addr: String::new(),
}),
follower_peers: vec![],
});
}
regions
}
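
One convention in the removed benchmark module that is easy to miss: `bench_self_recorded` expects the closure itself to return the `Duration` it wants counted, so per-iteration setup stays out of the reported average. The standalone sketch below illustrates that calling convention with a stand-in operation; nothing in it comes from the original etcd-backed code.

```rust
use std::future::Future;
use std::time::{Duration, Instant};

// Same shape as the helper above: the closure reports the span it wants timed.
async fn bench_self_recorded<F, Fut>(desc: &str, f: F, count: u32)
where
    F: Fn(u32) -> Fut,
    Fut: Future<Output = Duration>,
{
    let mut total = Duration::default();
    for i in 1..=count {
        total += f(i).await;
    }
    let cost = total.as_millis() as f64 / count as f64;
    println!("{desc}, average operation cost: {cost:.2} ms");
}

// Stand-in for the real table-metadata operation benched in the original.
async fn simulated_create(_key: String) {}

#[tokio::main]
async fn main() {
    bench_self_recorded(
        "simulated create",
        |i| async move {
            let key = format!("bench_table_{i}"); // setup: excluded from timing
            let start = Instant::now();
            simulated_create(key).await; // only this call is timed
            start.elapsed()
        },
        100,
    )
    .await;
}
```
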

Some files were not shown because too many files have changed in this diff.