Compare commits

..

1 Commits

Author SHA1 Message Date
luofucong
34da9ad838 refactor: memtable extend kvs 2025-03-24 21:03:37 +08:00
275 changed files with 7236 additions and 10910 deletions

View File

@@ -41,14 +41,7 @@ runs:
username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }}
- name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64,linux/arm64
# The latest version will lead to segmentation fault.
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Build and push dev-builder-ubuntu image # Build image for amd64 and arm64 platform.
- name: Build and push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.build-dev-builder-ubuntu == 'true' }}
run: |
@@ -59,7 +52,7 @@ runs:
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-centos image # Only build image for amd64 platform.
- name: Build and push dev-builder-centos image
shell: bash
if: ${{ inputs.build-dev-builder-centos == 'true' }}
run: |
@@ -76,7 +69,8 @@ runs:
run: |
make dev-builder \
BASE_IMAGE=android \
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

View File

@@ -56,7 +56,7 @@ runs:
- name: Start EC2 runner
if: startsWith(inputs.runner, 'ec2')
uses: machulav/ec2-github-runner@v2.3.8
uses: machulav/ec2-github-runner@v2
id: start-linux-arm64-ec2-runner
with:
mode: start

View File

@@ -33,7 +33,7 @@ runs:
- name: Stop EC2 runner
if: ${{ inputs.label && inputs.ec2-instance-id }}
uses: machulav/ec2-github-runner@v2.3.8
uses: machulav/ec2-github-runner@v2
with:
mode: stop
label: ${{ inputs.label }}

View File

@@ -14,7 +14,7 @@ name: Build API docs
jobs:
apidoc:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:

View File

@@ -16,11 +16,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -83,7 +83,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -218,7 +218,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }}
steps:
@@ -251,7 +251,7 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
continue-on-error: true
steps:
- uses: actions/checkout@v4
@@ -283,7 +283,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -309,7 +309,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -337,7 +337,7 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
permissions:
issues: write

View File

@@ -23,7 +23,7 @@ concurrency:
jobs:
check-typos-and-docs:
name: Check typos and docs
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
@@ -36,7 +36,7 @@ jobs:
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
license-header-check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
name: Check License Header
steps:
- uses: actions/checkout@v4
@@ -49,7 +49,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -72,7 +72,7 @@ jobs:
toml:
name: Toml Check
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -89,7 +89,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -248,7 +248,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -568,7 +568,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
mode:
- name: "Basic"
opts: ""
@@ -607,7 +607,7 @@ jobs:
fmt:
name: Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -624,7 +624,7 @@ jobs:
clippy:
name: Clippy
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -710,7 +710,7 @@ jobs:
coverage:
if: github.event_name == 'merge_group'
runs-on: ubuntu-22.04-8-cores
runs-on: ubuntu-20.04-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -770,7 +770,7 @@ jobs:
# compat:
# name: Compatibility Test
# needs: build
# runs-on: ubuntu-22.04
# runs-on: ubuntu-20.04
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v4

View File

@@ -9,7 +9,7 @@ concurrency:
jobs:
docbot:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
permissions:
pull-requests: write
contents: read

View File

@@ -31,7 +31,7 @@ name: CI
jobs:
typos:
name: Spell Check with Typos
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
@@ -39,7 +39,7 @@ jobs:
- uses: crate-ci/typos@master
license-header-check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
name: Check License Header
steps:
- uses: actions/checkout@v4
@@ -49,29 +49,29 @@ jobs:
check:
name: Check
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
fmt:
name: Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
clippy:
name: Clippy
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
coverage:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
test:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
@@ -80,7 +80,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
mode:
- name: "Basic"
- name: "Remote WAL"

View File

@@ -1,52 +0,0 @@
# Validates the Grafana dashboard whenever a pull request against `main`
# touches anything under grafana/, and publishes a panel summary to the job
# summary page.
name: Check Grafana Panels

on:
  pull_request:
    branches:
      - main
    paths:
      - 'grafana/**'  # Trigger only when files under the grafana/ directory change

jobs:
  check-panels:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository
      - name: Checkout repository
        uses: actions/checkout@v4

      # Install jq (required for the scripts). Refresh the package index first
      # so the install does not fail against a stale mirror listing.
      - name: Install jq
        run: |
          sudo apt-get update
          sudo apt-get install -y jq

      # Make the check.sh script executable
      - name: Make check.sh executable
        run: chmod +x grafana/check.sh

      # Run the check.sh script
      - name: Run check.sh
        run: ./grafana/check.sh

      # Only run summary.sh for pull_request events (not for merge queues or final pushes)
      - name: Check if this is a pull request
        id: check-pr
        run: |
          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            echo "is_pull_request=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_pull_request=false" >> "$GITHUB_OUTPUT"
          fi

      # Make the summary.sh script executable
      - name: Make summary.sh executable
        if: steps.check-pr.outputs.is_pull_request == 'true'
        run: chmod +x grafana/summary.sh

      # Run the summary.sh script and add its output to the GitHub Job Summary
      - name: Run summary.sh and add to Job Summary
        if: steps.check-pr.outputs.is_pull_request == 'true'
        run: |
          SUMMARY=$(./grafana/summary.sh)
          echo "### Summary of Grafana Panels" >> "$GITHUB_STEP_SUMMARY"
          echo "$SUMMARY" >> "$GITHUB_STEP_SUMMARY"

View File

@@ -14,11 +14,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -70,7 +70,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -182,7 +182,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps:
@@ -214,7 +214,7 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR has a daily sync with DockerHub, so don't worry about the image not being updated.
@@ -249,7 +249,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -275,7 +275,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -303,7 +303,7 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
permissions:
issues: write
env:

View File

@@ -13,7 +13,7 @@ jobs:
sqlness-test:
name: Run sqlness test
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -133,7 +133,7 @@ jobs:
name: Check status
needs: [sqlness-test, sqlness-windows, test-on-windows]
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
check-result: ${{ steps.set-check-result.outputs.check-result }}
steps:
@@ -146,7 +146,7 @@ jobs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [check-status]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:

View File

@@ -29,7 +29,7 @@ jobs:
release-dev-builder-images:
name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
runs-on: ubuntu-latest
runs-on: ubuntu-22.04-16-cores
outputs:
version: ${{ steps.set-version.outputs.version }}
steps:
@@ -63,7 +63,7 @@ jobs:
release-dev-builder-images-ecr:
name: Release dev builder images to AWS ECR
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
needs: [
release-dev-builder-images
]
@@ -148,7 +148,7 @@ jobs:
release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
needs: [
release-dev-builder-images
]

View File

@@ -18,11 +18,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -97,7 +97,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -299,7 +299,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-2004-16-cores
outputs:
build-image-result: ${{ steps.set-build-image-result.outputs.build-image-result }}
steps:
@@ -335,7 +335,7 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR has a daily sync with DockerHub, so don't worry about the image not being updated.
@@ -377,7 +377,7 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
@@ -396,7 +396,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -422,7 +422,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -448,7 +448,7 @@ jobs:
name: Bump doc version
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [allocate-runners]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
@@ -475,7 +475,7 @@ jobs:
build-macos-artifacts,
build-windows-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.

View File

@@ -13,7 +13,7 @@ concurrency:
jobs:
check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4

46
Cargo.lock generated
View File

@@ -4167,7 +4167,6 @@ dependencies = [
"bytes",
"cache",
"catalog",
"chrono",
"client",
"common-base",
"common-catalog",
@@ -4702,7 +4701,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486#d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=072ce580502e015df1a6b03a185b60309a7c2a7a#072ce580502e015df1a6b03a185b60309a7c2a7a"
dependencies = [
"prost 0.13.3",
"serde",
@@ -5567,7 +5566,6 @@ dependencies = [
"rand",
"regex",
"regex-automata 0.4.8",
"roaring",
"serde",
"serde_json",
"snafu 0.8.5",
@@ -5899,15 +5897,15 @@ dependencies = [
[[package]]
name = "jsonpath-rust"
version = "0.7.5"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b"
checksum = "69a61b87f6a55cc6c28fed5739dd36b9642321ce63e4a5e4a4715d69106f4a10"
dependencies = [
"pest",
"pest_derive",
"regex",
"serde_json",
"thiserror 2.0.12",
"thiserror 1.0.64",
]
[[package]]
@@ -8272,7 +8270,7 @@ dependencies = [
"rand",
"ring",
"rust_decimal",
"thiserror 2.0.12",
"thiserror 2.0.6",
"tokio",
"tokio-rustls 0.26.0",
"tokio-util",
@@ -8384,7 +8382,7 @@ dependencies = [
"greptime-proto",
"itertools 0.10.5",
"jsonb",
"jsonpath-rust 0.7.5",
"jsonpath-rust 0.7.3",
"lazy_static",
"moka",
"once_cell",
@@ -8762,7 +8760,6 @@ dependencies = [
"common-recordbatch",
"common-telemetry",
"datafusion",
"datafusion-common",
"datafusion-expr",
"datatypes",
"futures",
@@ -8776,9 +8773,8 @@ dependencies = [
[[package]]
name = "promql-parser"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c6b1429bdd199d53bd58b745075c1652efedbe2746e5d4f0d56d3184dda48ec"
version = "0.4.3"
source = "git+https://github.com/GreptimeTeam/promql-parser.git?rev=27abb8e16003a50c720f00d6c85f41f5fa2a2a8e#27abb8e16003a50c720f00d6c85f41f5fa2a2a8e"
dependencies = [
"cfgrammar",
"chrono",
@@ -9636,16 +9632,6 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "roaring"
version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661"
dependencies = [
"bytemuck",
"byteorder",
]
[[package]]
name = "robust"
version = "1.1.0"
@@ -11065,7 +11051,7 @@ dependencies = [
"serde_json",
"sha2",
"smallvec",
"thiserror 2.0.12",
"thiserror 2.0.6",
"tokio",
"tokio-stream",
"tracing",
@@ -11150,7 +11136,7 @@ dependencies = [
"smallvec",
"sqlx-core",
"stringprep",
"thiserror 2.0.12",
"thiserror 2.0.6",
"tracing",
"whoami",
]
@@ -11188,7 +11174,7 @@ dependencies = [
"smallvec",
"sqlx-core",
"stringprep",
"thiserror 2.0.12",
"thiserror 2.0.6",
"tracing",
"whoami",
]
@@ -11969,11 +11955,11 @@ dependencies = [
[[package]]
name = "thiserror"
version = "2.0.12"
version = "2.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47"
dependencies = [
"thiserror-impl 2.0.12",
"thiserror-impl 2.0.6",
]
[[package]]
@@ -11989,9 +11975,9 @@ dependencies = [
[[package]]
name = "thiserror-impl"
version = "2.0.12"
version = "2.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -129,7 +129,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d92c9ac4e90ef4abdcf5c2eaf5a164e18ba09486" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "072ce580502e015df1a6b03a185b60309a7c2a7a" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -160,7 +160,9 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.5", features = ["ser"] }
promql-parser = { git = "https://github.com/GreptimeTeam/promql-parser.git", features = [
"ser",
], rev = "27abb8e16003a50c720f00d6c85f41f5fa2a2a8e" }
prost = "0.13"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"

View File

@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-a71b93dd-20250305072908
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-9d0fa5d5-20250124085746
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu
@@ -60,8 +60,6 @@ ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), all)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), amd64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), arm64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/arm64 --push
else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif

View File

@@ -1,4 +1,4 @@
FROM ubuntu:22.04 as builder
FROM ubuntu:20.04 as builder
ARG CARGO_PROFILE
ARG FEATURES

View File

@@ -1,4 +1,4 @@
FROM ubuntu:latest
FROM ubuntu:22.04
# The binary name of GreptimeDB executable.
# Defaults to "greptime", but sometimes in other projects it might be different.

View File

@@ -41,7 +41,7 @@ RUN mv protoc3/include/* /usr/local/include/
# and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
# of the Git's addition to the "safe.directory" at the first place (see the commit message here:
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# wildcard here. However, that requires the git's config files and the submodules all owned by the very same user.
# It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
# it can be a different user that has prepared the submodules.

View File

@@ -1,19 +0,0 @@
#!/usr/bin/env bash
# Verifies that every "stat" or "timeseries" panel in greptimedb-cluster.json
# (located next to this script) carries a non-empty description.
#
# Exit status: 0 when all checked panels have a description, 1 when at least
# one does not or when the dashboard file cannot be read/parsed.

BASEDIR=$(dirname "$0")
DASHBOARD="$BASEDIR/greptimedb-cluster.json"

# Use jq to collect panels with empty or missing descriptions.
# The panel type is "stat" (singular), the same value summary.sh filters on;
# the previous "stats" spelling never matched, so stat panels went unchecked.
# jq reads the file directly so that a missing or malformed dashboard makes the
# command fail instead of yielding empty output that would look like success.
if ! invalid_panels=$(jq -r '
  .panels[]
  | select((.type == "stat" or .type == "timeseries") and (.description == "" or .description == null))
' "$DASHBOARD"); then
  echo "Error: failed to read panels from $DASHBOARD" >&2
  exit 1
fi

# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
  echo "Error: The following panels have empty or missing descriptions:"
  echo "$invalid_panels"
  exit 1
else
  echo "All panels with type 'stat' or 'timeseries' have valid descriptions."
  exit 0
fi

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env bash
# Prints a Markdown table (title, description, query expressions) for every
# "stat" or "timeseries" panel in greptimedb-cluster.json, which is expected
# to sit next to this script. Newlines inside cells are rendered as <br>.

BASEDIR=$(dirname "$0")

# Table header.
echo '| Title | Description | Expressions |
|---|---|---|'

# One table row per panel.
# `// ""` and `// []` substitute empty values for missing fields: gsub and map
# raise an error on null, which would otherwise abort the whole summary as soon
# as one panel lacks a description, targets, or both expr and rawSql.
# jq reads the file directly (path quoted) so a missing dashboard is reported
# through jq's exit status rather than silently producing a header-only table.
jq -r '
  .panels |
  map(select(.type == "stat" or .type == "timeseries")) |
  .[] | "| \(.title) | \((.description // "") | gsub("\n"; "<br>")) | \((.targets // []) | map((.expr // .rawSql // "") | "`\(. | gsub("\n"; "<br>"))`") | join("<br>")) |"
' "$BASEDIR/greptimedb-cluster.json"

View File

@@ -38,7 +38,6 @@ use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use session::context::{Channel, QueryContext};
use snafu::prelude::*;
use table::dist_table::DistTable;
use table::metadata::TableId;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::table_name::TableName;
use table::TableRef;
@@ -287,28 +286,6 @@ impl CatalogManager for KvBackendCatalogManager {
return Ok(None);
}
async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>> {
let table_info_values = self
.table_metadata_manager
.table_info_manager()
.batch_get(table_ids)
.await
.context(TableMetadataManagerSnafu)?;
let tables = table_info_values
.into_values()
.filter(|t| t.table_info.catalog_name == catalog && t.table_info.schema_name == schema)
.map(build_table)
.collect::<Result<Vec<_>>>()?;
Ok(tables)
}
fn tables<'a>(
&'a self,
catalog: &'a str,

View File

@@ -87,14 +87,6 @@ pub trait CatalogManager: Send + Sync {
query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>>;
/// Returns the tables by table ids.
async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>>;
/// Returns all tables with a stream by catalog and schema.
fn tables<'a>(
&'a self,

View File

@@ -14,7 +14,7 @@
use std::any::Any;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::sync::{Arc, RwLock, Weak};
use async_stream::{stream, try_stream};
@@ -28,7 +28,6 @@ use common_meta::kv_backend::memory::MemoryKvBackend;
use futures_util::stream::BoxStream;
use session::context::QueryContext;
use snafu::OptionExt;
use table::metadata::TableId;
use table::TableRef;
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
@@ -144,33 +143,6 @@ impl CatalogManager for MemoryCatalogManager {
Ok(result)
}
/// Returns the tables registered under `catalog`/`schema` whose table id
/// appears in `table_ids`.
///
/// Fails with the catalog-not-found or schema-not-found error when the
/// respective entry is absent from the in-memory maps.
async fn tables_by_ids(
    &self,
    catalog: &str,
    schema: &str,
    table_ids: &[TableId],
) -> Result<Vec<TableRef>> {
    let guard = self.catalogs.read().unwrap();
    let schema_map = guard.get(catalog).context(CatalogNotFoundSnafu {
        catalog_name: catalog,
    })?;
    let table_map = schema_map
        .get(schema)
        .context(SchemaNotFoundSnafu { catalog, schema })?;

    let wanted: HashSet<_> = table_ids.iter().collect();
    // It is very inefficient, but we do not need to optimize it since it will not be called in `MemoryCatalogManager`.
    let mut found = Vec::new();
    for table in table_map.values() {
        if wanted.contains(&table.table_info().table_id()) {
            found.push(table.clone());
        }
    }
    Ok(found)
}
fn tables<'a>(
&'a self,
catalog: &'a str,

View File

@@ -16,6 +16,7 @@
mod client;
pub mod client_manager;
#[cfg(feature = "testing")]
mod database;
pub mod error;
pub mod flow;
@@ -33,6 +34,7 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt;
pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database;
pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

View File

@@ -287,6 +287,7 @@ impl StartCommand {
.await
.context(StartDatanodeSnafu)?;
let cluster_id = 0; // TODO(hl): read from config
let member_id = opts
.node_id
.context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -295,10 +296,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Datanode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Datanode { member_id },
meta_config,
)
.await
.context(MetaClientInitSnafu)?;
let meta_backend = Arc::new(MetaKvBackend {
client: meta_client.clone(),

View File

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
@@ -241,6 +241,9 @@ impl StartCommand {
let mut opts = opts.component;
opts.grpc.detect_server_addr();
// TODO(discord9): make it not optional after cluster id is required
let cluster_id = opts.cluster_id.unwrap_or(0);
let member_id = opts
.node_id
.context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -249,10 +252,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Flownode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Flownode { member_id },
meta_config,
)
.await
.context(MetaClientInitSnafu)?;
let cache_max_capacity = meta_config.metadata_cache_max_capacity;
let cache_ttl = meta_config.metadata_cache_ttl;
@@ -311,8 +317,6 @@ impl StartCommand {
Arc::new(executor),
);
let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let flownode_builder = FlownodeBuilder::new(
opts,
@@ -320,7 +324,6 @@ impl StartCommand {
table_metadata_manager,
catalog_manager.clone(),
flow_metadata_manager,
Arc::new(frontend_client),
)
.with_heartbeat_task(heartbeat_task);

View File

@@ -295,10 +295,14 @@ impl StartCommand {
let cache_ttl = meta_client_options.metadata_cache_ttl;
let cache_tti = meta_client_options.metadata_cache_tti;
let meta_client =
meta_client::create_meta_client(MetaClientType::Frontend, meta_client_options)
.await
.context(MetaClientInitSnafu)?;
let cluster_id = 0; // (TODO: jeremy): It is currently a reserved field and has not been enabled.
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Frontend,
meta_client_options,
)
.await
.context(MetaClientInitSnafu)?;
// TODO(discord9): add helper function to ease the creation of cache registry&such
let cached_meta_backend =

View File

@@ -54,10 +54,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{
FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
FrontendInvoker,
};
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
@@ -536,16 +533,12 @@ impl StartCommand {
flow: opts.flow.clone(),
..Default::default()
};
let fe_server_addr = fe_opts.grpc.bind_addr.clone();
let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
let flow_builder = FlownodeBuilder::new(
flownode_options,
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
flow_metadata_manager.clone(),
Arc::new(frontend_client),
);
let flownode = Arc::new(
flow_builder

View File

@@ -130,10 +130,3 @@ pub const SEMANTIC_TYPE_TIME_INDEX: &str = "TIMESTAMP";
/// Returns `true` when `schema` matches `INFORMATION_SCHEMA_NAME`, and `false`
/// for every other schema name.
pub fn is_readonly_schema(schema: &str) -> bool {
matches!(schema, INFORMATION_SCHEMA_NAME)
}
// ---- special table and fields ----
pub const TRACE_ID_COLUMN: &str = "trace_id";
pub const SPAN_ID_COLUMN: &str = "span_id";
pub const SPAN_NAME_COLUMN: &str = "span_name";
pub const PARENT_SPAN_ID_COLUMN: &str = "parent_span_id";
// ---- End of special table and fields ----

View File

@@ -12,16 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Two UDAFs are implemented for HyperLogLog:
//!
//! - `hll`: Accepts a string column and aggregates the values into a
//! HyperLogLog state.
//! - `hll_merge`: Accepts a binary column of states generated by `hll`
//! and merges them into a single state.
//!
//! The states can be then used to estimate the cardinality of the
//! values in the column by `hll_count` UDF.
use std::sync::Arc;
use common_query::prelude::*;

View File

@@ -12,12 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the `uddsketch_state` UDAF that generate the state of
//! UDDSketch for a given set of values.
//!
//! The generated state can be used to compute approximate quantiles using
//! `uddsketch_calc` UDF.
use std::sync::Arc;
use common_query::prelude::*;

View File

@@ -12,16 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Deprecation Warning:
//!
//! This module is deprecated and will be removed in the future.
//! All UDAF implementations here are not maintained and should
//! not be used before they are refactored into the `src/aggr`
//! version.
mod argmax;
mod argmin;
mod diff;
mod mean;
mod polyval;
mod scipy_stats_norm_cdf;
mod scipy_stats_norm_pdf;
use std::sync::Arc;
pub use argmax::ArgmaxAccumulatorCreator;
pub use argmin::ArgminAccumulatorCreator;
use common_query::logical_plan::AggregateFunctionCreatorRef;
pub use diff::DiffAccumulatorCreator;
pub use mean::MeanAccumulatorCreator;
pub use polyval::PolyvalAccumulatorCreator;
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
use crate::function_registry::FunctionRegistry;
use crate::scalars::vector::product::VectorProductCreator;
@@ -68,22 +76,31 @@ pub(crate) struct AggregateFunctions;
// Registration entry point for the aggregate functions of this module.
impl AggregateFunctions {
// Registers each aggregate function with `registry` under its SQL-visible
// name, together with its argument count and a factory for its creator.
pub fn register(registry: &FunctionRegistry) {
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
"vec_sum",
1,
Arc::new(|| Arc::new(VectorSumCreator::default())),
)));
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
"vec_product",
1,
Arc::new(|| Arc::new(VectorProductCreator::default())),
)));
// Shorthand for one registration: `$name` is the function name,
// `$arg_count` the number of arguments, and `$creator` a type whose
// `Default` value is produced by the registered factory closure.
macro_rules! register_aggr_func {
($name :expr, $arg_count :expr, $creator :ty) => {
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
$name,
$arg_count,
Arc::new(|| Arc::new(<$creator>::default())),
)));
};
}
register_aggr_func!("diff", 1, DiffAccumulatorCreator);
register_aggr_func!("mean", 1, MeanAccumulatorCreator);
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
register_aggr_func!("vec_sum", 1, VectorSumCreator);
register_aggr_func!("vec_product", 1, VectorProductCreator);
// Only registered when the crate is built with the `geo` feature.
#[cfg(feature = "geo")]
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
register_aggr_func!(
"json_encode_path",
3,
Arc::new(|| Arc::new(super::geo::encoding::JsonPathEncodeFunctionCreator::default())),
)));
super::geo::encoding::JsonPathEncodeFunctionCreator
);
}
}

View File

@@ -0,0 +1,208 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// Accumulator state for the `argmax` aggregate: the index of the maximum value.
// Modeled after https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
#[derive(Debug, Default)]
pub struct Argmax<T> {
// Largest value seen so far; `None` until the first non-null value arrives.
max: Option<T>,
// Index recorded together with `max`; only meaningful when `max` is `Some`.
n: u64,
}
impl<T> Argmax<T>
where
    T: PartialOrd + Copy,
{
    /// Records `value` and `index` when `value` is strictly greater than the
    /// current maximum, or when no maximum has been recorded yet.
    ///
    /// Values that are equal to, or not comparable with, the current maximum
    /// leave the state untouched.
    fn update(&mut self, value: T, index: u64) {
        let is_new_max = match self.max {
            // Nothing recorded yet: the first value always wins.
            None => true,
            Some(current) => matches!(current.partial_cmp(&value), Some(Ordering::Less)),
        };
        if is_new_max {
            self.max = Some(value);
            self.n = index;
        }
    }
}
impl<T> Accumulator for Argmax<T>
where
    T: WrapperType + PartialOrd,
{
    /// Returns the intermediate state as `[max, index]`; `max` is `Value::Null`
    /// when no value has been observed yet.
    fn state(&self) -> Result<Vec<Value>> {
        match self.max {
            Some(max) => Ok(vec![max.into(), self.n.into()]),
            _ => Ok(vec![Value::Null, self.n.into()]),
        }
    }

    /// Feeds one batch of input into the accumulator.
    ///
    /// An empty slice is a no-op; otherwise exactly one column is expected.
    /// Null entries are skipped, and indices are positions within the batch.
    fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
        if values.is_empty() {
            return Ok(());
        }
        // `argmax` takes a single argument; reject anything else instead of
        // silently ignoring the extra columns.
        ensure!(values.len() == 1, InvalidInputStateSnafu);

        let column = &values[0];
        // For a constant vector only the wrapped inner vector is inspected.
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { Helper::static_cast(column) }
        };
        for (i, v) in column.iter_data().enumerate() {
            if let Some(value) = v {
                self.update(value, i as u64);
            }
        }
        Ok(())
    }

    /// Merges states produced by `state()` of other accumulators.
    ///
    /// `states[0]` holds the maxima and `states[1]` the matching indices.
    /// The two columns are paired row by row, and a row is used only when
    /// both entries are non-null.
    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
        if states.is_empty() {
            return Ok(());
        }

        ensure!(
            states.len() == 2,
            BadAccumulatorImplSnafu {
                err_msg: "expect 2 states in `merge_batch`",
            }
        );

        let max = &states[0];
        let index = &states[1];
        let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
        let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
        // Zip first and filter afterwards. Flattening each column on its own
        // would shift the pairing whenever a state has a NULL max but a
        // non-null index, which is what `state()` emits for an empty
        // accumulator.
        index
            .iter_data()
            .zip(max.iter_data())
            .for_each(|(i, max)| {
                if let (Some(i), Some(max)) = (i, max) {
                    self.update(max, i);
                }
            });
        Ok(())
    }

    /// Returns the recorded index, or `Value::Null` if no value was observed.
    fn evaluate(&self) -> Result<Value> {
        match self.max {
            Some(_) => Ok(self.n.into()),
            _ => Ok(Value::Null),
        }
    }
}
// Creator type registered for the `argmax` aggregate function.
// NOTE(review): presumably `as_aggr_func_creator` / `AggrFuncTypeStore` supply
// the storage behind `input_types()` used below — confirm in `common_macro`.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ArgmaxAccumulatorCreator {}
impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
// Returns a factory that builds an `Argmax` accumulator specialised for the
// logical type of the first input column; input types not matched by
// `with_match_primitive_type_id!` yield a `CreateAccumulator` error.
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
// Only the first input type is consulted.
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"ARGMAX\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
// The result is always an unsigned 64-bit index.
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
// State layout is `[max: <input type>, index: u64]`, matching `Argmax::state`.
// Fails with `InvalidInputState` unless exactly one input type is recorded.
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
input_types.into_iter().next().unwrap(),
ConcreteDataType::uint64_datatype(),
])
}
}
#[cfg(test)]
mod test {
    use datatypes::vectors::Int32Vector;

    use super::*;

    /// Runs `update_batch` on a fresh accumulator and returns its evaluation.
    fn argmax_of(columns: &[VectorRef]) -> Value {
        let mut acc = Argmax::<i32>::default();
        acc.update_batch(columns).unwrap();
        acc.evaluate().unwrap()
    }

    /// Wraps nullable `i32` data into a single-column batch.
    fn int32_batch(data: Vec<Option<i32>>) -> Vec<VectorRef> {
        vec![Arc::new(Int32Vector::from(data))]
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must not update anything.
        assert_eq!(Value::Null, argmax_of(&[]));

        // A single non-null value.
        assert_eq!(Value::from(0_u64), argmax_of(&int32_batch(vec![Some(42)])));

        // A single null value.
        assert_eq!(
            Value::Null,
            argmax_of(&int32_batch(vec![Option::<i32>::None]))
        );

        // A batch without nulls.
        assert_eq!(
            Value::from(2_u64),
            argmax_of(&int32_batch(vec![Some(-1i32), Some(1), Some(3)]))
        );

        // A batch containing a null.
        assert_eq!(
            Value::from(2_u64),
            argmax_of(&int32_batch(vec![Some(-2i32), None, Some(4)]))
        );

        // A constant vector.
        let constant: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
        assert_eq!(Value::from(0_u64), argmax_of(&constant));
    }
}

View File

@@ -0,0 +1,216 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// Accumulator state for the `argmin` aggregate: the index of the minimum value.
// Modeled after https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
#[derive(Debug, Default)]
pub struct Argmin<T> {
// Smallest value seen so far; `None` until the first non-null value arrives.
min: Option<T>,
// Index recorded together with `min`; only meaningful when `min` is `Some`.
n: u32,
}
impl<T> Argmin<T>
where
    T: Copy + PartialOrd,
{
    /// Records `value` and `index` when `value` is strictly smaller than the
    /// current minimum, or when no minimum has been recorded yet.
    ///
    /// Values that are equal to, or not comparable with, the current minimum
    /// leave the state untouched.
    fn update(&mut self, value: T, index: u32) {
        // With no minimum recorded yet the first value always wins.
        let is_new_min = self.min.map_or(true, |current| {
            matches!(current.partial_cmp(&value), Some(Ordering::Greater))
        });
        if is_new_min {
            self.min = Some(value);
            self.n = index;
        }
    }
}
impl<T> Accumulator for Argmin<T>
where
    T: WrapperType + PartialOrd,
{
    /// Returns the intermediate state as `[min, index]`; `min` is `Value::Null`
    /// when no value has been observed yet.
    fn state(&self) -> Result<Vec<Value>> {
        match self.min {
            Some(min) => Ok(vec![min.into(), self.n.into()]),
            _ => Ok(vec![Value::Null, self.n.into()]),
        }
    }

    /// Feeds one batch of input into the accumulator.
    ///
    /// An empty slice is a no-op; otherwise exactly one column is expected.
    /// Null entries are skipped, and indices are positions within the batch.
    fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
        if values.is_empty() {
            return Ok(());
        }

        ensure!(values.len() == 1, InvalidInputStateSnafu);

        let column = &values[0];
        // For a constant vector only the wrapped inner vector is inspected.
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { Helper::static_cast(column) }
        };
        for (i, v) in column.iter_data().enumerate() {
            if let Some(value) = v {
                self.update(value, i as u32);
            }
        }
        Ok(())
    }

    /// Merges states produced by `state()` of other accumulators.
    ///
    /// `states[0]` holds the minima and `states[1]` the matching indices.
    /// The two columns are paired row by row, and a row is used only when
    /// both entries are non-null.
    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
        if states.is_empty() {
            return Ok(());
        }

        ensure!(
            states.len() == 2,
            BadAccumulatorImplSnafu {
                err_msg: "expect 2 states in `merge_batch`",
            }
        );

        let min = &states[0];
        let index = &states[1];
        let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
        let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
        // Zip first and filter afterwards. Flattening each column on its own
        // would shift the pairing whenever a state has a NULL min but a
        // non-null index, which is what `state()` emits for an empty
        // accumulator.
        index
            .iter_data()
            .zip(min.iter_data())
            .for_each(|(i, min)| {
                if let (Some(i), Some(min)) = (i, min) {
                    self.update(min, i);
                }
            });
        Ok(())
    }

    /// Returns the recorded index, or `Value::Null` if no value was observed.
    fn evaluate(&self) -> Result<Value> {
        match self.min {
            Some(_) => Ok(self.n.into()),
            _ => Ok(Value::Null),
        }
    }
}
// Creator type registered for the `argmin` aggregate function.
// NOTE(review): presumably `as_aggr_func_creator` / `AggrFuncTypeStore` supply
// the storage behind `input_types()` used below — confirm in `common_macro`.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ArgminAccumulatorCreator {}
impl AggregateFunctionCreator for ArgminAccumulatorCreator {
// Returns a factory that builds an `Argmin` accumulator specialised for the
// logical type of the first input column; input types not matched by
// `with_match_primitive_type_id!` yield a `CreateAccumulator` error.
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
// Only the first input type is consulted.
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"ARGMIN\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
// The result is always an unsigned 32-bit index.
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint32_datatype())
}
// State layout is `[min: <input type>, index: u32]`, matching `Argmin::state`.
// Fails with `InvalidInputState` unless exactly one input type is recorded.
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
input_types.into_iter().next().unwrap(),
ConcreteDataType::uint32_datatype(),
])
}
}
#[cfg(test)]
mod test {
    use datatypes::vectors::Int32Vector;

    use super::*;

    /// Runs `update_batch` on a fresh accumulator and returns its evaluation.
    fn argmin_of(columns: &[VectorRef]) -> Value {
        let mut acc = Argmin::<i32>::default();
        acc.update_batch(columns).unwrap();
        acc.evaluate().unwrap()
    }

    /// Wraps nullable `i32` data into a single-column batch.
    fn int32_batch(data: Vec<Option<i32>>) -> Vec<VectorRef> {
        vec![Arc::new(Int32Vector::from(data))]
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must not update anything.
        assert_eq!(Value::Null, argmin_of(&[]));

        // A single non-null value.
        assert_eq!(Value::from(0_u32), argmin_of(&int32_batch(vec![Some(42)])));

        // A single null value.
        assert_eq!(
            Value::Null,
            argmin_of(&int32_batch(vec![Option::<i32>::None]))
        );

        // A batch without nulls.
        assert_eq!(
            Value::from(0_u32),
            argmin_of(&int32_batch(vec![Some(-1i32), Some(1), Some(3)]))
        );

        // A batch containing a null.
        assert_eq!(
            Value::from(0_u32),
            argmin_of(&int32_batch(vec![Some(-2i32), None, Some(4)]))
        );

        // A constant vector.
        let constant: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
        assert_eq!(Value::from(0_u32), argmin_of(&constant));
    }
}

View File

@@ -0,0 +1,252 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
// Accumulator state for the `diff` aggregate: differences between consecutive values.
// Modeled after https://numpy.org/doc/stable/reference/generated/numpy.diff.html
// I is the input type, O is the output type.
#[derive(Debug, Default)]
pub struct Diff<I, O> {
// Every non-null input value collected so far, in arrival order.
values: Vec<I>,
// Ties the output type `O` to the struct without storing a value of it.
_phantom: PhantomData<O>,
}
impl<I, O> Diff<I, O> {
    /// Appends one input value to the end of the collected sequence.
    fn push(&mut self, item: I) {
        self.values.push(item);
    }
}
impl<I, O> Accumulator for Diff<I, O>
where
    I: WrapperType,
    O: WrapperType,
    I::Native: AsPrimitive<O::Native>,
    O::Native: std::ops::Sub<Output = O::Native>,
{
    /// Returns the intermediate state: one list holding every collected input
    /// value, typed with the input's data type.
    fn state(&self) -> Result<Vec<Value>> {
        let collected: Vec<Value> = self.values.iter().copied().map(Into::into).collect();
        let list = ListValue::new(collected, I::LogicalType::build_data_type());
        Ok(vec![Value::List(list)])
    }

    /// Appends the non-null values of the single input column.
    ///
    /// An empty slice is a no-op. A constant vector contributes its inner
    /// values once per logical row.
    fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
        if values.is_empty() {
            return Ok(());
        }
        ensure!(values.len() == 1, InvalidInputStateSnafu);

        let input = &values[0];
        let (repeat, typed): (usize, &<I as Scalar>::VectorType) = if input.is_const() {
            let constant: &ConstantVector = unsafe { Helper::static_cast(input) };
            (input.len(), unsafe {
                Helper::static_cast(constant.inner())
            })
        } else {
            (1, unsafe { Helper::static_cast(input) })
        };

        for _ in 0..repeat {
            for item in typed.iter_data().flatten() {
                self.push(item);
            }
        }
        Ok(())
    }

    /// Merges list states produced by `state()`: each non-null list entry is
    /// fed back through `update_batch`.
    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
        if states.is_empty() {
            return Ok(());
        }

        let first = &states[0];
        let lists = first
            .as_any()
            .downcast_ref::<ListVector>()
            .with_context(|| DowncastVectorSnafu {
                err_msg: format!(
                    "expect ListVector, got vector type {}",
                    first.vector_type_name()
                ),
            })?;

        for entry in lists.values_iter() {
            match entry.context(FromScalarValueSnafu)? {
                Some(inner) => self.update_batch(&[inner])?,
                None => {}
            }
        }
        Ok(())
    }

    /// Returns the list of differences between consecutive values, computed
    /// in the output type, or `Value::Null` with fewer than two values.
    fn evaluate(&self) -> Result<Value> {
        if self.values.len() < 2 {
            return Ok(Value::Null);
        }

        let mut deltas: Vec<Value> = Vec::with_capacity(self.values.len() - 1);
        for pair in self.values.windows(2) {
            let delta = pair[1].into_native().as_() - pair[0].into_native().as_();
            deltas.push(O::from_native(delta).into());
        }
        Ok(Value::List(ListValue::new(
            deltas,
            O::LogicalType::build_data_type(),
        )))
    }
}
// Creator type registered for the `diff` aggregate function.
// NOTE(review): presumably `as_aggr_func_creator` / `AggrFuncTypeStore` supply
// the storage behind `input_types()` used below — confirm in `common_macro`.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct DiffAccumulatorCreator {}
impl AggregateFunctionCreator for DiffAccumulatorCreator {
// Returns a factory that builds a `Diff<I, O>` accumulator where `I` is the
// wrapper of the first input's logical type and `O` the wrapper of that
// type's `LargestType`; unmatched input types yield a `CreateAccumulator`
// error.
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
// Only the first input type is consulted.
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"DIFF\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
// Declares the output as a list whose element type is the input type `$S`.
// NOTE(review): `Diff::evaluate` builds its list with the data type of `O`
// (the `LargestType` wrapper chosen in `creator`), not `$S` — confirm whether
// the declared element type here should be the `LargestType` as well.
// The fallback arm panics via `unreachable!()` for non-primitive input types.
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(ConcreteDataType::list_datatype($S::default().into()))
},
{
unreachable!()
}
)
}
// The single state column is a list of the input type, matching `Diff::state`.
// The fallback arm panics via `unreachable!()` for non-primitive input types.
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
},
{
unreachable!()
}
)
}
}
#[cfg(test)]
mod test {
    use datatypes::vectors::Int32Vector;

    use super::*;

    /// Runs `update_batch` on a fresh accumulator and returns its evaluation.
    fn diff_of(columns: &[VectorRef]) -> Value {
        let mut acc = Diff::<i32, i64>::default();
        acc.update_batch(columns).unwrap();
        acc.evaluate().unwrap()
    }

    /// Wraps nullable `i32` data into a single-column batch.
    fn int32_batch(data: Vec<Option<i32>>) -> Vec<VectorRef> {
        vec![Arc::new(Int32Vector::from(data))]
    }

    /// Builds the expected `i64` list value.
    fn int64_list(items: &[i64]) -> Value {
        let values = items.iter().copied().map(Value::from).collect();
        Value::List(ListValue::new(values, ConcreteDataType::int64_datatype()))
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must not update anything.
        let mut diff = Diff::<i32, i64>::default();
        diff.update_batch(&[]).unwrap();
        assert!(diff.values.is_empty());
        assert_eq!(Value::Null, diff.evaluate().unwrap());

        // A single non-null value is not enough to form a difference.
        assert_eq!(Value::Null, diff_of(&int32_batch(vec![Some(42)])));

        // A single null value.
        assert_eq!(
            Value::Null,
            diff_of(&int32_batch(vec![Option::<i32>::None]))
        );

        // A batch without nulls.
        assert_eq!(
            int64_list(&[2, 1]),
            diff_of(&int32_batch(vec![Some(-1i32), Some(1), Some(2)]))
        );

        // A batch containing a null.
        assert_eq!(
            int64_list(&[5, 1]),
            diff_of(&int32_batch(vec![Some(-2i32), None, Some(3), Some(4)]))
        );

        // A constant vector.
        let constant: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(Int32Vector::from_vec(vec![4])),
            4,
        ))];
        assert_eq!(int64_list(&[0, 0, 0]), diff_of(&constant));
    }
}

View File

@@ -0,0 +1,238 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::WrapperType;
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt};
// Accumulator state for the `mean` aggregate: a running sum and a count.
#[derive(Debug, Default)]
pub struct Mean<T> {
// Sum of all accumulated values, converted to `f64`.
sum: f64,
// Number of values that contributed to `sum`.
n: u64,
// Ties the input type `T` to the struct without storing a value of it.
_phantom: PhantomData<T>,
}
impl<T> Mean<T>
where
    T: WrapperType,
    T::Native: AsPrimitive<f64>,
{
    /// Accounts for a single input value: adds it (as `f64`) to the sum and
    /// increments the count by one.
    #[inline(always)]
    fn push(&mut self, value: T) {
        let as_float: f64 = value.into_native().as_();
        self.sum += as_float;
        self.n += 1;
    }

    /// Folds a partial `(sum, count)` pair into this accumulator.
    #[inline(always)]
    fn update(&mut self, sum: f64, n: u64) {
        self.n += n;
        self.sum += sum;
    }
}
impl<T> Accumulator for Mean<T>
where
    T: WrapperType,
    T::Native: AsPrimitive<f64>,
{
    /// Returns the intermediate state as `[sum, count]`.
    fn state(&self) -> Result<Vec<Value>> {
        Ok(vec![self.sum.into(), self.n.into()])
    }

    /// Accumulates the non-null values of the single input column.
    ///
    /// An empty slice is a no-op. A constant vector contributes its inner
    /// values once per logical row.
    fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
        if values.is_empty() {
            return Ok(());
        }

        ensure!(values.len() == 1, InvalidInputStateSnafu);
        let column = &values[0];
        let mut len = 1;
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            len = column.len();
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { Helper::static_cast(column) }
        };
        (0..len).for_each(|_| {
            for v in column.iter_data().flatten() {
                self.push(v);
            }
        });
        Ok(())
    }

    /// Merges states produced by `state()` of other accumulators.
    ///
    /// `states[0]` must be a `Float64Vector` of partial sums and `states[1]`
    /// a `UInt64Vector` of partial counts; rows where either entry is null
    /// are skipped.
    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
        if states.is_empty() {
            return Ok(());
        }

        ensure!(
            states.len() == 2,
            BadAccumulatorImplSnafu {
                err_msg: "expect 2 states in `merge_batch`",
            }
        );

        let sum = &states[0];
        let n = &states[1];

        let sum = sum
            .as_any()
            .downcast_ref::<Float64Vector>()
            .with_context(|| DowncastVectorSnafu {
                err_msg: format!(
                    "expect Float64Vector, got vector type {}",
                    sum.vector_type_name()
                ),
            })?;

        let n = n
            .as_any()
            .downcast_ref::<UInt64Vector>()
            .with_context(|| DowncastVectorSnafu {
                err_msg: format!(
                    "expect UInt64Vector, got vector type {}",
                    // Report the type of the count column that failed to
                    // downcast, not the already-converted sum column.
                    n.vector_type_name()
                ),
            })?;

        sum.iter_data().zip(n.iter_data()).for_each(|(sum, n)| {
            if let (Some(sum), Some(n)) = (sum, n) {
                self.update(sum, n);
            }
        });
        Ok(())
    }

    /// Returns `sum / count` as `f64`, or `Value::Null` when nothing was
    /// accumulated.
    fn evaluate(&self) -> Result<Value> {
        if self.n == 0 {
            return Ok(Value::Null);
        }
        let values = self.sum / self.n as f64;
        Ok(values.into())
    }
}
// Creator type registered for the `mean` aggregate function.
// NOTE(review): presumably `as_aggr_func_creator` / `AggrFuncTypeStore` supply
// the storage behind `input_types()` used below — confirm in `common_macro`.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct MeanAccumulatorCreator {}
impl AggregateFunctionCreator for MeanAccumulatorCreator {
// Returns a factory that builds a `Mean` accumulator parameterised by the
// native type of the first input's logical type; unmatched input types
// yield a `CreateAccumulator` error.
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
// Only the first input type is consulted.
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
},
{
let err_msg = format!(
"\"MEAN\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
// The mean is always reported as `f64`, whatever the input type.
// Fails with `InvalidInputState` unless exactly one input type is recorded.
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(ConcreteDataType::float64_datatype())
}
// State layout is `[sum: f64, count: u64]`, matching `Mean::state`.
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::float64_datatype(),
ConcreteDataType::uint64_datatype(),
])
}
}
#[cfg(test)]
mod test {
    use datatypes::vectors::Int32Vector;

    use super::*;

    /// Runs `update_batch` on a fresh accumulator and returns its evaluation.
    fn mean_of(columns: &[VectorRef]) -> Value {
        let mut acc = Mean::<i32>::default();
        acc.update_batch(columns).unwrap();
        acc.evaluate().unwrap()
    }

    /// Wraps nullable `i32` data into a single-column batch.
    fn int32_batch(data: Vec<Option<i32>>) -> Vec<VectorRef> {
        vec![Arc::new(Int32Vector::from(data))]
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must not update anything.
        assert_eq!(Value::Null, mean_of(&[]));

        // A single non-null value.
        assert_eq!(
            Value::from(42.0_f64),
            mean_of(&int32_batch(vec![Some(42)]))
        );

        // A single null value.
        assert_eq!(
            Value::Null,
            mean_of(&int32_batch(vec![Option::<i32>::None]))
        );

        // A batch without nulls.
        assert_eq!(
            Value::from(0.6666666666666666),
            mean_of(&int32_batch(vec![Some(-1i32), Some(1), Some(2)]))
        );

        // A batch containing a null.
        assert_eq!(
            Value::from(1.6666666666666667),
            mean_of(&int32_batch(vec![Some(-2i32), None, Some(3), Some(4)]))
        );

        // A constant vector.
        let constant: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
        assert_eq!(Value::from(4.0), mean_of(&constant));
    }
}

View File

@@ -0,0 +1,329 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
// Accumulator state for the `polyval` aggregate: evaluates a polynomial at `x`.
// Modeled after https://numpy.org/doc/stable/reference/generated/numpy.polyval.html
// T is the coefficient (input) type, PolyT is the type the result is computed in.
#[derive(Debug, Default)]
pub struct Polyval<T, PolyT>
where
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
// Coefficients collected so far, in arrival order.
values: Vec<T>,
// The point at which the polynomial is evaluated; `None` until first seen.
// DataFusion casts the constant argument into the i64 type.
x: Option<i64>,
// Ties the result type `PolyT` to the struct without storing a value of it.
_phantom: PhantomData<PolyT>,
}
impl<T, PolyT> Polyval<T, PolyT>
where
    T: WrapperType,
    T::Native: AsPrimitive<PolyT::Native>,
    PolyT: WrapperType,
    PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
    /// Appends one coefficient to the end of the collected sequence.
    fn push(&mut self, coefficient: T) {
        self.values.push(coefficient);
    }
}
impl<T, PolyT> Accumulator for Polyval<T, PolyT>
where
    T: WrapperType,
    T::Native: AsPrimitive<PolyT::Native>,
    PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
    PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
    i64: AsPrimitive<<PolyT as WrapperType>::Native>,
{
    /// Returns the intermediate state as `[coefficients, x]`: a list of the
    /// collected coefficients and the evaluation point (null when unset).
    fn state(&self) -> Result<Vec<Value>> {
        let nums = self
            .values
            .iter()
            .map(|&n| n.into())
            .collect::<Vec<Value>>();
        Ok(vec![
            Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
            self.x.into(),
        ])
    }

    /// Feeds one batch into the accumulator.
    ///
    /// Expects two columns of equal length: the coefficients and the
    /// evaluation point `x`. `x` must be a non-null `i64` that is identical
    /// in every row and identical to any `x` seen in earlier batches.
    fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
        if values.is_empty() {
            return Ok(());
        }

        ensure!(values.len() == 2, InvalidInputStateSnafu);
        ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
        if values[0].len() == 0 {
            return Ok(());
        }

        // The first column carries the polynomial coefficients.
        let column = &values[0];
        let mut len = 1;
        // A constant vector contributes its inner values once per logical row.
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            len = column.len();
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { Helper::static_cast(column) }
        };
        (0..len).for_each(|_| {
            for v in column.iter_data().flatten() {
                self.push(v);
            }
        });

        // The second column carries the evaluation point.
        let x = &values[1];
        let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
            err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
        })?;
        // `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
        let first = x.get(0);
        ensure!(!first.is_null(), InvalidInputColSnafu);

        for i in 1..x.len() {
            ensure!(first == x.get(i), InvalidInputColSnafu);
        }

        let first = match first {
            Value::Int64(v) => v,
            // unreachable because we have checked `first` is not null and is i64 above
            _ => unreachable!(),
        };
        if let Some(x) = self.x {
            ensure!(x == first, InvalidInputColSnafu);
        } else {
            self.x = Some(first);
        };
        Ok(())
    }

    // DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
    // merge states from other accumulators (returned by `state()` method).
    /// Merges `[coefficients, x]` states row by row.
    ///
    /// Rows whose `x` is null carry no input and are skipped. Every other
    /// row sets `x` and appends its coefficients. A state batch with zero
    /// rows is a no-op.
    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
        if states.is_empty() {
            return Ok(());
        }

        ensure!(
            states.len() == 2,
            BadAccumulatorImplSnafu {
                err_msg: "expect 2 states in `merge_batch`",
            }
        );

        let x = &states[1];
        let x = x
            .as_any()
            .downcast_ref::<Int64Vector>()
            .with_context(|| DowncastVectorSnafu {
                err_msg: format!(
                    "expect Int64Vector, got vector type {}",
                    x.vector_type_name()
                ),
            })?;

        let values = &states[0];
        let values = values
            .as_any()
            .downcast_ref::<ListVector>()
            .with_context(|| DowncastVectorSnafu {
                err_msg: format!(
                    "expect ListVector, got vector type {}",
                    values.vector_type_name()
                ),
            })?;

        // Pair each state's `x` with its own coefficient list. Looking only
        // at row 0 would panic on an empty batch and would discard every
        // later state whenever the first one came from an empty partition.
        for (x, value) in x.iter_data().zip(values.values_iter()) {
            if let Some(x) = x {
                self.x = Some(x);
                if let Some(value) = value.context(FromScalarValueSnafu)? {
                    let column: &<T as Scalar>::VectorType =
                        unsafe { Helper::static_cast(&value) };
                    for v in column.iter_data().flatten() {
                        self.push(v);
                    }
                }
            }
        }

        Ok(())
    }

    // DataFusion expects this function to return the final value of this aggregator.
    /// Evaluates the polynomial at `x`, treating the first collected
    /// coefficient as the highest-order term. Returns `Value::Null` when no
    /// coefficient or no `x` has been seen.
    fn evaluate(&self) -> Result<Value> {
        if self.values.is_empty() {
            return Ok(Value::Null);
        }
        let x = if let Some(x) = self.x {
            x
        } else {
            return Ok(Value::Null);
        };
        let len = self.values.len();
        let polyval: PolyT = self
            .values
            .iter()
            .enumerate()
            .map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
            .sum();
        Ok(polyval.into())
    }
}
// Creator type registered for the `polyval` aggregate function.
// NOTE(review): presumably `as_aggr_func_creator` / `AggrFuncTypeStore` supply
// the storage behind `input_types()` used below — confirm in `common_macro`.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct PolyvalAccumulatorCreator {}
impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
// Returns a factory that builds a `Polyval<T, PolyT>` accumulator where `T`
// is the wrapper of the first input's logical type and `PolyT` the wrapper
// of that type's `LargestType`; unmatched input types yield a
// `CreateAccumulator` error.
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
// Only the first input type (the coefficients) selects the accumulator.
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"POLYVAL\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
// The output type is the `LargestType` of the first input's logical type.
// Fails with `InvalidInputState` unless exactly two input types are
// recorded; the fallback arm panics via `unreachable!()` for non-primitive
// input types.
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
let input_type = self.input_types()?[0].logical_type_id();
with_match_primitive_type_id!(
input_type,
|$S| {
Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
},
{
unreachable!()
}
)
}
// State layout is `[list of <first input type>, x: i64]`, matching
// `Polyval::state`.
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
ConcreteDataType::int64_datatype(),
])
}
}
#[cfg(test)]
mod test {
    use datatypes::vectors::Int32Vector;

    use super::*;

    /// Builds a `[coefficients, x]` batch in which `x` is repeated once per row.
    fn batch(coefficients: Vec<Option<i32>>, x: i64) -> Vec<VectorRef> {
        let rows = coefficients.len();
        vec![
            Arc::new(Int32Vector::from(coefficients)),
            Arc::new(Int64Vector::from(vec![Some(x); rows])),
        ]
    }

    /// Feeds a single batch into a fresh accumulator and returns its final value.
    fn evaluate_batch(batch: &[VectorRef]) -> Value {
        let mut polyval = Polyval::<i32, i64>::default();
        polyval.update_batch(batch).unwrap();
        polyval.evaluate().unwrap()
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must leave the accumulator untouched.
        let mut polyval = Polyval::<i32, i64>::default();
        polyval.update_batch(&[]).unwrap();
        assert!(polyval.values.is_empty());
        assert_eq!(Value::Null, polyval.evaluate().unwrap());

        // A single non-null coefficient.
        assert_eq!(Value::Int64(3), evaluate_batch(&batch(vec![Some(3)], 2)));

        // A single null coefficient.
        assert_eq!(Value::Null, evaluate_batch(&batch(vec![None], 2)));

        // A batch without nulls.
        assert_eq!(
            Value::Int64(13),
            evaluate_batch(&batch(vec![Some(3), Some(0), Some(1)], 2))
        );

        // A batch containing a null, which is skipped.
        assert_eq!(
            Value::Int64(13),
            evaluate_batch(&batch(vec![Some(3), Some(0), None, Some(1)], 2))
        );

        // A constant coefficient column.
        let constant_batch: Vec<VectorRef> = vec![
            Arc::new(ConstantVector::new(
                Arc::new(Int32Vector::from_vec(vec![4])),
                2,
            )),
            Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
        ];
        assert_eq!(Value::Int64(24), evaluate_batch(&constant_batch));
    }
}

View File

@@ -0,0 +1,270 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
use statrs::distribution::{ContinuousCDF, Normal};
use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
//
// Accumulator behind the "SCIPYSTATSNORMCDF" aggregate function: it gathers samples and,
// on evaluation, builds a normal distribution from their mean and standard deviation and
// returns that distribution's CDF at `x`.
#[derive(Debug, Default)]
pub struct ScipyStatsNormCdf<T> {
// Samples gathered so far; null inputs are skipped by the callers of `push`.
values: Vec<T>,
// Point at which the CDF is evaluated; `None` until a batch or a merged state sets it.
x: Option<f64>,
}
impl<T> ScipyStatsNormCdf<T> {
// Appends one sample to the gathered values.
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T> Accumulator for ScipyStatsNormCdf<T>
where
T: WrapperType + std::iter::Sum<T>,
T::Native: AsPrimitive<f64>,
{
/// Serialises the intermediate state as two values: the list of gathered samples
/// followed by `x`.
fn state(&self) -> Result<Vec<Value>> {
    let mut samples: Vec<Value> = Vec::with_capacity(self.values.len());
    for &sample in &self.values {
        samples.push(sample.into());
    }

    let sample_list = ListValue::new(samples, T::LogicalType::build_data_type());
    Ok(vec![Value::List(sample_list), self.x.into()])
}
/// Consumes one input batch made of two equally long columns: the samples and `x`.
///
/// Non-null samples are appended to the gathered values. `x` is read from the first
/// row of the second column only and must equal the `x` recorded by earlier batches.
/// An empty slice or zero-length columns leave the accumulator unchanged.
///
/// # Errors
/// - `InvalidInputState` if the batch does not hold exactly two columns of equal length.
/// - `InvalidInputType` if `Helper::check_get_scalar::<f64>` rejects the second column.
/// - `InvalidInputCol` if the first `x` is null or differs from the recorded one.
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
    if values.is_empty() {
        return Ok(());
    }

    ensure!(values.len() == 2, InvalidInputStateSnafu);
    ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);

    if values[0].len() == 0 {
        return Ok(());
    }

    let column = &values[0];
    // Number of times the column data is replayed: once for a regular vector, once per
    // row for a constant vector, whose inner vector is what gets iterated below.
    let mut len = 1;
    let column: &<T as Scalar>::VectorType = if column.is_const() {
        len = column.len();
        let column: &ConstantVector = unsafe { Helper::static_cast(column) };
        unsafe { Helper::static_cast(column.inner()) }
    } else {
        unsafe { Helper::static_cast(column) }
    };

    let x = &values[1];
    // The message now names the type that is actually checked (f64); it used to say
    // "positive integer".
    let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
        err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a float64 number",
    })?;
    let first = x.get(0);
    ensure!(!first.is_null(), InvalidInputColSnafu);

    let first = match first {
        Value::Float64(OrderedFloat(v)) => v,
        // unreachable because we have checked `first` is not null and is f64 above
        _ => unreachable!(),
    };
    if let Some(x) = self.x {
        ensure!(x == first, InvalidInputColSnafu);
    } else {
        self.x = Some(first);
    };

    (0..len).for_each(|_| {
        for v in column.iter_data().flatten() {
            self.push(v);
        }
    });
    Ok(())
}
/// Merges partial states produced by `state` into this accumulator.
///
/// `states[0]` must be a list vector holding the gathered samples of each partial
/// state and `states[1]` a float64 vector holding their `x`. Each row is one partial
/// state.
///
/// `x` is taken from the first row whose `x` is not null. Previously only row 0 was
/// inspected, so a null there discarded the samples of every other row, and an empty
/// state column was indexed at 0. If no row carries an `x` there is nothing to merge.
///
/// # Errors
/// - `BadAccumulatorImpl` if `states` does not hold exactly two columns.
/// - `DowncastVector` if a column does not have the expected vector type.
/// - `FromScalarValue` if a list entry cannot be read.
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
    if states.is_empty() {
        return Ok(());
    }

    ensure!(
        states.len() == 2,
        BadAccumulatorImplSnafu {
            err_msg: "expect 2 states in `merge_batch`",
        }
    );

    let x = &states[1];
    let x = x
        .as_any()
        .downcast_ref::<Float64Vector>()
        .with_context(|| DowncastVectorSnafu {
            err_msg: format!(
                "expect Float64Vector, got vector type {}",
                x.vector_type_name()
            ),
        })?;
    // `flatten` skips null rows; `next` yields `None` for an empty or all-null column.
    let x = match x.iter_data().flatten().next() {
        Some(x) => x,
        None => return Ok(()),
    };
    self.x = Some(x);

    let values = &states[0];
    let values = values
        .as_any()
        .downcast_ref::<ListVector>()
        .with_context(|| DowncastVectorSnafu {
            err_msg: format!(
                "expect ListVector, got vector type {}",
                values.vector_type_name()
            ),
        })?;
    for value in values.values_iter() {
        if let Some(value) = value.context(FromScalarValueSnafu)? {
            let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
            for v in column.iter_data().flatten() {
                self.push(v);
            }
        }
    }
    Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
Ok(n.cdf(x).into())
}
}
}
/// Creator of [`ScipyStatsNormCdf`] accumulators.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ScipyStatsNormCdfAccumulatorCreator {}

impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
    /// Returns a factory that builds an accumulator specialised for the logical type of
    /// the first input column, or fails with `CreateAccumulator` for unsupported types.
    fn creator(&self) -> AccumulatorCreatorFunction {
        Arc::new(move |types: &[ConcreteDataType]| {
            let type_id = types[0].logical_type_id();
            with_match_primitive_type_id!(
                type_id,
                |$S| {
                    Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
                },
                {
                    let err_msg = format!(
                        "\"SCIPYSTATSNORMCDF\" aggregate function not support data type {:?}",
                        type_id,
                    );
                    CreateAccumulatorSnafu { err_msg }.fail()?
                }
            )
        })
    }

    /// The result is always float64; exactly two input types must have been recorded.
    fn output_type(&self) -> Result<ConcreteDataType> {
        ensure!(self.input_types()?.len() == 2, InvalidInputStateSnafu);
        Ok(ConcreteDataType::float64_datatype())
    }

    /// State layout: a list of the first input type followed by a float64.
    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
        let input_types = self.input_types()?;
        ensure!(input_types.len() == 2, InvalidInputStateSnafu);

        let sample_type = input_types[0].clone();
        let states = vec![
            ConcreteDataType::list_datatype(sample_type),
            ConcreteDataType::float64_datatype(),
        ];
        Ok(states)
    }
}
#[cfg(test)]
mod test {
    use datatypes::vectors::{Float64Vector, Int32Vector};

    use super::*;

    /// Feeds one `[samples, x]` batch into a fresh accumulator and returns its result.
    fn cdf_of(samples: Vec<Option<i32>>, xs: Vec<Option<f64>>) -> Value {
        let batch: Vec<VectorRef> = vec![
            Arc::new(Int32Vector::from(samples)),
            Arc::new(Float64Vector::from(xs)),
        ];
        let mut accumulator = ScipyStatsNormCdf::<i32>::default();
        accumulator.update_batch(&batch).unwrap();
        accumulator.evaluate().unwrap()
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must leave the accumulator untouched.
        let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
        scipy_stats_norm_cdf.update_batch(&[]).unwrap();
        assert!(scipy_stats_norm_cdf.values.is_empty());
        assert_eq!(Value::Null, scipy_stats_norm_cdf.evaluate().unwrap());

        // A batch without nulls.
        assert_eq!(
            Value::from(0.8086334555398362),
            cdf_of(
                vec![Some(-1i32), Some(1), Some(2)],
                vec![Some(2.0_f64), Some(2.0_f64), Some(2.0_f64)],
            )
        );

        // A batch containing nulls.
        assert_eq!(
            Value::from(0.5412943699039795),
            cdf_of(
                vec![Some(-2i32), None, Some(3), Some(4)],
                vec![Some(2.0_f64), None, Some(2.0_f64), Some(2.0_f64)],
            )
        );
    }
}

View File

@@ -0,0 +1,271 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
use statrs::distribution::{Continuous, Normal};
use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
//
// Accumulator behind the "SCIPYSTATSNORMPDF" aggregate function: it gathers samples and,
// on evaluation, builds a normal distribution from their mean and standard deviation and
// returns that distribution's PDF at `x`.
#[derive(Debug, Default)]
pub struct ScipyStatsNormPdf<T> {
// Samples gathered so far; null inputs are skipped by the callers of `push`.
values: Vec<T>,
// Point at which the PDF is evaluated; `None` until a batch or a merged state sets it.
x: Option<f64>,
}
impl<T> ScipyStatsNormPdf<T> {
// Appends one sample to the gathered values.
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T> Accumulator for ScipyStatsNormPdf<T>
where
T: WrapperType,
T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
{
/// Serialises the intermediate state as two values: the list of gathered samples
/// followed by `x`.
fn state(&self) -> Result<Vec<Value>> {
    let mut samples: Vec<Value> = Vec::with_capacity(self.values.len());
    for &sample in &self.values {
        samples.push(sample.into());
    }

    let sample_list = ListValue::new(samples, T::LogicalType::build_data_type());
    Ok(vec![Value::List(sample_list), self.x.into()])
}
/// Consumes one input batch made of two equally long columns: the samples and `x`.
///
/// Non-null samples are appended to the gathered values. `x` is read from the first
/// row of the second column only and must equal the `x` recorded by earlier batches.
/// An empty slice or zero-length columns leave the accumulator unchanged.
///
/// # Errors
/// - `InvalidInputState` if the batch does not hold exactly two columns of equal length.
/// - `InvalidInputType` if `Helper::check_get_scalar::<f64>` rejects the second column.
/// - `InvalidInputCol` if the first `x` is null or differs from the recorded one.
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
    if values.is_empty() {
        return Ok(());
    }

    ensure!(values.len() == 2, InvalidInputStateSnafu);
    ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);

    if values[0].len() == 0 {
        return Ok(());
    }

    let column = &values[0];
    // Number of times the column data is replayed: once for a regular vector, once per
    // row for a constant vector, whose inner vector is what gets iterated below.
    let mut len = 1;
    let column: &<T as Scalar>::VectorType = if column.is_const() {
        len = column.len();
        let column: &ConstantVector = unsafe { Helper::static_cast(column) };
        unsafe { Helper::static_cast(column.inner()) }
    } else {
        unsafe { Helper::static_cast(column) }
    };

    let x = &values[1];
    // The message now names the type that is actually checked (f64); it used to say
    // "positive integer".
    let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
        err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a float64 number",
    })?;
    let first = x.get(0);
    ensure!(!first.is_null(), InvalidInputColSnafu);

    let first = match first {
        Value::Float64(OrderedFloat(v)) => v,
        // unreachable because we have checked `first` is not null and is f64 above
        _ => unreachable!(),
    };
    if let Some(x) = self.x {
        ensure!(x == first, InvalidInputColSnafu);
    } else {
        self.x = Some(first);
    };

    (0..len).for_each(|_| {
        for v in column.iter_data().flatten() {
            self.push(v);
        }
    });
    Ok(())
}
/// Merges partial states produced by `state` into this accumulator.
///
/// `states[0]` must be a list vector holding the gathered samples of each partial
/// state and `states[1]` a float64 vector holding their `x`. Each row is one partial
/// state.
///
/// `x` is taken from the first row whose `x` is not null. Previously only row 0 was
/// inspected, so a null there discarded the samples of every other row, and an empty
/// state column was indexed at 0. If no row carries an `x` there is nothing to merge.
///
/// # Errors
/// - `BadAccumulatorImpl` if `states` does not hold exactly two columns.
/// - `DowncastVector` if a column does not have the expected vector type.
/// - `FromScalarValue` if a list entry cannot be read.
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
    if states.is_empty() {
        return Ok(());
    }

    ensure!(
        states.len() == 2,
        BadAccumulatorImplSnafu {
            err_msg: "expect 2 states in `merge_batch`",
        }
    );

    let x = &states[1];
    let x = x
        .as_any()
        .downcast_ref::<Float64Vector>()
        .with_context(|| DowncastVectorSnafu {
            err_msg: format!(
                "expect Float64Vector, got vector type {}",
                x.vector_type_name()
            ),
        })?;
    // `flatten` skips null rows; `next` yields `None` for an empty or all-null column.
    let x = match x.iter_data().flatten().next() {
        Some(x) => x,
        None => return Ok(()),
    };
    self.x = Some(x);

    let values = &states[0];
    let values = values
        .as_any()
        .downcast_ref::<ListVector>()
        .with_context(|| DowncastVectorSnafu {
            err_msg: format!(
                "expect ListVector, got vector type {}",
                values.vector_type_name()
            ),
        })?;
    for value in values.values_iter() {
        if let Some(value) = value.context(FromScalarValueSnafu)? {
            let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
            for v in column.iter_data().flatten() {
                self.push(v);
            }
        }
    }
    Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
Ok(n.pdf(x).into())
}
}
}
/// Creator of [`ScipyStatsNormPdf`] accumulators.
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ScipyStatsNormPdfAccumulatorCreator {}

impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
    /// Returns a factory that builds an accumulator specialised for the logical type of
    /// the first input column, or fails with `CreateAccumulator` for unsupported types.
    fn creator(&self) -> AccumulatorCreatorFunction {
        let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
            let input_type = &types[0];
            with_match_primitive_type_id!(
                input_type.logical_type_id(),
                |$S| {
                    Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
                },
                {
                    // The function name is spelled in upper case, matching the message
                    // emitted by `update_batch` (it used to read "SCIPYSTATSNORMpdf").
                    let err_msg = format!(
                        "\"SCIPYSTATSNORMPDF\" aggregate function not support data type {:?}",
                        input_type.logical_type_id(),
                    );
                    CreateAccumulatorSnafu { err_msg }.fail()?
                }
            )
        });
        creator
    }

    /// The result is always float64.
    ///
    /// # Errors
    /// `InvalidInputState` if the stored input types do not hold exactly two entries.
    fn output_type(&self) -> Result<ConcreteDataType> {
        let input_types = self.input_types()?;
        ensure!(input_types.len() == 2, InvalidInputStateSnafu);
        Ok(ConcreteDataType::float64_datatype())
    }

    /// State layout: a list of the first input type followed by a float64.
    ///
    /// # Errors
    /// `InvalidInputState` if the stored input types do not hold exactly two entries.
    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
        let input_types = self.input_types()?;
        ensure!(input_types.len() == 2, InvalidInputStateSnafu);
        Ok(vec![
            ConcreteDataType::list_datatype(input_types[0].clone()),
            ConcreteDataType::float64_datatype(),
        ])
    }
}
#[cfg(test)]
mod test {
    use datatypes::vectors::{Float64Vector, Int32Vector};

    use super::*;

    /// Feeds one `[samples, x]` batch into a fresh accumulator and returns its result.
    fn pdf_of(samples: Vec<Option<i32>>, xs: Vec<Option<f64>>) -> Value {
        let batch: Vec<VectorRef> = vec![
            Arc::new(Int32Vector::from(samples)),
            Arc::new(Float64Vector::from(xs)),
        ];
        let mut accumulator = ScipyStatsNormPdf::<i32>::default();
        accumulator.update_batch(&batch).unwrap();
        accumulator.evaluate().unwrap()
    }

    #[test]
    fn test_update_batch() {
        // An empty batch must leave the accumulator untouched.
        let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
        scipy_stats_norm_pdf.update_batch(&[]).unwrap();
        assert!(scipy_stats_norm_pdf.values.is_empty());
        assert_eq!(Value::Null, scipy_stats_norm_pdf.evaluate().unwrap());

        // A batch without nulls.
        assert_eq!(
            Value::from(0.17843340219081558),
            pdf_of(
                vec![Some(-1i32), Some(1), Some(2)],
                vec![Some(2.0_f64), Some(2.0_f64), Some(2.0_f64)],
            )
        );

        // A batch containing nulls.
        assert_eq!(
            Value::from(0.12343972049858312),
            pdf_of(
                vec![Some(-2i32), None, Some(3), Some(4)],
                vec![Some(2.0_f64), None, Some(2.0_f64), Some(2.0_f64)],
            )
        );
    }
}

View File

@@ -445,20 +445,10 @@ impl Pool {
async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
// use weak ref here to prevent pool being leaked
let pool_weak = {
let weak = Arc::downgrade(&pool);
drop(pool);
weak
};
loop {
let _ = interval.tick().await;
if let Some(pool) = pool_weak.upgrade() {
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
} else {
// no one is using this pool, so we can also let go
break;
}
pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
}
}

View File

@@ -28,6 +28,7 @@ use crate::error::{
InvalidRoleSnafu, ParseNumSnafu, Result,
};
use crate::peer::Peer;
use crate::ClusterId;
const CLUSTER_NODE_INFO_PREFIX: &str = "__meta_cluster_node_info";
@@ -55,9 +56,12 @@ pub trait ClusterInfo {
// TODO(jeremy): Other info, like region status, etc.
}
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-0-{role}-{node_id}`.
/// The key of [NodeInfo] in the storage. The format is `__meta_cluster_node_info-{cluster_id}-{role}-{node_id}`.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
pub struct NodeInfoKey {
/// The cluster id.
// todo(hl): remove cluster_id as it is not assigned anywhere.
pub cluster_id: ClusterId,
/// The role of the node. It can be `[Role::Datanode]` or `[Role::Frontend]`.
pub role: Role,
/// The node id.
@@ -80,15 +84,24 @@ impl NodeInfoKey {
_ => peer.id,
};
Some(NodeInfoKey { role, node_id })
Some(NodeInfoKey {
cluster_id: header.cluster_id,
role,
node_id,
})
}
pub fn key_prefix() -> String {
format!("{}-0-", CLUSTER_NODE_INFO_PREFIX)
pub fn key_prefix_with_cluster_id(cluster_id: u64) -> String {
format!("{}-{}-", CLUSTER_NODE_INFO_PREFIX, cluster_id)
}
pub fn key_prefix_with_role(role: Role) -> String {
format!("{}-0-{}-", CLUSTER_NODE_INFO_PREFIX, i32::from(role))
pub fn key_prefix_with_role(cluster_id: ClusterId, role: Role) -> String {
format!(
"{}-{}-{}-",
CLUSTER_NODE_INFO_PREFIX,
cluster_id,
i32::from(role)
)
}
}
@@ -180,10 +193,15 @@ impl FromStr for NodeInfoKey {
let caps = CLUSTER_NODE_INFO_PREFIX_PATTERN
.captures(key)
.context(InvalidNodeInfoKeySnafu { key })?;
ensure!(caps.len() == 4, InvalidNodeInfoKeySnafu { key });
let cluster_id = caps[1].to_string();
let role = caps[2].to_string();
let node_id = caps[3].to_string();
let cluster_id: u64 = cluster_id.parse().context(ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let role: i32 = role.parse().context(ParseNumSnafu {
err_msg: format!("invalid role {role}"),
})?;
@@ -192,7 +210,11 @@ impl FromStr for NodeInfoKey {
err_msg: format!("invalid node_id: {node_id}"),
})?;
Ok(Self { role, node_id })
Ok(Self {
cluster_id,
role,
node_id,
})
}
}
@@ -211,8 +233,9 @@ impl TryFrom<Vec<u8>> for NodeInfoKey {
impl From<&NodeInfoKey> for Vec<u8> {
fn from(key: &NodeInfoKey) -> Self {
format!(
"{}-0-{}-{}",
"{}-{}-{}-{}",
CLUSTER_NODE_INFO_PREFIX,
key.cluster_id,
i32::from(key.role),
key.node_id
)
@@ -285,6 +308,7 @@ mod tests {
#[test]
fn test_node_info_key_round_trip() {
let key = NodeInfoKey {
cluster_id: 1,
role: Datanode,
node_id: 2,
};
@@ -292,6 +316,7 @@ mod tests {
let key_bytes: Vec<u8> = (&key).into();
let new_key: NodeInfoKey = key_bytes.try_into().unwrap();
assert_eq!(1, new_key.cluster_id);
assert_eq!(Datanode, new_key.role);
assert_eq!(2, new_key.node_id);
}
@@ -337,11 +362,11 @@ mod tests {
#[test]
fn test_node_info_key_prefix() {
let prefix = NodeInfoKey::key_prefix();
assert_eq!(prefix, "__meta_cluster_node_info-0-");
let prefix = NodeInfoKey::key_prefix_with_cluster_id(1);
assert_eq!(prefix, "__meta_cluster_node_info-1-");
let prefix = NodeInfoKey::key_prefix_with_role(Frontend);
assert_eq!(prefix, "__meta_cluster_node_info-0-1-");
let prefix = NodeInfoKey::key_prefix_with_role(2, Frontend);
assert_eq!(prefix, "__meta_cluster_node_info-2-1-");
}
#[test]

View File

@@ -25,8 +25,8 @@ use store_api::region_engine::{RegionRole, RegionStatistic};
use store_api::storage::RegionId;
use table::metadata::TableId;
use crate::error;
use crate::error::Result;
use crate::{error, ClusterId};
pub(crate) const DATANODE_LEASE_PREFIX: &str = "__meta_datanode_lease";
const INACTIVE_REGION_PREFIX: &str = "__meta_inactive_region";
@@ -48,10 +48,11 @@ lazy_static! {
/// The key of the datanode stat in the storage.
///
/// The format is `__meta_datanode_stat-0-{node_id}`.
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Stat {
pub timestamp_millis: i64,
pub cluster_id: ClusterId,
// The datanode Id.
pub id: u64,
// The datanode address.
@@ -101,7 +102,10 @@ impl Stat {
}
pub fn stat_key(&self) -> DatanodeStatKey {
DatanodeStatKey { node_id: self.id }
DatanodeStatKey {
cluster_id: self.cluster_id,
node_id: self.id,
}
}
/// Returns a tuple array containing [RegionId] and [RegionRole].
@@ -141,7 +145,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
} = value;
match (header, peer) {
(Some(_header), Some(peer)) => {
(Some(header), Some(peer)) => {
let region_stats = region_stats
.iter()
.map(RegionStat::from)
@@ -149,6 +153,7 @@ impl TryFrom<&HeartbeatRequest> for Stat {
Ok(Self {
timestamp_millis: time_util::current_time_millis(),
cluster_id: header.cluster_id,
// datanode id
id: peer.id,
// datanode address
@@ -191,24 +196,32 @@ impl From<&api::v1::meta::RegionStat> for RegionStat {
/// The key of the datanode stat in the memory store.
///
/// The format is `__meta_datanode_stat-0-{node_id}`.
/// The format is `__meta_datanode_stat-{cluster_id}-{node_id}`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct DatanodeStatKey {
pub cluster_id: ClusterId,
pub node_id: u64,
}
impl DatanodeStatKey {
/// The key prefix.
pub fn prefix_key() -> Vec<u8> {
// todo(hl): remove cluster id in prefix
format!("{DATANODE_STAT_PREFIX}-0-").into_bytes()
format!("{DATANODE_STAT_PREFIX}-").into_bytes()
}
/// The key prefix with the cluster id.
pub fn key_prefix_with_cluster_id(cluster_id: ClusterId) -> String {
format!("{DATANODE_STAT_PREFIX}-{cluster_id}-")
}
}
impl From<DatanodeStatKey> for Vec<u8> {
fn from(value: DatanodeStatKey) -> Self {
// todo(hl): remove cluster id in prefix
format!("{}-0-{}", DATANODE_STAT_PREFIX, value.node_id).into_bytes()
format!(
"{}-{}-{}",
DATANODE_STAT_PREFIX, value.cluster_id, value.node_id
)
.into_bytes()
}
}
@@ -221,12 +234,20 @@ impl FromStr for DatanodeStatKey {
.context(error::InvalidStatKeySnafu { key })?;
ensure!(caps.len() == 3, error::InvalidStatKeySnafu { key });
let cluster_id = caps[1].to_string();
let node_id = caps[2].to_string();
let cluster_id: u64 = cluster_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid cluster_id: {cluster_id}"),
})?;
let node_id: u64 = node_id.parse().context(error::ParseNumSnafu {
err_msg: format!("invalid node_id: {node_id}"),
})?;
Ok(Self { node_id })
Ok(Self {
cluster_id,
node_id,
})
}
}
@@ -300,6 +321,7 @@ mod tests {
#[test]
fn test_stat_key() {
let stat = Stat {
cluster_id: 3,
id: 101,
region_num: 10,
..Default::default()
@@ -307,12 +329,14 @@ mod tests {
let stat_key = stat.stat_key();
assert_eq!(3, stat_key.cluster_id);
assert_eq!(101, stat_key.node_id);
}
#[test]
fn test_stat_val_round_trip() {
let stat = Stat {
cluster_id: 0,
id: 101,
region_num: 100,
..Default::default()
@@ -327,6 +351,7 @@ mod tests {
assert_eq!(1, stats.len());
let stat = stats.first().unwrap();
assert_eq!(0, stat.cluster_id);
assert_eq!(101, stat.id);
assert_eq!(100, stat.region_num);
}

View File

@@ -30,7 +30,7 @@ use crate::node_manager::NodeManagerRef;
use crate::region_keeper::MemoryRegionKeeperRef;
use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse};
use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
use crate::DatanodeId;
use crate::{ClusterId, DatanodeId};
pub mod alter_database;
pub mod alter_logical_tables;
@@ -57,6 +57,7 @@ pub mod utils;
#[derive(Debug, Default)]
pub struct ExecutorContext {
pub cluster_id: Option<u64>,
pub tracing_context: Option<W3cTrace>,
}
@@ -89,6 +90,10 @@ pub trait ProcedureExecutor: Send + Sync {
pub type ProcedureExecutorRef = Arc<dyn ProcedureExecutor>;
pub struct TableMetadataAllocatorContext {
pub cluster_id: ClusterId,
}
/// Metadata allocated to a table.
#[derive(Default)]
pub struct TableMetadata {
@@ -103,7 +108,7 @@ pub struct TableMetadata {
pub type RegionFailureDetectorControllerRef = Arc<dyn RegionFailureDetectorController>;
pub type DetectingRegion = (DatanodeId, RegionId);
pub type DetectingRegion = (ClusterId, DatanodeId, RegionId);
/// Used for actively registering Region failure detectors.
///

View File

@@ -30,6 +30,7 @@ use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock};
use crate::rpc::ddl::UnsetDatabaseOption::{self};
use crate::rpc::ddl::{AlterDatabaseKind, AlterDatabaseTask, SetDatabaseOption};
use crate::ClusterId;
pub struct AlterDatabaseProcedure {
pub context: DdlContext,
@@ -64,10 +65,14 @@ fn build_new_schema_value(
impl AlterDatabaseProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterDatabase";
pub fn new(task: AlterDatabaseTask, context: DdlContext) -> Result<Self> {
pub fn new(
cluster_id: ClusterId,
task: AlterDatabaseTask,
context: DdlContext,
) -> Result<Self> {
Ok(Self {
context,
data: AlterDatabaseData::new(task)?,
data: AlterDatabaseData::new(task, cluster_id)?,
})
}
@@ -178,6 +183,7 @@ enum AlterDatabaseState {
/// The data of alter database procedure.
#[derive(Debug, Serialize, Deserialize)]
pub struct AlterDatabaseData {
cluster_id: ClusterId,
state: AlterDatabaseState,
kind: AlterDatabaseKind,
catalog_name: String,
@@ -186,8 +192,9 @@ pub struct AlterDatabaseData {
}
impl AlterDatabaseData {
pub fn new(task: AlterDatabaseTask) -> Result<Self> {
pub fn new(task: AlterDatabaseTask, cluster_id: ClusterId) -> Result<Self> {
Ok(Self {
cluster_id,
state: AlterDatabaseState::Prepare,
kind: AlterDatabaseKind::try_from(task.alter_expr.kind.unwrap())?,
catalog_name: task.alter_expr.catalog_name,

View File

@@ -37,9 +37,9 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_route::PhysicalTableRouteValue;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::find_leaders;
use crate::{metrics, ClusterId};
pub struct AlterLogicalTablesProcedure {
pub context: DdlContext,
@@ -50,6 +50,7 @@ impl AlterLogicalTablesProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterLogicalTables";
pub fn new(
cluster_id: ClusterId,
tasks: Vec<AlterTableTask>,
physical_table_id: TableId,
context: DdlContext,
@@ -57,6 +58,7 @@ impl AlterLogicalTablesProcedure {
Self {
context,
data: AlterTablesData {
cluster_id,
state: AlterTablesState::Prepare,
tasks,
table_info_values: vec![],
@@ -238,6 +240,7 @@ impl Procedure for AlterLogicalTablesProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct AlterTablesData {
cluster_id: ClusterId,
state: AlterTablesState,
tasks: Vec<AlterTableTask>,
/// Table info values before the alter operation.

View File

@@ -45,9 +45,9 @@ use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoValue;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::metrics;
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution};
use crate::{metrics, ClusterId};
/// The alter table procedure
pub struct AlterTableProcedure {
@@ -64,11 +64,16 @@ pub struct AlterTableProcedure {
impl AlterTableProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::AlterTable";
pub fn new(table_id: TableId, task: AlterTableTask, context: DdlContext) -> Result<Self> {
pub fn new(
cluster_id: ClusterId,
table_id: TableId,
task: AlterTableTask,
context: DdlContext,
) -> Result<Self> {
task.validate()?;
Ok(Self {
context,
data: AlterTableData::new(task, table_id),
data: AlterTableData::new(task, table_id, cluster_id),
new_table_info: None,
})
}
@@ -302,6 +307,7 @@ enum AlterTableState {
// The serialized data of alter table.
#[derive(Debug, Serialize, Deserialize)]
pub struct AlterTableData {
cluster_id: ClusterId,
state: AlterTableState,
task: AlterTableTask,
table_id: TableId,
@@ -312,11 +318,12 @@ pub struct AlterTableData {
}
impl AlterTableData {
pub fn new(task: AlterTableTask, table_id: TableId) -> Self {
pub fn new(task: AlterTableTask, table_id: TableId, cluster_id: u64) -> Self {
Self {
state: AlterTableState::Prepare,
task,
table_id,
cluster_id,
table_info_value: None,
region_distribution: None,
}

View File

@@ -167,9 +167,10 @@ mod tests {
use crate::test_util::{new_ddl_context, MockDatanodeManager};
/// Prepares a region with schema `[ts: Timestamp, host: Tag, cpu: Field]`.
async fn prepare_ddl_context() -> (DdlContext, TableId, RegionId, String) {
async fn prepare_ddl_context() -> (DdlContext, u64, TableId, RegionId, String) {
let datanode_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(datanode_manager);
let cluster_id = 1;
let table_id = 1024;
let region_id = RegionId::new(table_id, 1);
let table_name = "foo";
@@ -224,12 +225,19 @@ mod tests {
)
.await
.unwrap();
(ddl_context, table_id, region_id, table_name.to_string())
(
ddl_context,
cluster_id,
table_id,
region_id,
table_name.to_string(),
)
}
#[tokio::test]
async fn test_make_alter_region_request() {
let (ddl_context, table_id, region_id, table_name) = prepare_ddl_context().await;
let (ddl_context, cluster_id, table_id, region_id, table_name) =
prepare_ddl_context().await;
let task = AlterTableTask {
alter_table: AlterTableExpr {
@@ -257,7 +265,8 @@ mod tests {
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
let alter_kind = procedure.make_region_alter_kind().unwrap();
let Some(Body::Alter(alter_region_request)) = procedure
@@ -298,7 +307,8 @@ mod tests {
#[tokio::test]
async fn test_make_alter_column_type_region_request() {
let (ddl_context, table_id, region_id, table_name) = prepare_ddl_context().await;
let (ddl_context, cluster_id, table_id, region_id, table_name) =
prepare_ddl_context().await;
let task = AlterTableTask {
alter_table: AlterTableExpr {
@@ -315,7 +325,8 @@ mod tests {
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
let alter_kind = procedure.make_region_alter_kind().unwrap();
let Some(Body::Alter(alter_region_request)) = procedure

View File

@@ -46,9 +46,9 @@ use crate::key::flow::flow_route::FlowRouteValue;
use crate::key::table_name::TableNameKey;
use crate::key::{DeserializedValueWithBytes, FlowId, FlowPartitionId};
use crate::lock_key::{CatalogLock, FlowNameLock, TableNameLock};
use crate::metrics;
use crate::peer::Peer;
use crate::rpc::ddl::{CreateFlowTask, QueryContext};
use crate::{metrics, ClusterId};
/// The procedure of flow creation.
pub struct CreateFlowProcedure {
@@ -60,10 +60,16 @@ impl CreateFlowProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateFlow";
/// Returns a new [CreateFlowProcedure].
pub fn new(task: CreateFlowTask, query_context: QueryContext, context: DdlContext) -> Self {
pub fn new(
cluster_id: ClusterId,
task: CreateFlowTask,
query_context: QueryContext,
context: DdlContext,
) -> Self {
Self {
context,
data: CreateFlowData {
cluster_id,
task,
flow_id: None,
peers: vec![],
@@ -337,7 +343,6 @@ pub enum FlowType {
impl FlowType {
pub const RECORDING_RULE: &str = "recording_rule";
pub const STREAMING: &str = "streaming";
pub const FLOW_TYPE_KEY: &str = "flow_type";
}
impl Default for FlowType {
@@ -358,6 +363,7 @@ impl fmt::Display for FlowType {
/// The serializable data.
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateFlowData {
pub(crate) cluster_id: ClusterId,
pub(crate) state: CreateFlowState,
pub(crate) task: CreateFlowTask,
pub(crate) flow_id: Option<FlowId>,
@@ -392,8 +398,7 @@ impl From<&CreateFlowData> for CreateRequest {
};
let flow_type = value.flow_type.unwrap_or_default().to_string();
req.flow_options
.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
req.flow_options.insert("flow_type".to_string(), flow_type);
req
}
}
@@ -425,7 +430,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
.collect::<Vec<_>>();
let flow_type = value.flow_type.unwrap_or_default().to_string();
options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
options.insert("flow_type".to_string(), flow_type);
let flow_info = FlowInfoValue {
source_table_ids: value.source_table_ids.clone(),

View File

@@ -23,10 +23,11 @@ impl CreateFlowProcedure {
pub(crate) async fn allocate_flow_id(&mut self) -> Result<()> {
//TODO(weny, ruihang): We don't support partitions yet. It's always 1 for now.
let partitions = 1;
let cluster_id = self.data.cluster_id;
let (flow_id, peers) = self
.context
.flow_metadata_allocator
.create(partitions)
.create(cluster_id, partitions)
.await?;
self.data.flow_id = Some(flow_id);
self.data.peers = peers;

View File

@@ -36,9 +36,9 @@ use crate::ddl::DdlContext;
use crate::error::{DecodeJsonSnafu, MetadataCorruptionSnafu, Result};
use crate::key::table_route::TableRouteValue;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::metrics;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{find_leaders, RegionRoute};
use crate::{metrics, ClusterId};
pub struct CreateLogicalTablesProcedure {
pub context: DdlContext,
@@ -49,6 +49,7 @@ impl CreateLogicalTablesProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateLogicalTables";
pub fn new(
cluster_id: ClusterId,
tasks: Vec<CreateTableTask>,
physical_table_id: TableId,
context: DdlContext,
@@ -56,6 +57,7 @@ impl CreateLogicalTablesProcedure {
Self {
context,
data: CreateTablesData {
cluster_id,
state: CreateTablesState::Prepare,
tasks,
table_ids_already_exists: vec![],
@@ -243,6 +245,7 @@ impl Procedure for CreateLogicalTablesProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateTablesData {
cluster_id: ClusterId,
state: CreateTablesState,
tasks: Vec<CreateTableTask>,
table_ids_already_exists: Vec<Option<TableId>>,

View File

@@ -37,17 +37,17 @@ use crate::ddl::utils::{
add_peer_context_if_needed, convert_region_routes_to_detecting_regions, handle_retry_error,
region_storage_path,
};
use crate::ddl::{DdlContext, TableMetadata};
use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result};
use crate::key::table_name::TableNameKey;
use crate::key::table_route::{PhysicalTableRouteValue, TableRouteValue};
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::{
find_leader_regions, find_leaders, operating_leader_regions, RegionRoute,
};
use crate::{metrics, ClusterId};
pub struct CreateTableProcedure {
pub context: DdlContext,
pub creator: TableCreator,
@@ -56,10 +56,10 @@ pub struct CreateTableProcedure {
impl CreateTableProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateTable";
pub fn new(task: CreateTableTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: CreateTableTask, context: DdlContext) -> Self {
Self {
context,
creator: TableCreator::new(task),
creator: TableCreator::new(cluster_id, task),
}
}
@@ -154,7 +154,12 @@ impl CreateTableProcedure {
} = self
.context
.table_metadata_allocator
.create(&self.creator.data.task)
.create(
&TableMetadataAllocatorContext {
cluster_id: self.creator.data.cluster_id,
},
&self.creator.data.task,
)
.await?;
self.creator
.set_allocated_metadata(table_id, table_route, region_wal_options);
@@ -263,6 +268,7 @@ impl CreateTableProcedure {
/// - Failed to create table metadata.
async fn on_create_metadata(&mut self) -> Result<Status> {
let table_id = self.table_id();
let cluster_id = self.creator.data.cluster_id;
let manager = &self.context.table_metadata_manager;
let raw_table_info = self.table_info().clone();
@@ -270,8 +276,10 @@ impl CreateTableProcedure {
let region_wal_options = self.region_wal_options()?.clone();
// Safety: the table_route must be allocated.
let physical_table_route = self.table_route()?.clone();
let detecting_regions =
convert_region_routes_to_detecting_regions(&physical_table_route.region_routes);
let detecting_regions = convert_region_routes_to_detecting_regions(
cluster_id,
&physical_table_route.region_routes,
);
let table_route = TableRouteValue::Physical(physical_table_route);
manager
.create_table_metadata(raw_table_info, table_route, region_wal_options)
@@ -343,10 +351,11 @@ pub struct TableCreator {
}
impl TableCreator {
pub fn new(task: CreateTableTask) -> Self {
pub fn new(cluster_id: ClusterId, task: CreateTableTask) -> Self {
Self {
data: CreateTableData {
state: CreateTableState::Prepare,
cluster_id,
task,
table_route: None,
region_wal_options: None,
@@ -412,6 +421,7 @@ pub struct CreateTableData {
table_route: Option<PhysicalTableRouteValue>,
/// None stands for not allocated yet.
pub region_wal_options: Option<HashMap<RegionNumber, String>>,
pub cluster_id: ClusterId,
}
impl CreateTableData {

View File

@@ -24,13 +24,13 @@ use table::table_reference::TableReference;
use crate::cache_invalidator::Context;
use crate::ddl::utils::handle_retry_error;
use crate::ddl::{DdlContext, TableMetadata};
use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock};
use crate::metrics;
use crate::rpc::ddl::CreateViewTask;
use crate::{metrics, ClusterId};
// The procedure to execute `[CreateViewTask]`.
pub struct CreateViewProcedure {
@@ -41,11 +41,12 @@ pub struct CreateViewProcedure {
impl CreateViewProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::CreateView";
pub fn new(task: CreateViewTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: CreateViewTask, context: DdlContext) -> Self {
Self {
context,
data: CreateViewData {
state: CreateViewState::Prepare,
cluster_id,
task,
need_update: false,
},
@@ -143,7 +144,12 @@ impl CreateViewProcedure {
let TableMetadata { table_id, .. } = self
.context
.table_metadata_allocator
.create_view(&None)
.create_view(
&TableMetadataAllocatorContext {
cluster_id: self.data.cluster_id,
},
&None,
)
.await?;
self.data.set_allocated_metadata(table_id, false);
}
@@ -279,6 +285,7 @@ pub enum CreateViewState {
pub struct CreateViewData {
pub state: CreateViewState,
pub task: CreateViewTask,
pub cluster_id: ClusterId,
/// Whether to update the view info.
pub need_update: bool,
}

View File

@@ -35,6 +35,7 @@ use crate::ddl::DdlContext;
use crate::error::Result;
use crate::key::table_name::TableNameValue;
use crate::lock_key::{CatalogLock, SchemaLock};
use crate::ClusterId;
pub struct DropDatabaseProcedure {
/// The context of procedure runtime.
@@ -53,6 +54,7 @@ pub(crate) enum DropTableTarget {
/// Context of [DropDatabaseProcedure] execution.
pub(crate) struct DropDatabaseContext {
cluster_id: ClusterId,
catalog: String,
schema: String,
drop_if_exists: bool,
@@ -85,6 +87,7 @@ impl DropDatabaseProcedure {
Self {
runtime_context: context,
context: DropDatabaseContext {
cluster_id: 0,
catalog,
schema,
drop_if_exists,
@@ -105,6 +108,7 @@ impl DropDatabaseProcedure {
Ok(Self {
runtime_context,
context: DropDatabaseContext {
cluster_id: 0,
catalog,
schema,
drop_if_exists,

View File

@@ -217,10 +217,11 @@ mod tests {
async fn test_next_without_logical_tables() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
create_physical_table(&ddl_context, "phy").await;
create_physical_table(&ddl_context, 0, "phy").await;
// It always starts from Logical
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -251,11 +252,12 @@ mod tests {
async fn test_next_with_logical_tables() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
create_logical_table(ddl_context.clone(), physical_table_id, "metric_0").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric_0").await;
// It always starts from Logical
let mut state = DropDatabaseCursor::new(DropTableTarget::Logical);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -284,6 +286,7 @@ mod tests {
let ddl_context = new_ddl_context(node_manager);
let mut state = DropDatabaseCursor::new(DropTableTarget::Physical);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,

View File

@@ -98,10 +98,11 @@ impl State for DropDatabaseExecutor {
async fn next(
&mut self,
ddl_ctx: &DdlContext,
_ctx: &mut DropDatabaseContext,
ctx: &mut DropDatabaseContext,
) -> Result<(Box<dyn State>, Status)> {
self.register_dropping_regions(ddl_ctx)?;
let executor = DropTableExecutor::new(self.table_name.clone(), self.table_id, true);
let executor =
DropTableExecutor::new(ctx.cluster_id, self.table_name.clone(), self.table_id, true);
// Deletes metadata for table permanently.
let table_route_value = TableRouteValue::new(
self.table_id,
@@ -186,7 +187,7 @@ mod tests {
async fn test_next_with_physical_table() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
let (_, table_route) = ddl_context
.table_metadata_manager
.table_route_manager()
@@ -202,6 +203,7 @@ mod tests {
DropTableTarget::Physical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -214,6 +216,7 @@ mod tests {
}
// Execute again
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -236,8 +239,8 @@ mod tests {
async fn test_next_logical_table() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
create_logical_table(ddl_context.clone(), physical_table_id, "metric").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
create_logical_table(ddl_context.clone(), 0, physical_table_id, "metric").await;
let logical_table_id = physical_table_id + 1;
let (_, table_route) = ddl_context
.table_metadata_manager
@@ -254,6 +257,7 @@ mod tests {
DropTableTarget::Logical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -266,6 +270,7 @@ mod tests {
}
// Execute again
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -340,7 +345,7 @@ mod tests {
async fn test_next_retryable_err() {
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
let (_, table_route) = ddl_context
.table_metadata_manager
.table_route_manager()
@@ -355,6 +360,7 @@ mod tests {
DropTableTarget::Physical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,
@@ -368,7 +374,7 @@ mod tests {
async fn test_on_recovery() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let physical_table_id = create_physical_table(&ddl_context, "phy").await;
let physical_table_id = create_physical_table(&ddl_context, 0, "phy").await;
let (_, table_route) = ddl_context
.table_metadata_manager
.table_route_manager()
@@ -384,6 +390,7 @@ mod tests {
DropTableTarget::Physical,
);
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
drop_if_exists: false,

View File

@@ -118,6 +118,7 @@ mod tests {
.unwrap();
let mut state = DropDatabaseRemoveMetadata;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: true,
@@ -144,6 +145,7 @@ mod tests {
// Schema does not exist
let mut state = DropDatabaseRemoveMetadata;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: true,

View File

@@ -89,6 +89,7 @@ mod tests {
let ddl_context = new_ddl_context(node_manager);
let mut step = DropDatabaseStart;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: false,
@@ -104,6 +105,7 @@ mod tests {
let ddl_context = new_ddl_context(node_manager);
let mut state = DropDatabaseStart;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: true,
@@ -126,6 +128,7 @@ mod tests {
.unwrap();
let mut state = DropDatabaseStart;
let mut ctx = DropDatabaseContext {
cluster_id: 0,
catalog: "foo".to_string(),
schema: "bar".to_string(),
drop_if_exists: false,

View File

@@ -37,8 +37,8 @@ use crate::instruction::{CacheIdent, DropFlow};
use crate::key::flow::flow_info::FlowInfoValue;
use crate::key::flow::flow_route::FlowRouteValue;
use crate::lock_key::{CatalogLock, FlowLock};
use crate::metrics;
use crate::rpc::ddl::DropFlowTask;
use crate::{metrics, ClusterId};
/// The procedure for dropping a flow.
pub struct DropFlowProcedure {
@@ -51,11 +51,12 @@ pub struct DropFlowProcedure {
impl DropFlowProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropFlow";
pub fn new(task: DropFlowTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: DropFlowTask, context: DdlContext) -> Self {
Self {
context,
data: DropFlowData {
state: DropFlowState::Prepare,
cluster_id,
task,
flow_info_value: None,
flow_route_values: vec![],
@@ -217,6 +218,7 @@ impl Procedure for DropFlowProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct DropFlowData {
state: DropFlowState,
cluster_id: ClusterId,
task: DropFlowTask,
pub(crate) flow_info_value: Option<FlowInfoValue>,
pub(crate) flow_route_values: Vec<FlowRouteValue>,

View File

@@ -40,10 +40,10 @@ use crate::ddl::DdlContext;
use crate::error::{self, Result};
use crate::key::table_route::TableRouteValue;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::region_keeper::OperatingRegionGuard;
use crate::rpc::ddl::DropTableTask;
use crate::rpc::router::{operating_leader_regions, RegionRoute};
use crate::{metrics, ClusterId};
pub struct DropTableProcedure {
/// The context of procedure runtime.
@@ -59,8 +59,8 @@ pub struct DropTableProcedure {
impl DropTableProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropTable";
pub fn new(task: DropTableTask, context: DdlContext) -> Self {
let data = DropTableData::new(task);
pub fn new(cluster_id: ClusterId, task: DropTableTask, context: DdlContext) -> Self {
let data = DropTableData::new(cluster_id, task);
let executor = data.build_executor();
Self {
context,
@@ -268,6 +268,7 @@ impl Procedure for DropTableProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct DropTableData {
pub state: DropTableState,
pub cluster_id: ClusterId,
pub task: DropTableTask,
pub physical_region_routes: Vec<RegionRoute>,
pub physical_table_id: Option<TableId>,
@@ -278,9 +279,10 @@ pub struct DropTableData {
}
impl DropTableData {
pub fn new(task: DropTableTask) -> Self {
pub fn new(cluster_id: ClusterId, task: DropTableTask) -> Self {
Self {
state: DropTableState::Prepare,
cluster_id,
task,
physical_region_routes: vec![],
physical_table_id: None,
@@ -299,6 +301,7 @@ impl DropTableData {
fn build_executor(&self) -> DropTableExecutor {
DropTableExecutor::new(
self.cluster_id,
self.task.table_name(),
self.task.table_id,
self.task.drop_if_exists,

View File

@@ -36,6 +36,7 @@ use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::key::table_route::TableRouteValue;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
use crate::ClusterId;
/// [Control] indicates to the caller whether to go to the next step.
#[derive(Debug)]
@@ -53,8 +54,14 @@ impl<T> Control<T> {
impl DropTableExecutor {
/// Returns the [DropTableExecutor].
pub fn new(table: TableName, table_id: TableId, drop_if_exists: bool) -> Self {
pub fn new(
cluster_id: ClusterId,
table: TableName,
table_id: TableId,
drop_if_exists: bool,
) -> Self {
Self {
cluster_id,
table,
table_id,
drop_if_exists,
@@ -67,6 +74,7 @@ impl DropTableExecutor {
/// - Invalidates the cache on the Frontend nodes.
/// - Drops the regions on the Datanode nodes.
pub struct DropTableExecutor {
cluster_id: ClusterId,
table: TableName,
table_id: TableId,
drop_if_exists: bool,
@@ -156,7 +164,7 @@ impl DropTableExecutor {
let detecting_regions = if table_route_value.is_physical() {
// Safety: checked.
let regions = table_route_value.region_routes().unwrap();
convert_region_routes_to_detecting_regions(regions)
convert_region_routes_to_detecting_regions(self.cluster_id, regions)
} else {
vec![]
};
@@ -313,6 +321,7 @@ mod tests {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ctx = new_ddl_context(node_manager);
let executor = DropTableExecutor::new(
0,
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
1024,
true,
@@ -322,6 +331,7 @@ mod tests {
// Drops a non-existent table
let executor = DropTableExecutor::new(
0,
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
1024,
false,
@@ -331,6 +341,7 @@ mod tests {
// Drops an existing table
let executor = DropTableExecutor::new(
0,
TableName::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "my_table"),
1024,
false,

View File

@@ -31,8 +31,8 @@ use crate::error::{self, Result};
use crate::instruction::CacheIdent;
use crate::key::table_name::TableNameKey;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::DropViewTask;
use crate::{metrics, ClusterId};
/// The procedure for dropping a view.
pub struct DropViewProcedure {
@@ -45,11 +45,12 @@ pub struct DropViewProcedure {
impl DropViewProcedure {
pub const TYPE_NAME: &'static str = "metasrv-procedure::DropView";
pub fn new(task: DropViewTask, context: DdlContext) -> Self {
pub fn new(cluster_id: ClusterId, task: DropViewTask, context: DdlContext) -> Self {
Self {
context,
data: DropViewData {
state: DropViewState::Prepare,
cluster_id,
task,
},
}
@@ -215,6 +216,7 @@ impl Procedure for DropViewProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct DropViewData {
state: DropViewState,
cluster_id: ClusterId,
task: DropViewTask,
}

View File

@@ -20,6 +20,7 @@ use crate::error::Result;
use crate::key::FlowId;
use crate::peer::Peer;
use crate::sequence::SequenceRef;
use crate::ClusterId;
/// The reference of [FlowMetadataAllocator].
pub type FlowMetadataAllocatorRef = Arc<FlowMetadataAllocator>;
@@ -59,9 +60,16 @@ impl FlowMetadataAllocator {
}
/// Allocates the [FlowId] and [Peer]s.
pub async fn create(&self, partitions: usize) -> Result<(FlowId, Vec<Peer>)> {
pub async fn create(
&self,
cluster_id: ClusterId,
partitions: usize,
) -> Result<(FlowId, Vec<Peer>)> {
let flow_id = self.allocate_flow_id().await?;
let peers = self.partition_peer_allocator.alloc(partitions).await?;
let peers = self
.partition_peer_allocator
.alloc(cluster_id, partitions)
.await?;
Ok((flow_id, peers))
}
@@ -71,7 +79,7 @@ impl FlowMetadataAllocator {
#[async_trait]
pub trait PartitionPeerAllocator: Send + Sync {
/// Allocates [Peer] nodes for storing partitions.
async fn alloc(&self, partitions: usize) -> Result<Vec<Peer>>;
async fn alloc(&self, cluster_id: ClusterId, partitions: usize) -> Result<Vec<Peer>>;
}
/// [PartitionPeerAllocatorRef] allocates [Peer]s for partitions.
@@ -81,7 +89,7 @@ struct NoopPartitionPeerAllocator;
#[async_trait]
impl PartitionPeerAllocator for NoopPartitionPeerAllocator {
async fn alloc(&self, partitions: usize) -> Result<Vec<Peer>> {
async fn alloc(&self, _cluster_id: ClusterId, partitions: usize) -> Result<Vec<Peer>> {
Ok(vec![Peer::default(); partitions])
}
}

View File

@@ -20,7 +20,7 @@ use common_telemetry::{debug, info};
use snafu::ensure;
use store_api::storage::{RegionId, RegionNumber, TableId};
use crate::ddl::TableMetadata;
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::{self, Result, UnsupportedSnafu};
use crate::key::table_route::PhysicalTableRouteValue;
use crate::peer::Peer;
@@ -109,6 +109,7 @@ impl TableMetadataAllocator {
async fn create_table_route(
&self,
ctx: &TableMetadataAllocatorContext,
table_id: TableId,
task: &CreateTableTask,
) -> Result<PhysicalTableRouteValue> {
@@ -120,7 +121,7 @@ impl TableMetadataAllocator {
}
);
let peers = self.peer_allocator.alloc(regions).await?;
let peers = self.peer_allocator.alloc(ctx, regions).await?;
let region_routes = task
.partitions
.iter()
@@ -146,7 +147,11 @@ impl TableMetadataAllocator {
}
/// Create VIEW metadata
pub async fn create_view(&self, table_id: &Option<api::v1::TableId>) -> Result<TableMetadata> {
pub async fn create_view(
&self,
_ctx: &TableMetadataAllocatorContext,
table_id: &Option<api::v1::TableId>,
) -> Result<TableMetadata> {
let table_id = self.allocate_table_id(table_id).await?;
Ok(TableMetadata {
@@ -155,9 +160,13 @@ impl TableMetadataAllocator {
})
}
pub async fn create(&self, task: &CreateTableTask) -> Result<TableMetadata> {
pub async fn create(
&self,
ctx: &TableMetadataAllocatorContext,
task: &CreateTableTask,
) -> Result<TableMetadata> {
let table_id = self.allocate_table_id(&task.create_table.table_id).await?;
let table_route = self.create_table_route(table_id, task).await?;
let table_route = self.create_table_route(ctx, table_id, task).await?;
let region_wal_options = self.create_wal_options(&table_route)?;
debug!(
@@ -179,14 +188,19 @@ pub type PeerAllocatorRef = Arc<dyn PeerAllocator>;
#[async_trait]
pub trait PeerAllocator: Send + Sync {
/// Allocates `regions` size [`Peer`]s.
async fn alloc(&self, regions: usize) -> Result<Vec<Peer>>;
async fn alloc(&self, ctx: &TableMetadataAllocatorContext, regions: usize)
-> Result<Vec<Peer>>;
}
struct NoopPeerAllocator;
#[async_trait]
impl PeerAllocator for NoopPeerAllocator {
async fn alloc(&self, regions: usize) -> Result<Vec<Peer>> {
async fn alloc(
&self,
_ctx: &TableMetadataAllocatorContext,
regions: usize,
) -> Result<Vec<Peer>> {
Ok(vec![Peer::default(); regions])
}
}

View File

@@ -31,9 +31,10 @@ use crate::ddl::test_util::columns::TestColumnDefBuilder;
use crate::ddl::test_util::create_table::{
build_raw_table_info_from_expr, TestCreateTableExprBuilder,
};
use crate::ddl::{DdlContext, TableMetadata};
use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext};
use crate::key::table_route::TableRouteValue;
use crate::rpc::ddl::CreateTableTask;
use crate::ClusterId;
pub async fn create_physical_table_metadata(
ddl_context: &DdlContext,
@@ -47,7 +48,11 @@ pub async fn create_physical_table_metadata(
.unwrap();
}
pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> TableId {
pub async fn create_physical_table(
ddl_context: &DdlContext,
cluster_id: ClusterId,
name: &str,
) -> TableId {
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task(name);
let TableMetadata {
@@ -56,7 +61,10 @@ pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> Tabl
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -72,13 +80,15 @@ pub async fn create_physical_table(ddl_context: &DdlContext, name: &str) -> Tabl
pub async fn create_logical_table(
ddl_context: DdlContext,
cluster_id: ClusterId,
physical_table_id: TableId,
table_name: &str,
) -> TableId {
use std::assert_matches::assert_matches;
let tasks = vec![test_create_logical_table_task(table_name)];
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
let status = procedure.on_create_metadata().await.unwrap();

View File

@@ -86,6 +86,7 @@ fn make_alter_logical_table_rename_task(
async fn test_on_prepare_check_schema() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![
make_alter_logical_table_add_column_task(
Some("schema1"),
@@ -99,7 +100,8 @@ async fn test_on_prepare_check_schema() {
),
];
let physical_table_id = 1024u32;
let mut procedure = AlterLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
}
@@ -108,46 +110,50 @@ async fn test_on_prepare_check_schema() {
async fn test_on_prepare_check_alter_kind() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![make_alter_logical_table_rename_task(
"schema1",
"table1",
"new_table1",
)];
let physical_table_id = 1024u32;
let mut procedure = AlterLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
}
#[tokio::test]
async fn test_on_prepare_different_physical_table() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let phy1_id = create_physical_table(&ddl_context, "phy1").await;
create_logical_table(ddl_context.clone(), phy1_id, "table1").await;
let phy2_id = create_physical_table(&ddl_context, "phy2").await;
create_logical_table(ddl_context.clone(), phy2_id, "table2").await;
let phy1_id = create_physical_table(&ddl_context, cluster_id, "phy1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy1_id, "table1").await;
let phy2_id = create_physical_table(&ddl_context, cluster_id, "phy2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy2_id, "table2").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy1_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy1_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, AlterLogicalTablesInvalidArguments { .. });
}
#[tokio::test]
async fn test_on_prepare_logical_table_not_exists() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 1 logical table
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
@@ -155,22 +161,23 @@ async fn test_on_prepare_logical_table_not_exists() {
make_alter_logical_table_add_column_task(None, "table2", vec!["column2".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, TableNotFound { .. });
}
#[tokio::test]
async fn test_on_prepare() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["column1".to_string()]),
@@ -178,24 +185,25 @@ async fn test_on_prepare() {
make_alter_logical_table_add_column_task(None, "table3", vec!["column3".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let result = procedure.on_prepare().await;
assert_matches!(result, Ok(Status::Executing { persist: true }));
}
#[tokio::test]
async fn test_on_update_metadata() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), phy_id, "table4").await;
create_logical_table(ddl_context.clone(), phy_id, "table5").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table4").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table5").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["new_col".to_string()]),
@@ -203,7 +211,7 @@ async fn test_on_update_metadata() {
make_alter_logical_table_add_column_task(None, "table3", vec!["new_col".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context);
let mut procedure = AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context);
let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
@@ -221,21 +229,23 @@ async fn test_on_update_metadata() {
#[tokio::test]
async fn test_on_part_duplicate_alter_request() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
let tasks = vec![
make_alter_logical_table_add_column_task(None, "table1", vec!["col_0".to_string()]),
make_alter_logical_table_add_column_task(None, "table2", vec!["col_0".to_string()]),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context.clone());
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context.clone());
let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
@@ -268,7 +278,8 @@ async fn test_on_part_duplicate_alter_request() {
),
];
let mut procedure = AlterLogicalTablesProcedure::new(tasks, phy_id, ddl_context.clone());
let mut procedure =
AlterLogicalTablesProcedure::new(cluster_id, tasks, phy_id, ddl_context.clone());
let mut status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });

View File

@@ -59,6 +59,7 @@ fn test_rename_alter_table_task(table_name: &str, new_table_name: &str) -> Alter
async fn test_on_prepare_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo", 1024);
// Puts a value to table name key.
ddl_context
@@ -72,7 +73,7 @@ async fn test_on_prepare_table_exists_err() {
.unwrap();
let task = test_rename_alter_table_task("non-exists", "foo");
let mut procedure = AlterTableProcedure::new(1024, task, ddl_context).unwrap();
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err.status_code(), StatusCode::TableAlreadyExists);
}
@@ -81,8 +82,9 @@ async fn test_on_prepare_table_exists_err() {
async fn test_on_prepare_table_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_rename_alter_table_task("non-exists", "foo");
let mut procedure = AlterTableProcedure::new(1024, task, ddl_context).unwrap();
let mut procedure = AlterTableProcedure::new(cluster_id, 1024, task, ddl_context).unwrap();
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err.status_code(), StatusCode::TableNotFound);
}
@@ -93,6 +95,7 @@ async fn test_on_submit_alter_request() {
let datanode_handler = DatanodeWatcher(tx);
let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
@@ -141,7 +144,8 @@ async fn test_on_submit_alter_request() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, alter_table_task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
@@ -177,6 +181,7 @@ async fn test_on_submit_alter_request_with_outdated_request() {
RequestOutdatedErrorDatanodeHandler,
));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
@@ -225,7 +230,8 @@ async fn test_on_submit_alter_request_with_outdated_request() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, alter_table_task, ddl_context).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, alter_table_task, ddl_context).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
}
@@ -234,6 +240,7 @@ async fn test_on_submit_alter_request_with_outdated_request() {
async fn test_on_update_metadata_rename() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let new_table_name = "bar";
let table_id = 1024;
@@ -250,7 +257,8 @@ async fn test_on_update_metadata_rename() {
.unwrap();
let task = test_rename_alter_table_task(table_name, new_table_name);
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap();
procedure.on_update_metadata().await.unwrap();
@@ -283,6 +291,7 @@ async fn test_on_update_metadata_rename() {
async fn test_on_update_metadata_add_columns() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -326,7 +335,8 @@ async fn test_on_update_metadata_add_columns() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
procedure.on_update_metadata().await.unwrap();
@@ -351,6 +361,7 @@ async fn test_on_update_metadata_add_columns() {
async fn test_on_update_table_options() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -387,7 +398,8 @@ async fn test_on_update_table_options() {
})),
},
};
let mut procedure = AlterTableProcedure::new(table_id, task, ddl_context.clone()).unwrap();
let mut procedure =
AlterTableProcedure::new(cluster_id, table_id, task, ddl_context.clone()).unwrap();
procedure.on_prepare().await.unwrap();
procedure.submit_alter_region_requests().await.unwrap();
procedure.on_update_metadata().await.unwrap();

View File

@@ -25,11 +25,11 @@ use crate::ddl::create_flow::CreateFlowProcedure;
use crate::ddl::test_util::create_table::test_create_table_task;
use crate::ddl::test_util::flownode_handler::NaiveFlownodeHandler;
use crate::ddl::DdlContext;
use crate::error;
use crate::key::table_route::TableRouteValue;
use crate::key::FlowId;
use crate::rpc::ddl::CreateFlowTask;
use crate::test_util::{new_ddl_context, MockFlownodeManager};
use crate::{error, ClusterId};
pub(crate) fn test_create_flow_task(
name: &str,
@@ -53,6 +53,7 @@ pub(crate) fn test_create_flow_task(
#[tokio::test]
async fn test_create_flow_source_table_not_found() {
let cluster_id = 1;
let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME,
DEFAULT_SCHEMA_NAME,
@@ -64,13 +65,14 @@ async fn test_create_flow_source_table_not_found() {
let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler));
let ddl_context = new_ddl_context(node_manager);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task, query_ctx, ddl_context);
let mut procedure = CreateFlowProcedure::new(cluster_id, task, query_ctx, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::TableNotFound { .. });
}
pub(crate) async fn create_test_flow(
ddl_context: &DdlContext,
cluster_id: ClusterId,
flow_name: &str,
source_table_names: Vec<TableName>,
sink_table_name: TableName,
@@ -82,7 +84,8 @@ pub(crate) async fn create_test_flow(
false,
);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context.clone());
let mut procedure =
CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context.clone());
let output = execute_procedure_until_done(&mut procedure).await.unwrap();
let flow_id = output.downcast_ref::<FlowId>().unwrap();
@@ -91,6 +94,7 @@ pub(crate) async fn create_test_flow(
#[tokio::test]
async fn test_create_flow() {
let cluster_id = 1;
let table_id = 1024;
let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME,
@@ -114,6 +118,7 @@ async fn test_create_flow() {
.unwrap();
let flow_id = create_test_flow(
&ddl_context,
cluster_id,
"my_flow",
source_table_names.clone(),
sink_table_name.clone(),
@@ -129,7 +134,8 @@ async fn test_create_flow() {
true,
);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context.clone());
let mut procedure =
CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context.clone());
let output = execute_procedure_until_done(&mut procedure).await.unwrap();
let flow_id = output.downcast_ref::<FlowId>().unwrap();
assert_eq!(*flow_id, 1024);
@@ -137,7 +143,7 @@ async fn test_create_flow() {
// Creates again
let task = test_create_flow_task("my_flow", source_table_names, sink_table_name, false);
let query_ctx = QueryContext::arc().into();
let mut procedure = CreateFlowProcedure::new(task.clone(), query_ctx, ddl_context);
let mut procedure = CreateFlowProcedure::new(cluster_id, task.clone(), query_ctx, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowAlreadyExists { .. });
}

View File

@@ -26,7 +26,7 @@ use crate::ddl::test_util::datanode_handler::NaiveDatanodeHandler;
use crate::ddl::test_util::{
create_physical_table_metadata, test_create_logical_table_task, test_create_physical_table_task,
};
use crate::ddl::TableMetadata;
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::error::Error;
use crate::key::table_route::TableRouteValue;
use crate::test_util::{new_ddl_context, MockDatanodeManager};
@@ -35,9 +35,11 @@ use crate::test_util::{new_ddl_context, MockDatanodeManager};
async fn test_on_prepare_physical_table_not_found() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let tasks = vec![test_create_logical_table_task("foo")];
let physical_table_id = 1024u32;
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableRouteNotFound { .. });
}
@@ -46,6 +48,7 @@ async fn test_on_prepare_physical_table_not_found() {
async fn test_on_prepare() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -54,7 +57,10 @@ async fn test_on_prepare() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -67,7 +73,8 @@ async fn test_on_prepare() {
// The create logical table procedure.
let tasks = vec![test_create_logical_table_task("foo")];
let physical_table_id = table_id;
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let mut procedure =
CreateLogicalTablesProcedure::new(cluster_id, tasks, physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
}
@@ -76,6 +83,7 @@ async fn test_on_prepare() {
async fn test_on_prepare_logical_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -84,7 +92,10 @@ async fn test_on_prepare_logical_table_exists_err() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -108,7 +119,7 @@ async fn test_on_prepare_logical_table_exists_err() {
// The create logical table procedure.
let physical_table_id = table_id;
let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context);
CreateLogicalTablesProcedure::new(cluster_id, vec![task], physical_table_id, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -118,6 +129,7 @@ async fn test_on_prepare_logical_table_exists_err() {
async fn test_on_prepare_with_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -126,7 +138,10 @@ async fn test_on_prepare_with_create_if_table_exists() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -152,7 +167,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
// Sets `create_if_not_exists`
task.create_table.create_if_not_exists = true;
let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context);
CreateLogicalTablesProcedure::new(cluster_id, vec![task], physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap();
let output = status.downcast_output_ref::<Vec<u32>>().unwrap();
assert_eq!(*output, vec![8192]);
@@ -162,6 +177,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
async fn test_on_prepare_part_logical_tables_exist() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -170,7 +186,10 @@ async fn test_on_prepare_part_logical_tables_exist() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -197,6 +216,7 @@ async fn test_on_prepare_part_logical_tables_exist() {
task.create_table.create_if_not_exists = true;
let non_exist_task = test_create_logical_table_task("non_exists");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, non_exist_task],
physical_table_id,
ddl_context,
@@ -209,6 +229,7 @@ async fn test_on_prepare_part_logical_tables_exist() {
async fn test_on_create_metadata() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -217,7 +238,10 @@ async fn test_on_create_metadata() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -233,6 +257,7 @@ async fn test_on_create_metadata() {
let task = test_create_logical_table_task("foo");
let yet_another_task = test_create_logical_table_task("bar");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, yet_another_task],
physical_table_id,
ddl_context,
@@ -254,6 +279,7 @@ async fn test_on_create_metadata() {
async fn test_on_create_metadata_part_logical_tables_exist() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -262,7 +288,10 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -289,6 +318,7 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
task.create_table.create_if_not_exists = true;
let non_exist_task = test_create_logical_table_task("non_exists");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task, non_exist_task],
physical_table_id,
ddl_context,
@@ -310,6 +340,7 @@ async fn test_on_create_metadata_part_logical_tables_exist() {
async fn test_on_create_metadata_err() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -318,7 +349,10 @@ async fn test_on_create_metadata_err() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -334,6 +368,7 @@ async fn test_on_create_metadata_err() {
let task = test_create_logical_table_task("foo");
let yet_another_task = test_create_logical_table_task("bar");
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task.clone(), yet_another_task],
physical_table_id,
ddl_context.clone(),

View File

@@ -87,6 +87,7 @@ pub(crate) fn test_create_table_task(name: &str) -> CreateTableTask {
async fn test_on_prepare_table_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
// Puts a value to table name key.
@@ -99,7 +100,7 @@ async fn test_on_prepare_table_exists_err() {
)
.await
.unwrap();
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -109,6 +110,7 @@ async fn test_on_prepare_table_exists_err() {
async fn test_on_prepare_with_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo");
task.create_table.create_if_not_exists = true;
task.table_info.ident.table_id = 1024;
@@ -122,7 +124,7 @@ async fn test_on_prepare_with_create_if_table_exists() {
)
.await
.unwrap();
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Done { output: Some(..) });
let table_id = *status.downcast_output_ref::<u32>().unwrap();
@@ -133,9 +135,10 @@ async fn test_on_prepare_with_create_if_table_exists() {
async fn test_on_prepare_without_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo");
task.create_table.create_if_not_exists = true;
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
assert_eq!(procedure.table_id(), 1024);
@@ -145,10 +148,11 @@ async fn test_on_prepare_without_create_if_table_exists() {
async fn test_on_prepare_with_no_partition_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_table_task("foo");
task.partitions = vec![];
task.create_table.create_if_not_exists = true;
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::Unexpected { .. });
assert!(err
@@ -161,9 +165,10 @@ async fn test_on_datanode_create_regions_should_retry() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -178,9 +183,10 @@ async fn test_on_datanode_create_regions_should_not_retry() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(UnexpectedErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -195,9 +201,10 @@ async fn test_on_create_metadata_error() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateTableProcedure::new(cluster_id, task.clone(), ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -226,9 +233,10 @@ async fn test_on_create_metadata() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_table_task("foo");
assert!(!task.create_table.create_if_not_exists);
let mut procedure = CreateTableProcedure::new(task, ddl_context);
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -243,12 +251,14 @@ async fn test_on_create_metadata() {
#[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let task = test_create_table_task("foo");
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone());
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| {
p.creator.data.state == CreateTableState::CreateMetadata

View File

@@ -97,6 +97,7 @@ pub(crate) fn test_create_view_task(name: &str) -> CreateViewTask {
async fn test_on_prepare_view_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists);
// Puts a value to table name key.
@@ -112,7 +113,7 @@ async fn test_on_prepare_view_exists_err() {
)
.await
.unwrap();
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::ViewAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -122,6 +123,7 @@ async fn test_on_prepare_view_exists_err() {
async fn test_on_prepare_with_create_if_view_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_view_task("foo");
task.create_view.create_if_not_exists = true;
task.view_info.ident.table_id = 1024;
@@ -138,7 +140,7 @@ async fn test_on_prepare_with_create_if_view_exists() {
)
.await
.unwrap();
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Done { output: Some(..) });
let table_id = *status.downcast_output_ref::<u32>().unwrap();
@@ -149,9 +151,10 @@ async fn test_on_prepare_with_create_if_view_exists() {
async fn test_on_prepare_without_create_if_table_exists() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let mut task = test_create_view_task("foo");
task.create_view.create_if_not_exists = true;
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
assert_eq!(procedure.view_id(), 1024);
@@ -162,9 +165,10 @@ async fn test_on_create_metadata() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists);
let mut procedure = CreateViewProcedure::new(task, ddl_context);
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -181,9 +185,10 @@ async fn test_replace_view_metadata() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager.clone());
let cluster_id = 1;
let task = test_create_view_task("foo");
assert!(!task.create_view.create_if_not_exists);
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -208,7 +213,7 @@ async fn test_replace_view_metadata() {
let mut task = test_create_view_task("foo");
// The view already exists, prepare should fail
{
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::ViewAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);
@@ -219,7 +224,7 @@ async fn test_replace_view_metadata() {
task.create_view.logical_plan = vec![4, 5, 6];
task.create_view.definition = "new_definition".to_string();
let mut procedure = CreateViewProcedure::new(task, ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -249,11 +254,12 @@ async fn test_replace_table() {
common_telemetry::init_default_ut_logging();
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager.clone());
let cluster_id = 1;
{
// Create a `foo` table.
let task = test_create_table_task("foo");
let mut procedure = CreateTableProcedure::new(task, ddl_context.clone());
let mut procedure = CreateTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
let ctx = ProcedureContext {
procedure_id: ProcedureId::random(),
@@ -266,7 +272,7 @@ async fn test_replace_table() {
// Try to replace a view named `foo` too.
let mut task = test_create_view_task("foo");
task.create_view.or_replace = true;
let mut procedure = CreateViewProcedure::new(task.clone(), ddl_context.clone());
let mut procedure = CreateViewProcedure::new(cluster_id, task.clone(), ddl_context.clone());
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, Error::TableAlreadyExists { .. });
assert_eq!(err.status_code(), StatusCode::TableAlreadyExists);

View File

@@ -31,6 +31,7 @@ use crate::test_util::{new_ddl_context, MockDatanodeManager};
#[tokio::test]
async fn test_drop_database_with_logical_tables() {
common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
ddl_context
@@ -44,11 +45,11 @@ async fn test_drop_database_with_logical_tables() {
.await
.unwrap();
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(),
@@ -79,6 +80,7 @@ async fn test_drop_database_with_logical_tables() {
#[tokio::test]
async fn test_drop_database_retryable_error() {
common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(RetryErrorDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
ddl_context
@@ -92,11 +94,11 @@ async fn test_drop_database_retryable_error() {
.await
.unwrap();
// Creates physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates 3 logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), phy_id, "table2").await;
create_logical_table(ddl_context.clone(), phy_id, "table3").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table2").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table3").await;
let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(),
@@ -126,6 +128,7 @@ async fn test_drop_database_retryable_error() {
#[tokio::test]
async fn test_drop_database_recover() {
common_telemetry::init_default_ut_logging();
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let ddl_context = new_ddl_context(node_manager);
ddl_context
@@ -139,9 +142,9 @@ async fn test_drop_database_recover() {
.await
.unwrap();
// Creates a physical table
let phy_id = create_physical_table(&ddl_context, "phy").await;
let phy_id = create_physical_table(&ddl_context, cluster_id, "phy").await;
// Creates a logical tables
create_logical_table(ddl_context.clone(), phy_id, "table1").await;
create_logical_table(ddl_context.clone(), cluster_id, phy_id, "table1").await;
let mut procedure = DropDatabaseProcedure::new(
DEFAULT_CATALOG_NAME.to_string(),
DEFAULT_SCHEMA_NAME.to_string(),

View File

@@ -40,11 +40,12 @@ fn test_drop_flow_task(flow_name: &str, flow_id: u32, drop_if_exists: bool) -> D
#[tokio::test]
async fn test_drop_flow_not_found() {
let cluster_id = 1;
let flow_id = 1024;
let node_manager = Arc::new(MockFlownodeManager::new(NaiveFlownodeHandler));
let ddl_context = new_ddl_context(node_manager);
let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context);
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowNotFound { .. });
}
@@ -52,6 +53,7 @@ async fn test_drop_flow_not_found() {
#[tokio::test]
async fn test_drop_flow() {
// create a flow
let cluster_id = 1;
let table_id = 1024;
let source_table_names = vec![TableName::new(
DEFAULT_CATALOG_NAME,
@@ -73,21 +75,27 @@ async fn test_drop_flow() {
)
.await
.unwrap();
let flow_id =
create_test_flow(&ddl_context, "my_flow", source_table_names, sink_table_name).await;
let flow_id = create_test_flow(
&ddl_context,
cluster_id,
"my_flow",
source_table_names,
sink_table_name,
)
.await;
// Drops the flows
let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context.clone());
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
// Drops if not exists
let task = test_drop_flow_task("my_flow", flow_id, true);
let mut procedure = DropFlowProcedure::new(task, ddl_context.clone());
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
// Drops again
let task = test_drop_flow_task("my_flow", flow_id, false);
let mut procedure = DropFlowProcedure::new(task, ddl_context);
let mut procedure = DropFlowProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_matches!(err, error::Error::FlowNotFound { .. });
}

View File

@@ -35,7 +35,7 @@ use crate::ddl::test_util::{
create_logical_table, create_physical_table, create_physical_table_metadata,
test_create_logical_table_task, test_create_physical_table_task,
};
use crate::ddl::TableMetadata;
use crate::ddl::{TableMetadata, TableMetadataAllocatorContext};
use crate::key::table_route::TableRouteValue;
use crate::kv_backend::memory::MemoryKvBackend;
use crate::peer::Peer;
@@ -47,6 +47,7 @@ use crate::test_util::{new_ddl_context, new_ddl_context_with_kv_backend, MockDat
async fn test_on_prepare_table_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -62,7 +63,7 @@ async fn test_on_prepare_table_not_exists_err() {
.unwrap();
let task = new_drop_table_task("bar", table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound);
}
@@ -71,6 +72,7 @@ async fn test_on_prepare_table_not_exists_err() {
async fn test_on_prepare_table() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_name = "foo";
let table_id = 1024;
let task = test_create_table_task(table_name, table_id);
@@ -87,13 +89,13 @@ async fn test_on_prepare_table() {
let task = new_drop_table_task("bar", table_id, true);
// Drop if exists
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(!procedure.rollback_supported());
let task = new_drop_table_task(table_name, table_id, false);
// Drop table
let mut procedure = DropTableProcedure::new(task, ddl_context);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
}
@@ -103,6 +105,7 @@ async fn test_on_datanode_drop_regions() {
let datanode_handler = DatanodeWatcher(tx);
let node_manager = Arc::new(MockDatanodeManager::new(datanode_handler));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let table_id = 1024;
let table_name = "foo";
let task = test_create_table_task(table_name, table_id);
@@ -141,7 +144,7 @@ async fn test_on_datanode_drop_regions() {
let task = new_drop_table_task(table_name, table_id, false);
// Drop table
let mut procedure = DropTableProcedure::new(task, ddl_context);
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
procedure.on_datanode_drop_regions().await.unwrap();
@@ -176,6 +179,7 @@ async fn test_on_rollback() {
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend.clone());
let cluster_id = 1;
// Prepares physical table metadata.
let mut create_physical_table_task = test_create_physical_table_task("phy_table");
let TableMetadata {
@@ -184,7 +188,10 @@ async fn test_on_rollback() {
..
} = ddl_context
.table_metadata_allocator
.create(&create_physical_table_task)
.create(
&TableMetadataAllocatorContext { cluster_id },
&create_physical_table_task,
)
.await
.unwrap();
create_physical_table_task.set_table_id(table_id);
@@ -198,8 +205,12 @@ async fn test_on_rollback() {
let physical_table_id = table_id;
// Creates the logical table metadata.
let task = test_create_logical_table_task("foo");
let mut procedure =
CreateLogicalTablesProcedure::new(vec![task], physical_table_id, ddl_context.clone());
let mut procedure = CreateLogicalTablesProcedure::new(
cluster_id,
vec![task],
physical_table_id,
ddl_context.clone(),
);
procedure.on_prepare().await.unwrap();
let ctx = new_test_procedure_context();
procedure.execute(&ctx).await.unwrap();
@@ -212,7 +223,7 @@ async fn test_on_rollback() {
// Drops the physical table
{
let task = new_drop_table_task("phy_table", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(procedure.rollback_supported());
procedure.on_delete_metadata().await.unwrap();
@@ -227,7 +238,7 @@ async fn test_on_rollback() {
// Drops the logical table
let task = new_drop_table_task("foo", table_ids[0], false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
assert!(!procedure.rollback_supported());
}
@@ -244,15 +255,18 @@ fn new_drop_table_task(table_name: &str, table_id: TableId, drop_if_exists: bool
#[tokio::test]
async fn test_memory_region_keeper_guard_dropped_on_procedure_done() {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await;
let logical_table_id = create_logical_table(ddl_context.clone(), physical_table_id, "s").await;
let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let logical_table_id =
create_logical_table(ddl_context.clone(), cluster_id, physical_table_id, "s").await;
let inner_test = |task: DropTableTask| async {
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| {
p.data.state == DropTableState::InvalidateTableCache
})
@@ -290,13 +304,14 @@ async fn test_from_json() {
(DropTableState::DatanodeDropRegions, 1, 1),
(DropTableState::DeleteTombstone, 1, 0),
] {
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await;
let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let task = new_drop_table_task("t", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until(&mut procedure, |p| p.data.state == state).await;
let data = procedure.dump().unwrap();
assert_eq!(
@@ -319,13 +334,14 @@ async fn test_from_json() {
let num_operating_regions = 0;
let num_operating_regions_after_recovery = 0;
let cluster_id = 1;
let node_manager = Arc::new(MockDatanodeManager::new(NaiveDatanodeHandler));
let kv_backend = Arc::new(MemoryKvBackend::new());
let ddl_context = new_ddl_context_with_kv_backend(node_manager, kv_backend);
let physical_table_id = create_physical_table(&ddl_context, "t").await;
let physical_table_id = create_physical_table(&ddl_context, cluster_id, "t").await;
let task = new_drop_table_task("t", physical_table_id, false);
let mut procedure = DropTableProcedure::new(task, ddl_context.clone());
let mut procedure = DropTableProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
let data = procedure.dump().unwrap();
assert_eq!(

View File

@@ -41,6 +41,7 @@ fn new_drop_view_task(view: &str, view_id: TableId, drop_if_exists: bool) -> Dro
async fn test_on_prepare_view_not_exists_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let mut task = test_create_view_task("foo");
task.view_info.ident.table_id = view_id;
@@ -59,7 +60,7 @@ async fn test_on_prepare_view_not_exists_err() {
.unwrap();
let task = new_drop_view_task("bar", view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound);
}
@@ -68,6 +69,7 @@ async fn test_on_prepare_view_not_exists_err() {
async fn test_on_prepare_not_view_err() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let view_name = "foo";
let task = test_create_table_task(view_name, view_id);
@@ -83,7 +85,7 @@ async fn test_on_prepare_not_view_err() {
.unwrap();
let task = new_drop_view_task(view_name, view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
// It's not a view, expect error
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::InvalidArguments);
@@ -93,6 +95,7 @@ async fn test_on_prepare_not_view_err() {
async fn test_on_prepare_success() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let view_name = "foo";
let mut task = test_create_view_task("foo");
@@ -113,12 +116,12 @@ async fn test_on_prepare_success() {
let task = new_drop_view_task("bar", view_id, true);
// Drop if exists
let mut procedure = DropViewProcedure::new(task, ddl_context.clone());
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context.clone());
procedure.on_prepare().await.unwrap();
let task = new_drop_view_task(view_name, view_id, false);
// Prepare success
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
procedure.on_prepare().await.unwrap();
assert_eq!(DropViewState::DeleteMetadata, procedure.state());
}
@@ -127,6 +130,7 @@ async fn test_on_prepare_success() {
async fn test_drop_view_success() {
let node_manager = Arc::new(MockDatanodeManager::new(()));
let ddl_context = new_ddl_context(node_manager);
let cluster_id = 1;
let view_id = 1024;
let view_name = "foo";
let mut task = test_create_view_task("foo");
@@ -155,7 +159,7 @@ async fn test_drop_view_success() {
let task = new_drop_view_task(view_name, view_id, false);
// Prepare success
let mut procedure = DropViewProcedure::new(task, ddl_context.clone());
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context.clone());
execute_procedure_until_done(&mut procedure).await;
assert_eq!(DropViewState::InvalidateViewCache, procedure.state());
@@ -170,7 +174,7 @@ async fn test_drop_view_success() {
// Drop again
let task = new_drop_view_task(view_name, view_id, false);
let mut procedure = DropViewProcedure::new(task, ddl_context);
let mut procedure = DropViewProcedure::new(cluster_id, task, ddl_context);
let err = procedure.on_prepare().await.unwrap_err();
assert_eq!(err.status_code(), StatusCode::TableNotFound);
}

View File

@@ -39,9 +39,9 @@ use crate::key::table_info::TableInfoValue;
use crate::key::table_name::TableNameKey;
use crate::key::DeserializedValueWithBytes;
use crate::lock_key::{CatalogLock, SchemaLock, TableLock};
use crate::metrics;
use crate::rpc::ddl::TruncateTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute};
use crate::{metrics, ClusterId};
pub struct TruncateTableProcedure {
context: DdlContext,
@@ -91,6 +91,7 @@ impl TruncateTableProcedure {
pub(crate) const TYPE_NAME: &'static str = "metasrv-procedure::TruncateTable";
pub(crate) fn new(
cluster_id: ClusterId,
task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
@@ -98,7 +99,7 @@ impl TruncateTableProcedure {
) -> Self {
Self {
context,
data: TruncateTableData::new(task, table_info_value, region_routes),
data: TruncateTableData::new(cluster_id, task, table_info_value, region_routes),
}
}
@@ -188,6 +189,7 @@ impl TruncateTableProcedure {
#[derive(Debug, Serialize, Deserialize)]
pub struct TruncateTableData {
state: TruncateTableState,
cluster_id: ClusterId,
task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
@@ -195,12 +197,14 @@ pub struct TruncateTableData {
impl TruncateTableData {
pub fn new(
cluster_id: ClusterId,
task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
) -> Self {
Self {
state: TruncateTableState::Prepare,
cluster_id,
task,
table_info_value,
region_routes,

View File

@@ -34,6 +34,7 @@ use crate::key::TableMetadataManagerRef;
use crate::peer::Peer;
use crate::rpc::ddl::CreateTableTask;
use crate::rpc::router::RegionRoute;
use crate::ClusterId;
/// Adds [Peer] context if the error is unretryable.
pub fn add_peer_context_if_needed(datanode: Peer) -> impl FnOnce(Error) -> Error {
@@ -143,6 +144,7 @@ pub async fn get_physical_table_id(
/// Converts a list of [`RegionRoute`] to a list of [`DetectingRegion`].
pub fn convert_region_routes_to_detecting_regions(
cluster_id: ClusterId,
region_routes: &[RegionRoute],
) -> Vec<DetectingRegion> {
region_routes
@@ -151,7 +153,7 @@ pub fn convert_region_routes_to_detecting_regions(
route
.leader_peer
.as_ref()
.map(|peer| (peer.id, route.region.id))
.map(|peer| (cluster_id, peer.id, route.region.id))
})
.collect::<Vec<_>>()
}

View File

@@ -60,6 +60,7 @@ use crate::rpc::ddl::{
use crate::rpc::procedure;
use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse};
use crate::rpc::router::RegionRoute;
use crate::ClusterId;
pub type DdlManagerRef = Arc<DdlManager>;
@@ -153,12 +154,13 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_alter_table_task(
&self,
cluster_id: ClusterId,
table_id: TableId,
alter_table_task: AlterTableTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = AlterTableProcedure::new(table_id, alter_table_task, context)?;
let procedure = AlterTableProcedure::new(cluster_id, table_id, alter_table_task, context)?;
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -169,11 +171,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_table_task(
&self,
cluster_id: ClusterId,
create_table_task: CreateTableTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = CreateTableProcedure::new(create_table_task, context);
let procedure = CreateTableProcedure::new(cluster_id, create_table_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -184,11 +187,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_view_task(
&self,
cluster_id: ClusterId,
create_view_task: CreateViewTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = CreateViewProcedure::new(create_view_task, context);
let procedure = CreateViewProcedure::new(cluster_id, create_view_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -199,13 +203,18 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_logical_table_tasks(
&self,
cluster_id: ClusterId,
create_table_tasks: Vec<CreateTableTask>,
physical_table_id: TableId,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure =
CreateLogicalTablesProcedure::new(create_table_tasks, physical_table_id, context);
let procedure = CreateLogicalTablesProcedure::new(
cluster_id,
create_table_tasks,
physical_table_id,
context,
);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -216,13 +225,18 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_alter_logical_table_tasks(
&self,
cluster_id: ClusterId,
alter_table_tasks: Vec<AlterTableTask>,
physical_table_id: TableId,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure =
AlterLogicalTablesProcedure::new(alter_table_tasks, physical_table_id, context);
let procedure = AlterLogicalTablesProcedure::new(
cluster_id,
alter_table_tasks,
physical_table_id,
context,
);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -233,11 +247,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_table_task(
&self,
cluster_id: ClusterId,
drop_table_task: DropTableTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = DropTableProcedure::new(drop_table_task, context);
let procedure = DropTableProcedure::new(cluster_id, drop_table_task, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
@@ -248,6 +263,7 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_database(
&self,
_cluster_id: ClusterId,
CreateDatabaseTask {
catalog,
schema,
@@ -267,6 +283,7 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_database(
&self,
_cluster_id: ClusterId,
DropDatabaseTask {
catalog,
schema,
@@ -282,10 +299,11 @@ impl DdlManager {
pub async fn submit_alter_database(
&self,
cluster_id: ClusterId,
alter_database_task: AlterDatabaseTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = AlterDatabaseProcedure::new(alter_database_task, context)?;
let procedure = AlterDatabaseProcedure::new(cluster_id, alter_database_task, context)?;
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -295,11 +313,12 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_create_flow_task(
&self,
cluster_id: ClusterId,
create_flow: CreateFlowTask,
query_context: QueryContext,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = CreateFlowProcedure::new(create_flow, query_context, context);
let procedure = CreateFlowProcedure::new(cluster_id, create_flow, query_context, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -309,10 +328,11 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_flow_task(
&self,
cluster_id: ClusterId,
drop_flow: DropFlowTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = DropFlowProcedure::new(drop_flow, context);
let procedure = DropFlowProcedure::new(cluster_id, drop_flow, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -322,10 +342,11 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_drop_view_task(
&self,
cluster_id: ClusterId,
drop_view: DropViewTask,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = DropViewProcedure::new(drop_view, context);
let procedure = DropViewProcedure::new(cluster_id, drop_view, context);
let procedure_with_id = ProcedureWithId::with_random_id(Box::new(procedure));
self.submit_procedure(procedure_with_id).await
@@ -335,12 +356,14 @@ impl DdlManager {
#[tracing::instrument(skip_all)]
pub async fn submit_truncate_table_task(
&self,
cluster_id: ClusterId,
truncate_table_task: TruncateTableTask,
table_info_value: DeserializedValueWithBytes<TableInfoValue>,
region_routes: Vec<RegionRoute>,
) -> Result<(ProcedureId, Option<Output>)> {
let context = self.create_context();
let procedure = TruncateTableProcedure::new(
cluster_id,
truncate_table_task,
table_info_value,
region_routes,
@@ -374,6 +397,7 @@ impl DdlManager {
async fn handle_truncate_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
truncate_table_task: TruncateTableTask,
) -> Result<SubmitDdlTaskResponse> {
let table_id = truncate_table_task.table_id;
@@ -392,7 +416,12 @@ async fn handle_truncate_table_task(
let table_route = table_route_value.into_inner().region_routes()?.clone();
let (id, _) = ddl_manager
.submit_truncate_table_task(truncate_table_task, table_info_value, table_route)
.submit_truncate_table_task(
cluster_id,
truncate_table_task,
table_info_value,
table_route,
)
.await?;
info!("Table: {table_id} is truncated via procedure_id {id:?}");
@@ -405,6 +434,7 @@ async fn handle_truncate_table_task(
async fn handle_alter_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_table_task: AlterTableTask,
) -> Result<SubmitDdlTaskResponse> {
let table_ref = alter_table_task.table_ref();
@@ -438,7 +468,7 @@ async fn handle_alter_table_task(
);
let (id, _) = ddl_manager
.submit_alter_table_task(table_id, alter_table_task)
.submit_alter_table_task(cluster_id, table_id, alter_table_task)
.await?;
info!("Table: {table_id} is altered via procedure_id {id:?}");
@@ -451,10 +481,13 @@ async fn handle_alter_table_task(
async fn handle_drop_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_table_task: DropTableTask,
) -> Result<SubmitDdlTaskResponse> {
let table_id = drop_table_task.table_id;
let (id, _) = ddl_manager.submit_drop_table_task(drop_table_task).await?;
let (id, _) = ddl_manager
.submit_drop_table_task(cluster_id, drop_table_task)
.await?;
info!("Table: {table_id} is dropped via procedure_id {id:?}");
@@ -466,10 +499,11 @@ async fn handle_drop_table_task(
async fn handle_create_table_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_table_task: CreateTableTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager
.submit_create_table_task(create_table_task)
.submit_create_table_task(cluster_id, create_table_task)
.await?;
let procedure_id = id.to_string();
@@ -491,6 +525,7 @@ async fn handle_create_table_task(
async fn handle_create_logical_table_tasks(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_table_tasks: Vec<CreateTableTask>,
) -> Result<SubmitDdlTaskResponse> {
ensure!(
@@ -507,7 +542,7 @@ async fn handle_create_logical_table_tasks(
let num_logical_tables = create_table_tasks.len();
let (id, output) = ddl_manager
.submit_create_logical_table_tasks(create_table_tasks, physical_table_id)
.submit_create_logical_table_tasks(cluster_id, create_table_tasks, physical_table_id)
.await?;
info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is created via procedure_id {id:?}");
@@ -533,10 +568,11 @@ async fn handle_create_logical_table_tasks(
async fn handle_create_database_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_database_task: CreateDatabaseTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_create_database(create_database_task.clone())
.submit_create_database(cluster_id, create_database_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -553,10 +589,11 @@ async fn handle_create_database_task(
async fn handle_drop_database_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_database_task: DropDatabaseTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_drop_database(drop_database_task.clone())
.submit_drop_database(cluster_id, drop_database_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -573,10 +610,11 @@ async fn handle_drop_database_task(
async fn handle_alter_database_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_database_task: AlterDatabaseTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_alter_database(alter_database_task.clone())
.submit_alter_database(cluster_id, alter_database_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -594,10 +632,11 @@ async fn handle_alter_database_task(
async fn handle_drop_flow_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_flow_task: DropFlowTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_drop_flow_task(drop_flow_task.clone())
.submit_drop_flow_task(cluster_id, drop_flow_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -614,10 +653,11 @@ async fn handle_drop_flow_task(
async fn handle_drop_view_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
drop_view_task: DropViewTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, _) = ddl_manager
.submit_drop_view_task(drop_view_task.clone())
.submit_drop_view_task(cluster_id, drop_view_task.clone())
.await?;
let procedure_id = id.to_string();
@@ -635,11 +675,12 @@ async fn handle_drop_view_task(
async fn handle_create_flow_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_flow_task: CreateFlowTask,
query_context: QueryContext,
) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager
.submit_create_flow_task(create_flow_task.clone(), query_context)
.submit_create_flow_task(cluster_id, create_flow_task.clone(), query_context)
.await?;
let procedure_id = id.to_string();
@@ -671,6 +712,7 @@ async fn handle_create_flow_task(
async fn handle_alter_logical_table_tasks(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
alter_table_tasks: Vec<AlterTableTask>,
) -> Result<SubmitDdlTaskResponse> {
ensure!(
@@ -691,7 +733,7 @@ async fn handle_alter_logical_table_tasks(
let num_logical_tables = alter_table_tasks.len();
let (id, _) = ddl_manager
.submit_alter_logical_table_tasks(alter_table_tasks, physical_table_id)
.submit_alter_logical_table_tasks(cluster_id, alter_table_tasks, physical_table_id)
.await?;
info!("{num_logical_tables} logical tables on physical table: {physical_table_id:?} is altered via procedure_id {id:?}");
@@ -707,10 +749,11 @@ async fn handle_alter_logical_table_tasks(
/// Handle the `[CreateViewTask]` and returns the DDL response when success.
async fn handle_create_view_task(
ddl_manager: &DdlManager,
cluster_id: ClusterId,
create_view_task: CreateViewTask,
) -> Result<SubmitDdlTaskResponse> {
let (id, output) = ddl_manager
.submit_create_view_task(create_view_task)
.submit_create_view_task(cluster_id, create_view_task)
.await?;
let procedure_id = id.to_string();
@@ -745,43 +788,55 @@ impl ProcedureExecutor for DdlManager {
.unwrap_or(TracingContext::from_current_span())
.attach(tracing::info_span!("DdlManager::submit_ddl_task"));
async move {
let cluster_id = ctx.cluster_id.unwrap_or_default();
debug!("Submitting Ddl task: {:?}", request.task);
match request.task {
CreateTable(create_table_task) => {
handle_create_table_task(self, create_table_task).await
handle_create_table_task(self, cluster_id, create_table_task).await
}
DropTable(drop_table_task) => {
handle_drop_table_task(self, cluster_id, drop_table_task).await
}
DropTable(drop_table_task) => handle_drop_table_task(self, drop_table_task).await,
AlterTable(alter_table_task) => {
handle_alter_table_task(self, alter_table_task).await
handle_alter_table_task(self, cluster_id, alter_table_task).await
}
TruncateTable(truncate_table_task) => {
handle_truncate_table_task(self, truncate_table_task).await
handle_truncate_table_task(self, cluster_id, truncate_table_task).await
}
CreateLogicalTables(create_table_tasks) => {
handle_create_logical_table_tasks(self, create_table_tasks).await
handle_create_logical_table_tasks(self, cluster_id, create_table_tasks).await
}
AlterLogicalTables(alter_table_tasks) => {
handle_alter_logical_table_tasks(self, alter_table_tasks).await
handle_alter_logical_table_tasks(self, cluster_id, alter_table_tasks).await
}
DropLogicalTables(_) => todo!(),
CreateDatabase(create_database_task) => {
handle_create_database_task(self, create_database_task).await
handle_create_database_task(self, cluster_id, create_database_task).await
}
DropDatabase(drop_database_task) => {
handle_drop_database_task(self, drop_database_task).await
handle_drop_database_task(self, cluster_id, drop_database_task).await
}
AlterDatabase(alter_database_task) => {
handle_alter_database_task(self, alter_database_task).await
handle_alter_database_task(self, cluster_id, alter_database_task).await
}
CreateFlow(create_flow_task) => {
handle_create_flow_task(self, create_flow_task, request.query_context.into())
.await
handle_create_flow_task(
self,
cluster_id,
create_flow_task,
request.query_context.into(),
)
.await
}
DropFlow(drop_flow_task) => {
handle_drop_flow_task(self, cluster_id, drop_flow_task).await
}
DropFlow(drop_flow_task) => handle_drop_flow_task(self, drop_flow_task).await,
CreateView(create_view_task) => {
handle_create_view_task(self, create_view_task).await
handle_create_view_task(self, cluster_id, create_view_task).await
}
DropView(drop_view_task) => {
handle_drop_view_task(self, cluster_id, drop_view_task).await
}
DropView(drop_view_task) => handle_drop_view_task(self, drop_view_task).await,
}
}
.trace(span)

View File

@@ -26,10 +26,11 @@ use crate::flow_name::FlowName;
use crate::key::schema_name::SchemaName;
use crate::key::FlowId;
use crate::peer::Peer;
use crate::{DatanodeId, FlownodeId};
use crate::{ClusterId, DatanodeId, FlownodeId};
#[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct RegionIdent {
pub cluster_id: ClusterId,
pub datanode_id: DatanodeId,
pub table_id: TableId,
pub region_number: RegionNumber,
@@ -46,8 +47,8 @@ impl Display for RegionIdent {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"RegionIdent(datanode_id='{}', table_id={}, region_number={}, engine = {})",
self.datanode_id, self.table_id, self.region_number, self.engine
"RegionIdent(datanode_id='{}.{}', table_id={}, region_number={}, engine = {})",
self.cluster_id, self.datanode_id, self.table_id, self.region_number, self.engine
)
}
}
@@ -261,6 +262,7 @@ mod tests {
fn test_serialize_instruction() {
let open_region = Instruction::OpenRegion(OpenRegion::new(
RegionIdent {
cluster_id: 1,
datanode_id: 2,
table_id: 1024,
region_number: 1,
@@ -275,11 +277,12 @@ mod tests {
let serialized = serde_json::to_string(&open_region).unwrap();
assert_eq!(
r#"{"OpenRegion":{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#,
r#"{"OpenRegion":{"region_ident":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#,
serialized
);
let close_region = Instruction::CloseRegion(RegionIdent {
cluster_id: 1,
datanode_id: 2,
table_id: 1024,
region_number: 1,
@@ -289,7 +292,7 @@ mod tests {
let serialized = serde_json::to_string(&close_region).unwrap();
assert_eq!(
r#"{"CloseRegion":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#,
r#"{"CloseRegion":{"cluster_id":1,"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#,
serialized
);
}
@@ -304,6 +307,7 @@ mod tests {
#[test]
fn test_compatible_serialize_open_region() {
let region_ident = RegionIdent {
cluster_id: 1,
datanode_id: 2,
table_id: 1024,
region_number: 1,

View File

@@ -47,6 +47,8 @@ pub mod test_util;
pub mod util;
pub mod wal_options_allocator;
// The id of the cluster.
pub type ClusterId = u64;
// The id of the datanode.
pub type DatanodeId = u64;
// The id of the flownode.

View File

@@ -99,7 +99,7 @@ impl NodeExpiryListener {
in_memory: &ResettableKvBackendRef,
max_idle_time: Duration,
) -> error::Result<impl Iterator<Item = NodeInfoKey>> {
let prefix = NodeInfoKey::key_prefix();
let prefix = NodeInfoKey::key_prefix_with_cluster_id(0);
let req = RangeRequest::new().with_prefix(prefix);
let current_time_millis = common_time::util::current_time_millis();
let resp = in_memory.range(req).await?;

View File

@@ -19,7 +19,7 @@ use api::v1::meta::Peer as PbPeer;
use serde::{Deserialize, Serialize};
use crate::error::Error;
use crate::{DatanodeId, FlownodeId};
use crate::{ClusterId, DatanodeId, FlownodeId};
#[derive(Debug, Default, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)]
pub struct Peer {
@@ -72,8 +72,8 @@ impl Display for Peer {
/// can query peer given a node id
#[async_trait::async_trait]
pub trait PeerLookupService {
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>, Error>;
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>, Error>;
async fn datanode(&self, cluster_id: ClusterId, id: DatanodeId) -> Result<Option<Peer>, Error>;
async fn flownode(&self, cluster_id: ClusterId, id: FlownodeId) -> Result<Option<Peer>, Error>;
}
pub type PeerLookupServiceRef = Arc<dyn PeerLookupService + Send + Sync>;

View File

@@ -31,6 +31,11 @@ impl ResponseHeader {
self.0.protocol_version
}
/// Returns the `cluster_id` field of the wrapped protobuf response header.
#[inline]
pub fn cluster_id(&self) -> u64 {
    self.0.cluster_id
}
#[inline]
pub fn error_code(&self) -> i32 {
match self.0.error.as_ref() {
@@ -138,6 +143,7 @@ mod tests {
fn test_response_header_trans() {
let pb_header = PbResponseHeader {
protocol_version: 101,
cluster_id: 1,
error: Some(Error {
code: 100,
err_msg: "test".to_string(),
@@ -146,6 +152,7 @@ mod tests {
let header = ResponseHeader(pb_header);
assert_eq!(101, header.protocol_version());
assert_eq!(1, header.cluster_id());
assert_eq!(100, header.error_code());
assert_eq!("test".to_string(), header.error_msg());
}

View File

@@ -37,7 +37,7 @@ use crate::peer::{Peer, PeerLookupService};
use crate::region_keeper::MemoryRegionKeeper;
use crate::sequence::SequenceBuilder;
use crate::wal_options_allocator::WalOptionsAllocator;
use crate::{DatanodeId, FlownodeId};
use crate::{ClusterId, DatanodeId, FlownodeId};
#[async_trait::async_trait]
pub trait MockDatanodeHandler: Sync + Send + Clone {
@@ -189,11 +189,11 @@ pub struct NoopPeerLookupService;
#[async_trait::async_trait]
impl PeerLookupService for NoopPeerLookupService {
async fn datanode(&self, id: DatanodeId) -> Result<Option<Peer>> {
async fn datanode(&self, _cluster_id: ClusterId, id: DatanodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id)))
}
async fn flownode(&self, id: FlownodeId) -> Result<Option<Peer>> {
async fn flownode(&self, _cluster_id: ClusterId, id: FlownodeId) -> Result<Option<Peer>> {
Ok(Some(Peer::empty(id)))
}
}

View File

@@ -183,7 +183,7 @@ mod tests {
let expected = regions
.into_iter()
.zip(vec![encoded_wal_options; num_regions as usize])
.collect();
.collect::<HashMap<_, _>>();
assert_eq!(got, expected);
}

View File

@@ -24,6 +24,7 @@ use datatypes::arrow::datatypes::DataType as ArrowDatatype;
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::ConcreteDataType;
use snafu::{Location, Snafu};
use statrs::StatsError;
#[derive(Snafu)]
#[snafu(visibility(pub))]
@@ -37,6 +38,14 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to generate function"))]
GenerateFunction {
#[snafu(source)]
error: StatsError,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Failed to cast scalar value into vector"))]
FromScalarValue {
#[snafu(implicit)]
@@ -88,6 +97,12 @@ pub enum Error {
location: Location,
},
#[snafu(display("unexpected: not constant column"))]
InvalidInputCol {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("General DataFusion error"))]
GeneralDataFusion {
#[snafu(source)]
@@ -233,6 +248,8 @@ impl ErrorExt for Error {
Error::CreateAccumulator { .. }
| Error::DowncastVector { .. }
| Error::InvalidInputState { .. }
| Error::InvalidInputCol { .. }
| Error::GenerateFunction { .. }
| Error::BadAccumulatorImpl { .. }
| Error::ToScalarValue { .. }
| Error::GetScalarVector { .. }

View File

@@ -235,6 +235,7 @@ mod tests {
Instruction::CloseRegion(RegionIdent {
table_id: region_id.table_id(),
region_number: region_id.region_number(),
cluster_id: 1,
datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(),
})
@@ -245,6 +246,7 @@ mod tests {
RegionIdent {
table_id: region_id.table_id(),
region_number: region_id.region_number(),
cluster_id: 1,
datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(),
},

View File

@@ -597,7 +597,7 @@ impl fmt::Display for FulltextAnalyzer {
}
/// Skipping options for a column.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
#[serde(rename_all = "kebab-case")]
pub struct SkippingIndexOptions {
/// The granularity of the skip index.
@@ -607,15 +607,6 @@ pub struct SkippingIndexOptions {
pub index_type: SkippingIndexType,
}
/// Default skipping index options: `DEFAULT_GRANULARITY` as the granularity and
/// the default `SkippingIndexType` as the index type.
///
/// NOTE(review): a `#[derive(Default)]` would presumably use the field type's own
/// default for `granularity` instead of `DEFAULT_GRANULARITY` — confirm before
/// replacing this manual impl with a derive.
impl Default for SkippingIndexOptions {
    fn default() -> Self {
        Self {
            granularity: DEFAULT_GRANULARITY,
            index_type: SkippingIndexType::default(),
        }
    }
}
impl fmt::Display for SkippingIndexOptions {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "granularity={}", self.granularity)?;

View File

@@ -16,7 +16,6 @@ async-trait.workspace = true
bytes.workspace = true
cache.workspace = true
catalog.workspace = true
chrono.workspace = true
client.workspace = true
common-base.workspace = true
common-config.workspace = true

View File

@@ -49,13 +49,12 @@ pub(crate) use crate::adapter::node_context::FlownodeContext;
use crate::adapter::refill::RefillTask;
use crate::adapter::table_source::ManagedTableSource;
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
pub(crate) use crate::adapter::worker::{create_worker, WorkerHandle};
pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
use crate::expr::Batch;
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::recording_rules::RecordingRuleEngine;
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
mod flownode_impl;
@@ -64,7 +63,7 @@ pub(crate) mod refill;
mod stat;
#[cfg(test)]
mod tests;
pub(crate) mod util;
mod util;
mod worker;
pub(crate) mod node_context;
@@ -104,6 +103,7 @@ impl Default for FlowConfig {
#[serde(default)]
pub struct FlownodeOptions {
pub mode: Mode,
pub cluster_id: Option<u64>,
pub node_id: Option<u64>,
pub flow: FlowConfig,
pub grpc: GrpcOptions,
@@ -118,6 +118,7 @@ impl Default for FlownodeOptions {
fn default() -> Self {
Self {
mode: servers::Mode::Standalone,
cluster_id: None,
node_id: None,
flow: FlowConfig::default(),
grpc: GrpcOptions::default().with_bind_addr("127.0.0.1:3004"),
@@ -170,8 +171,6 @@ pub struct FlowWorkerManager {
flush_lock: RwLock<()>,
/// receive a oneshot sender to send state size report
state_report_handler: RwLock<Option<StateReportHandler>>,
/// engine for recording rule
rule_engine: RecordingRuleEngine,
}
/// Building FlownodeManager
@@ -186,7 +185,6 @@ impl FlowWorkerManager {
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
rule_engine: RecordingRuleEngine,
) -> Self {
let srv_map = ManagedTableSource::new(
table_meta.table_info_manager().clone(),
@@ -209,7 +207,6 @@ impl FlowWorkerManager {
node_id,
flush_lock: RwLock::new(()),
state_report_handler: RwLock::new(None),
rule_engine,
}
}
@@ -218,6 +215,25 @@ impl FlowWorkerManager {
self
}
/// Create a flownode manager together with `num_workers` freshly created workers.
///
/// The handle of every created worker is registered on the manager through
/// `add_worker_handle`; the workers themselves are returned to the caller in
/// creation order.
pub fn new_with_workers<'s>(
    node_id: Option<u32>,
    query_engine: Arc<dyn QueryEngine>,
    table_meta: TableMetadataManagerRef,
    num_workers: usize,
) -> (Self, Vec<Worker<'s>>) {
    let mut manager = Self::new(node_id, query_engine, table_meta);
    let mut workers = Vec::with_capacity(num_workers);
    for _ in 0..num_workers {
        let (handle, worker) = create_worker();
        manager.add_worker_handle(handle);
        workers.push(worker);
    }
    (manager, workers)
}
/// Add a worker handle to the manager, meaning the corresponding worker is now managed by it
pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
self.worker_handles.push(handle);
@@ -735,11 +751,7 @@ pub struct CreateFlowArgs {
/// Create&Remove flow
impl FlowWorkerManager {
/// Remove a flow by its id
#[allow(unreachable_code)]
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.remove_flow(flow_id).await;
for handle in self.worker_handles.iter() {
if handle.contains_flow(flow_id).await? {
handle.remove_flow(flow_id).await?;
@@ -755,10 +767,8 @@ impl FlowWorkerManager {
/// steps to create task:
/// 1. parse query into typed plan(and optional parse expire_after expr)
/// 2. render source/sink with output table id and used input table id
#[allow(clippy::too_many_arguments, unreachable_code)]
#[allow(clippy::too_many_arguments)]
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.create_flow(args).await;
let CreateFlowArgs {
flow_id,
sink_table_name,

View File

@@ -153,13 +153,7 @@ impl Flownode for FlowWorkerManager {
}
}
#[allow(unreachable_code, unused)]
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
return self
.rule_engine
.handle_inserts(request)
.await
.map_err(to_meta_err(snafu::location!()));
// using try_read to ensure two things:
// 1. flush wouldn't happen until inserts before it is inserted
// 2. inserts happening concurrently with flush wouldn't be block by flush
@@ -212,15 +206,15 @@ impl Flownode for FlowWorkerManager {
.collect_vec();
let table_col_names = table_schema.relation_desc.names;
let table_col_names = table_col_names
.iter().enumerate()
.map(|(idx,name)| match name {
Some(name) => Ok(name.clone()),
None => InternalSnafu {
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
}
.fail().map_err(BoxedError::new).context(ExternalSnafu),
})
.collect::<Result<Vec<_>>>()?;
.iter().enumerate()
.map(|(idx,name)| match name {
Some(name) => Ok(name.clone()),
None => InternalSnafu {
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
}
.fail().map_err(BoxedError::new).context(ExternalSnafu),
})
.collect::<Result<Vec<_>>>()?;
let name_to_col = HashMap::<_, _>::from_iter(
insert_schema
.iter()

View File

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Some utility functions
use std::sync::Arc;
use api::helper::ColumnDataTypeWrapper;

View File

@@ -16,7 +16,6 @@
use std::any::Any;
use arrow_schema::ArrowError;
use common_error::ext::BoxedError;
use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
use common_macro::stack_trace_debug;
@@ -54,13 +53,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Time error"))]
Time {
source: common_time::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("External error"))]
External {
source: BoxedError,
@@ -164,15 +156,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Arrow error: {raw:?} in context: {context}"))]
Arrow {
#[snafu(source)]
raw: ArrowError,
context: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
Datafusion {
#[snafu(source)]
@@ -247,7 +230,6 @@ impl ErrorExt for Error {
match self {
Self::Eval { .. }
| Self::JoinTask { .. }
| Self::Arrow { .. }
| Self::Datafusion { .. }
| Self::InsertIntoFlow { .. } => StatusCode::Internal,
Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,
@@ -256,9 +238,7 @@ impl ErrorExt for Error {
| Self::FlowNotFound { .. }
| Self::ListFlows { .. } => StatusCode::TableNotFound,
Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
Self::InvalidQuery { .. } | Self::CreateFlow { .. } | Self::Time { .. } => {
StatusCode::EngineExecuteQuery
}
Self::InvalidQuery { .. } | Self::CreateFlow { .. } => StatusCode::EngineExecuteQuery,
Self::Unexpected { .. } => StatusCode::Unexpected,
Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
StatusCode::Unsupported

View File

@@ -238,7 +238,6 @@ mod test {
for (sql, current, expected) in &testcases {
let plan = sql_to_substrait(engine.clone(), sql).await;
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan)
.await

View File

@@ -130,6 +130,13 @@ impl HeartbeatTask {
/// Mark the flownode heartbeat task as no longer running.
///
/// Atomically flips `running` from `true` to `false`. If the flag was not
/// `true` at that point, only a warning is logged.
pub fn shutdown(&self) {
    info!("Close heartbeat task for flownode");
    let was_running = self
        .running
        .compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
        .is_ok();
    if !was_running {
        warn!("Call close heartbeat task multiple times");
    }
}
fn new_heartbeat_request(

View File

@@ -33,7 +33,6 @@ mod expr;
pub mod heartbeat;
mod metrics;
mod plan;
mod recording_rules;
mod repr;
mod server;
mod transform;
@@ -44,5 +43,4 @@ mod test_utils;
pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
pub use error::{Error, Result};
pub use recording_rules::FrontendClient;
pub use server::{FlownodeBuilder, FlownodeInstance, FlownodeServer, FrontendInvoker};

View File

@@ -28,32 +28,6 @@ lazy_static! {
&["table_id"]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_query_time",
"flow rule engine query time",
&["flow_id"],
vec![
0.0,
1.,
3.,
5.,
10.,
20.,
30.,
60.,
2. * 60.,
5. * 60.,
10. * 60.
]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_slow_query",
"flow rule engine slow query",
&["flow_id", "sql", "peer"],
vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

View File

@@ -1,940 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Run a flow as a recording rule, i.e. a time-window-aware normal query that is triggered on every tick set by the user
mod engine;
mod frontend_client;
use std::collections::BTreeSet;
use std::sync::Arc;
use api::helper::pb_value_to_value_ref;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::warn;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datafusion::error::Result as DfResult;
use datafusion::logical_expr::Expr;
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
use datafusion::prelude::SessionContext;
use datafusion::sql::unparser::Unparser;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
use datafusion_common::{DFSchema, TableReference};
use datafusion_expr::{ColumnarValue, LogicalPlan};
use datafusion_physical_expr::PhysicalExprRef;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVector;
use datatypes::schema::TIME_INDEX_KEY;
use datatypes::value::Value;
use datatypes::vectors::{
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
TimestampSecondVector, Vector,
};
pub use engine::RecordingRuleEngine;
pub use frontend_client::FrontendClient;
use itertools::Itertools;
use query::parser::QueryLanguageParser;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use crate::adapter::util::from_proto_to_data_type;
use crate::df_optimizer::apply_df_optimizer;
use crate::error::{ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, UnexpectedSnafu};
use crate::expr::error::DataTypeSnafu;
use crate::Error;
/// A time window expression kept in both its logical and its physical
/// (directly evaluable) form, together with the schema it was planned against.
#[derive(Debug, Clone)]
pub struct TimeWindowExpr {
    /// Physical expression planned from `logical_expr` against `df_schema`;
    /// evaluated on record batches in `handle_rows`.
    phy_expr: PhysicalExprRef,
    /// Name of the column that `handle_rows` looks up in each incoming row schema.
    column_name: String,
    /// The original logical expression; `eval` uses it to compute the window bounds.
    logical_expr: Expr,
    /// Schema that `logical_expr` was planned against.
    df_schema: DFSchema,
}
impl TimeWindowExpr {
pub fn from_expr(expr: &Expr, column_name: &str, df_schema: &DFSchema) -> Result<Self, Error> {
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
Ok(Self {
phy_expr,
column_name: column_name.to_string(),
logical_expr: expr.clone(),
df_schema: df_schema.clone(),
})
}
pub fn eval(
&self,
current: Timestamp,
) -> Result<(Option<Timestamp>, Option<Timestamp>), Error> {
let lower_bound =
find_expr_time_window_lower_bound(&self.logical_expr, &self.df_schema, current)?;
let upper_bound =
find_expr_time_window_upper_bound(&self.logical_expr, &self.df_schema, current)?;
Ok((lower_bound, upper_bound))
}
/// Find timestamps from rows using time window expr
pub async fn handle_rows(
&self,
rows_list: Vec<api::v1::Rows>,
) -> Result<BTreeSet<Timestamp>, Error> {
let mut time_windows = BTreeSet::new();
for rows in rows_list {
// pick the time index column and use it to eval on `self.expr`
let ts_col_index = rows
.schema
.iter()
.map(|col| col.column_name.clone())
.position(|name| name == self.column_name);
let Some(ts_col_index) = ts_col_index else {
warn!("can't found time index column in schema: {:?}", rows.schema);
continue;
};
let col_schema = &rows.schema[ts_col_index];
let cdt = from_proto_to_data_type(col_schema)?;
let column_values = rows
.rows
.iter()
.map(|row| &row.values[ts_col_index])
.collect_vec();
let mut vector = cdt.create_mutable_vector(column_values.len());
for value in column_values {
let value = pb_value_to_value_ref(value, &None);
vector.try_push_value_ref(value).context(DataTypeSnafu {
msg: "Failed to convert rows to columns",
})?;
}
let vector = vector.to_vector();
let df_schema = create_df_schema_for_ts_column(&self.column_name, cdt)?;
let rb =
DfRecordBatch::try_new(df_schema.inner().clone(), vec![vector.to_arrow_array()])
.with_context(|_e| ArrowSnafu {
context: format!(
"Failed to create record batch from {df_schema:?} and {vector:?}"
),
})?;
let eval_res = self
.phy_expr
.evaluate(&rb)
.with_context(|_| DatafusionSnafu {
context: format!(
"Failed to evaluate physical expression {:?} on {rb:?}",
self.phy_expr
),
})?;
let res = columnar_to_ts_vector(&eval_res)?;
for ts in res.into_iter().flatten() {
time_windows.insert(ts);
}
}
Ok(time_windows)
}
}
/// Build a single-column `DFSchema` holding a non-nullable field called `name`
/// with the arrow type of `cdt`, qualified by the bare table reference
/// `TimeIndexOnlyTable`.
///
/// # Errors
/// Returns a `Datafusion` error if the `DFSchema` cannot be constructed.
fn create_df_schema_for_ts_column(name: &str, cdt: ConcreteDataType) -> Result<DFSchema, Error> {
    let ts_field = arrow_schema::Field::new(name, cdt.as_arrow_type(), false);
    let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![ts_field]));
    let qualifiers = vec![Some(TableReference::bare("TimeIndexOnlyTable"))];

    let df_schema = DFSchema::from_field_specific_qualified_schema(qualifiers, &arrow_schema)
        .with_context(|_e| DatafusionSnafu {
            context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
        })?;

    Ok(df_schema)
}
/// Convert `ColumnarValue` to `Vec<Option<Timestamp>>`
fn columnar_to_ts_vector(columnar: &ColumnarValue) -> Result<Vec<Option<Timestamp>>, Error> {
let val = match columnar {
datafusion_expr::ColumnarValue::Array(array) => {
let ty = array.data_type();
let ty = ConcreteDataType::from_arrow_type(ty);
let time_unit = if let ConcreteDataType::Timestamp(ty) = ty {
ty.unit()
} else {
return UnexpectedSnafu {
reason: format!("Non-timestamp type: {ty:?}"),
}
.fail();
};
match time_unit {
TimeUnit::Second => TimestampSecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec(),
TimeUnit::Millisecond => {
TimestampMillisecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
}
}
datafusion_expr::ColumnarValue::Scalar(scalar) => {
let value = Value::try_from(scalar.clone()).with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert scalar {scalar:?} to value"),
})?;
let ts = value.as_timestamp().context(UnexpectedSnafu {
reason: format!("Expect Timestamp, found {:?}", value),
})?;
vec![Some(ts)]
}
};
Ok(val)
}
/// Parse `sql` and plan it into a DataFusion logical plan.
///
/// The statement is parsed with `query_ctx` and planned by the planner of
/// `engine`. When `optimize` is `true`, the plan is additionally passed through
/// `apply_df_optimizer`; otherwise it is returned as planned.
///
/// # Errors
/// Parsing and planning failures are wrapped into an `External` error; optimizer
/// failures are propagated.
pub async fn sql_to_df_plan(
    query_ctx: QueryContextRef,
    engine: QueryEngineRef,
    sql: &str,
    optimize: bool,
) -> Result<LogicalPlan, Error> {
    let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?;

    let raw_plan = engine
        .planner()
        .plan(&stmt, query_ctx)
        .await
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?;

    if !optimize {
        return Ok(raw_plan);
    }

    let optimized = apply_df_optimizer(raw_plan).await?;
    Ok(optimized)
}
/// Locate the time window expression of `plan`.
///
/// Returns a tuple of:
/// 1. the name of the time index column of the scanned table,
/// 2. the time window expr (`None` when no suitable expr is found),
/// 3. the time unit of the time index column,
/// 4. a single-column schema (the time index column, qualified by the bare table
///    name) for evaluating the time window expr.
///
/// # Errors
/// Fails when the plan contains no `TableScan`, the table cannot be found in the
/// catalog, the table has no timestamp column or that column is not of a
/// timestamp type, or when building the schema / rewriting the expr fails.
async fn find_time_window_expr(
    plan: &LogicalPlan,
    catalog_man: CatalogManagerRef,
    query_ctx: QueryContextRef,
) -> Result<(String, Option<datafusion_expr::Expr>, TimeUnit, DFSchema), Error> {
    // TODO(discord9): find the expr that do time window
    let mut table_name = None;

    // First find the table source in the logical plan: traversal stops at the
    // first `TableScan` that is visited.
    plan.apply(|plan| {
        let LogicalPlan::TableScan(table_scan) = plan else {
            return Ok(TreeNodeRecursion::Continue);
        };
        table_name = Some(table_scan.table_name.clone());
        Ok(TreeNodeRecursion::Stop)
    })
    .with_context(|_| DatafusionSnafu {
        context: format!("Can't find table source in plan {plan:?}"),
    })?;
    let Some(table_name) = table_name else {
        UnexpectedSnafu {
            reason: format!("Can't find table source in plan {plan:?}"),
        }
        .fail()?
    };

    // A table reference without catalog/schema falls back to the current
    // catalog/schema of the query context.
    let current_schema = query_ctx.current_schema();

    let catalog_name = table_name.catalog().unwrap_or(query_ctx.current_catalog());
    let schema_name = table_name.schema().unwrap_or(&current_schema);
    let table_name = table_name.table();

    let Some(table_ref) = catalog_man
        .table(catalog_name, schema_name, table_name, Some(&query_ctx))
        .await
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?
    else {
        UnexpectedSnafu {
            reason: format!(
                "Can't find table {table_name:?} in catalog {catalog_name:?}/{schema_name:?}"
            ),
        }
        .fail()?
    };

    let schema = &table_ref.table_info().meta.schema;

    let ts_index = schema.timestamp_column().context(UnexpectedSnafu {
        reason: format!("Can't find timestamp column in table {table_name:?}"),
    })?;

    let ts_col_name = ts_index.name.clone();

    let expected_time_unit = ts_index.data_type.as_timestamp().with_context(|| UnexpectedSnafu {
        reason: format!(
            "Expected timestamp column {ts_col_name:?} in table {table_name:?} to be timestamp, but got {ts_index:?}"
        ),
    })?.unit();

    // Schema holding only the (non-nullable) time index column, qualified by the
    // bare table name.
    let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
        ts_col_name.clone(),
        ts_index.data_type.as_arrow_type(),
        false,
    )]));

    let df_schema = DFSchema::from_field_specific_qualified_schema(
        vec![Some(TableReference::bare(table_name))],
        &arrow_schema,
    )
    .with_context(|_e| DatafusionSnafu {
        context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
    })?;

    // find the time window expr which refers to the time index column
    let mut aggr_expr = None;
    let mut time_window_expr: Option<Expr> = None;

    // Traversal never stops early here, so every `Aggregate` visited overwrites
    // `aggr_expr` and the last one visited is the one that is used.
    let find_inner_aggr_expr = |plan: &LogicalPlan| {
        if let LogicalPlan::Aggregate(aggregate) = plan {
            aggr_expr = Some(aggregate.clone());
        };

        Ok(TreeNodeRecursion::Continue)
    };
    plan.apply(find_inner_aggr_expr)
        .with_context(|_| DatafusionSnafu {
            context: format!("Can't find aggr expr in plan {plan:?}"),
        })?;

    if let Some(aggregate) = aggr_expr {
        for group_expr in &aggregate.group_expr {
            // Only group exprs referring to exactly one column are candidates.
            let refs = group_expr.column_refs();
            if refs.len() != 1 {
                continue;
            }
            let ref_col = refs.iter().next().unwrap();

            // The referenced column must exist in the aggregate's input schema.
            let index = aggregate.input.schema().maybe_index_of_column(ref_col);
            let Some(index) = index else {
                continue;
            };
            let field = aggregate.input.schema().field(index);

            // The column counts as the time index when its field metadata maps
            // `TIME_INDEX_KEY` to "true".
            let is_time_index = field.metadata().get(TIME_INDEX_KEY) == Some(&"true".to_string());

            if is_time_index {
                // Qualify every column in the group expr with the bare table name.
                let rewrite_column = group_expr.clone();
                let rewritten = rewrite_column
                    .rewrite(&mut RewriteColumn {
                        table_name: table_name.to_string(),
                    })
                    .with_context(|_| DatafusionSnafu {
                        context: format!("Rewrite expr failed, expr={:?}", group_expr),
                    })?
                    .data;
                /// Rewriter that sets the relation of every `Expr::Column` to the
                /// bare table reference `table_name`.
                struct RewriteColumn {
                    table_name: String,
                }

                impl TreeNodeRewriter for RewriteColumn {
                    type Node = Expr;
                    fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
                        let Expr::Column(mut column) = node else {
                            return Ok(Transformed::no(node));
                        };

                        column.relation = Some(TableReference::bare(self.table_name.clone()));

                        Ok(Transformed::yes(Expr::Column(column)))
                    }
                }

                // Only the first matching group expr is used.
                time_window_expr = Some(rewritten);
                break;
            }
        }
        Ok((ts_col_name, time_window_expr, expected_time_unit, df_schema))
    } else {
        // No aggregate in the plan, hence no time window expr: return `None` for it.
        Ok((ts_col_name, None, expected_time_unit, df_schema))
    }
}
/// Find the bounds of the time window that `current` falls into, according to the
/// time window expr of `plan`.
///
/// Returns `(time index column name, lower bound, upper bound)`.
/// i.e. for time window expr being `date_bin(INTERVAL '5 minutes', ts) as time_window` and `current="2021-07-01 00:01:01.000"`,
/// the lower bound is `Some("2021-07-01 00:00:00.000")`.
/// If no time window expr is found in `plan`, both bounds are `None`.
///
/// Time window expr is a expr that:
/// 1. ref only to a time index column
/// 2. is monotonic increasing
/// 3. show up in GROUP BY clause
///
/// note this plan should only contain one TableScan
///
/// # Errors
/// Fails when the time window expr lookup fails, when `current` cannot be
/// converted to the time unit of the time index column, or when evaluating
/// either bound fails.
pub async fn find_plan_time_window_bound(
    plan: &LogicalPlan,
    current: Timestamp,
    query_ctx: QueryContextRef,
    engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
    // TODO(discord9): find the expr that do time window
    let catalog_manager = engine.engine_state().catalog_manager();
    let (ts_col_name, window_expr, expected_time_unit, df_schema) =
        find_time_window_expr(plan, catalog_manager.clone(), query_ctx).await?;

    // Bring `current` to the time unit of the time index column before evaluating.
    let aligned_current = current
        .convert_to(expected_time_unit)
        .with_context(|| UnexpectedSnafu {
            reason: format!("Failed to cast current timestamp {current:?} to {expected_time_unit}"),
        })?;

    // Without a time window expr there is nothing to bound.
    let Some(window_expr) = window_expr else {
        return Ok((ts_col_name, None, None));
    };

    let lower_bound = find_expr_time_window_lower_bound(&window_expr, &df_schema, aligned_current)?;
    let upper_bound = find_expr_time_window_upper_bound(&window_expr, &df_schema, aligned_current)?;
    Ok((ts_col_name, lower_bound, upper_bound))
}
/// Evaluate the time window `expr` at `current` to get the lower bound of the time window
/// that `current` falls in.
///
/// i.e. for `current="2021-07-01 00:01:01.000"`, `expr=date_bin(INTERVAL '5 minutes', ts) as time_window`
/// and `ts_col=ts`, return `Some("2021-07-01 00:00:00.000")` since it's the lower bound
/// of the current time window given the current timestamp.
///
/// A `None` result means this time window has no lower bound.
///
/// # Errors
/// Fails when `expr` cannot be turned into a physical expression over `df_schema`, or when
/// evaluating it at `current` fails.
fn find_expr_time_window_lower_bound(
    expr: &Expr,
    df_schema: &DFSchema,
    current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
    let session_state = SessionContext::new().state();
    let physical_expr: PhysicalExprRef = DefaultPhysicalPlanner::default()
        .create_physical_expr(expr, df_schema, &session_state)
        .with_context(|_| DatafusionSnafu {
            context: format!(
                "Failed to create physical expression from {expr:?} using {df_schema:?}"
            ),
        })?;

    // The value of the expr at `current` is taken as the start of the window.
    let window_start = eval_ts_to_ts(&physical_expr, df_schema, current)?;
    // `convert_to` is asked for the unit the value already carries, so the result keeps
    // the unit produced by the expr.
    Ok(window_start.convert_to(window_start.unit()))
}
/// Find the upper bound of the time window that contains `current`.
///
/// The upper bound is the smallest timestamp (expressed in the unit of `current`) greater
/// than `current` at which `expr` evaluates to a later time window than it does at `current`,
/// assuming `expr` is monotonically increasing.
///
/// The search runs in two phases:
/// 1. exponential probing: offsets `1, 2, 4, ...` are added to `current` until `expr` yields
///    a greater time window;
/// 2. binary search between the last probe that is still in the current window and the
///    first probe that is in a later window.
///
/// Returns `Ok(None)` when probing overflows `i64`, meaning no upper bound could be found.
///
/// # Errors
/// Fails when `expr` cannot be turned into a physical expression or evaluated, or when
/// `expr` yields a smaller time window for a larger timestamp (not monotonically increasing).
fn find_expr_time_window_upper_bound(
    expr: &Expr,
    df_schema: &DFSchema,
    current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
    use std::cmp::Ordering;

    let phy_planner = DefaultPhysicalPlanner::default();
    let phy_expr: PhysicalExprRef = phy_planner
        .create_physical_expr(expr, df_schema, &SessionContext::new().state())
        .with_context(|_e| DatafusionSnafu {
            context: format!(
                "Failed to create physical expression from {expr:?} using {df_schema:?}"
            ),
        })?;

    let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;

    // search to find the upper bound
    let mut offset: i64 = 1;
    let mut lower_bound = Some(current);
    let upper_bound;
    // first, exponential probing to find a range for the binary search
    loop {
        let Some(next_val) = current.value().checked_add(offset) else {
            // no upper bound if overflow
            return Ok(None);
        };

        let next_time_probe = common_time::Timestamp::new(next_val, current.unit());
        let next_time_window = eval_ts_to_ts(&phy_expr, df_schema, next_time_probe)?;

        match next_time_window.cmp(&cur_time_window) {
            Ordering::Less => {
                return UnexpectedSnafu {
                    reason: format!(
                        "Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
                    ),
                }
                .fail();
            }
            Ordering::Equal => {
                // still inside the current window: tighten the lower end of the range
                lower_bound = Some(next_time_probe);
            }
            Ordering::Greater => {
                // first probe that lands in a later window
                upper_bound = Some(next_time_probe);
                break;
            }
        }

        let Some(new_offset) = offset.checked_mul(2) else {
            // no upper bound if overflow
            return Ok(None);
        };
        offset = new_offset;
    }

    // binary search for the exact upper bound
    ensure!(lower_bound.map(|v|v.unit())==upper_bound.map(|v|v.unit()), UnexpectedSnafu{
        reason: format!(" unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"),
    });

    let output_unit = upper_bound
        .context(UnexpectedSnafu {
            reason: "should have upper bound",
        })?
        .unit();

    let mut low = lower_bound
        .context(UnexpectedSnafu {
            reason: "should have lower bound",
        })?
        .value();
    let mut high = upper_bound
        .context(UnexpectedSnafu {
            reason: "should have upper bound",
        })?
        .value();

    // Invariant: `low` is at or before the first timestamp of a later window, `high` is
    // always inside a later window.
    while low < high {
        // `low + (high - low) / 2` always lies in `[low, high)`. The naive `(low + high) / 2`
        // truncates toward zero, so for a negative odd sum with `high - low == 1` it yields
        // `high` itself and the loop never shrinks the range (and the sum may overflow).
        let mid = low + (high - low) / 2;
        let mid_probe = common_time::Timestamp::new(mid, output_unit);
        let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;

        match mid_time_window.cmp(&cur_time_window) {
            Ordering::Less => {
                return UnexpectedSnafu {
                    reason: format!("Binary search failed for time window expression {expr:?}"),
                }
                .fail();
            }
            Ordering::Equal => low = mid + 1,
            Ordering::Greater => high = mid,
        }
    }

    let final_upper_bound_for_time_window = common_time::Timestamp::new(high, output_unit);
    Ok(Some(final_upper_bound_for_time_window))
}
/// Evaluate the physical expr `phy` on a one-row batch holding `input_value` and return the
/// timestamp it produces.
///
/// The first field of `df_schema` must be a timestamp type; `input_value` is converted to
/// that field's time unit before the batch is built.
///
/// # Errors
/// Fails when the first field is not a timestamp, when `input_value` cannot be converted to
/// its unit, when the record batch cannot be built, when evaluation fails, or when the
/// evaluation result does not start with a non-null timestamp.
fn eval_ts_to_ts(
    phy: &PhysicalExprRef,
    df_schema: &DFSchema,
    input_value: Timestamp,
) -> Result<Timestamp, Error> {
    // Time unit of the first schema field.
    let schema_cdt = ConcreteDataType::from_arrow_type(df_schema.field(0).data_type());
    let schema_unit = match &schema_cdt {
        ConcreteDataType::Timestamp(ts_type) => ts_type.unit(),
        _ => {
            return UnexpectedSnafu {
                reason: format!("Expect Timestamp, found {:?}", schema_cdt),
            }
            .fail();
        }
    };

    let converted = input_value
        .convert_to(schema_unit)
        .with_context(|| UnexpectedSnafu {
            reason: format!("Failed to convert timestamp {input_value:?} to {schema_unit}"),
        })?;

    // Single-element column whose vector type matches the schema's time unit.
    let raw = vec![converted.value()];
    let ts_vector = match schema_unit {
        TimeUnit::Second => TimestampSecondVector::from_vec(raw).to_arrow_array(),
        TimeUnit::Millisecond => TimestampMillisecondVector::from_vec(raw).to_arrow_array(),
        TimeUnit::Microsecond => TimestampMicrosecondVector::from_vec(raw).to_arrow_array(),
        TimeUnit::Nanosecond => TimestampNanosecondVector::from_vec(raw).to_arrow_array(),
    };

    let rb = DfRecordBatch::try_new(df_schema.inner().clone(), vec![ts_vector.clone()])
        .with_context(|_| ArrowSnafu {
            context: format!("Failed to create record batch from {df_schema:?} and {ts_vector:?}"),
        })?;

    let eval_res = phy.evaluate(&rb).with_context(|_| DatafusionSnafu {
        context: format!("Failed to evaluate physical expression {phy:?} on {rb:?}"),
    })?;

    // Only the first value of the result is of interest, and it must be non-null.
    match columnar_to_ts_vector(&eval_res)?.first() {
        Some(Some(ts)) => Ok(*ts),
        _ => UnexpectedSnafu {
            reason: format!(
                "Expected timestamp in expression {phy:?} but got {:?}",
                eval_res
            ),
        }
        .fail(),
    }
}
// TODO(discord9): add a method to find out the precise time window
/// Plan rewriter that adds one extra predicate to a logical plan.
///
/// The predicate is AND-ed into the first non-`HAVING` `Filter` node the rewriter visits, or,
/// if a `TableScan` is visited first, a new `Filter` is placed on top of that scan. Only one
/// node is ever rewritten.
#[derive(Debug)]
pub struct AddFilterRewriter {
    /// Predicate to add to the plan.
    extra_filter: Expr,
    /// Set once the predicate has been added; nodes visited afterwards are left untouched.
    is_rewritten: bool,
}

impl AddFilterRewriter {
    /// Create a rewriter that has not yet applied `filter`.
    fn new(filter: Expr) -> Self {
        let extra_filter = filter;
        Self {
            extra_filter,
            is_rewritten: false,
        }
    }
}
impl TreeNodeRewriter for AddFilterRewriter {
    type Node = LogicalPlan;

    /// Add `extra_filter` to the first eligible node and pass every other node through.
    ///
    /// Eligible nodes are a `Filter` that is not a `HAVING` filter (the predicate is AND-ed
    /// with the existing one) and a `TableScan` (a new `Filter` is created on top of it).
    fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
        // The extra predicate is applied at most once.
        if self.is_rewritten {
            return Ok(Transformed::no(node));
        }

        let rewritten = match node {
            LogicalPlan::Filter(mut where_clause) if !where_clause.having => {
                where_clause.predicate = where_clause.predicate.and(self.extra_filter.clone());
                LogicalPlan::Filter(where_clause)
            }
            scan @ LogicalPlan::TableScan(_) => {
                // no filter seen so far: put a new one directly on top of the scan
                let new_filter =
                    datafusion_expr::Filter::try_new(self.extra_filter.clone(), Arc::new(scan))?;
                LogicalPlan::Filter(new_filter)
            }
            other => return Ok(Transformed::no(other)),
        };

        self.is_rewritten = true;
        Ok(Transformed::yes(rewritten))
    }
}
/// Unparse a DataFusion logical plan back into a SQL string.
///
/// Identifiers that differ from their lowercase form are double-quoted; no quote style is
/// requested for any other identifier.
///
/// # Errors
/// Fails when the unparser cannot convert `plan` to SQL.
fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
    /// A dialect that double-quotes identifiers which are not already all lowercase
    struct ForceQuoteIdentifiers;
    impl datafusion::sql::unparser::dialect::Dialect for ForceQuoteIdentifiers {
        fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
            let already_lowercase = identifier.to_lowercase() == identifier;
            (!already_lowercase).then_some('"')
        }
    }

    let dialect = ForceQuoteIdentifiers;
    let statement = Unparser::new(&dialect)
        .plan_to_sql(plan)
        .with_context(|_| DatafusionSnafu {
            context: format!("Failed to unparse logical plan {plan:?}"),
        })?;
    Ok(statement.to_string())
}
#[cfg(test)]
mod test {
use datafusion_common::tree_node::TreeNode;
use pretty_assertions::assert_eq;
use session::context::QueryContext;
use super::{sql_to_df_plan, *};
use crate::recording_rules::{df_plan_to_sql, AddFilterRewriter};
use crate::test_utils::create_test_query_engine;
/// A query over quoted uppercase identifiers must keep its quotes after a
/// SQL -> plan -> SQL round trip.
#[tokio::test]
async fn test_sql_plan_convert() {
    let engine = create_test_query_engine();
    let query_ctx = QueryContext::arc();

    let input_sql = r#"SELECT "NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#;
    let expected_sql =
        r#"SELECT "UPPERCASE_NUMBERS_WITH_TS"."NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#;

    let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), input_sql, false)
        .await
        .unwrap();
    let unparsed = df_plan_to_sql(&plan).unwrap();

    assert_eq!(expected_sql, unparsed);
}
/// `AddFilterRewriter` must add `number > 4` to each query, either as a new `WHERE` or
/// AND-ed into the existing one.
#[tokio::test]
async fn test_add_filter() {
    use datafusion_expr::{col, lit};

    // Each case is (input SQL, expected SQL after the filter is added).
    let cases = [
        (
            "SELECT number FROM numbers_with_ts GROUP BY number",
            "SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (number > 4) GROUP BY numbers_with_ts.number",
        ),
        (
            "SELECT number FROM numbers_with_ts WHERE number < 2 OR number >10",
            "SELECT numbers_with_ts.number FROM numbers_with_ts WHERE ((numbers_with_ts.number < 2) OR (numbers_with_ts.number > 10)) AND (number > 4)",
        ),
        (
            "SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window",
            "SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE (number > 4) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)",
        ),
    ];

    let engine = create_test_query_engine();
    let query_ctx = QueryContext::arc();

    for (input_sql, expected_sql) in cases {
        let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), input_sql, false)
            .await
            .unwrap();

        let mut rewriter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
        let rewritten = plan.rewrite(&mut rewriter).unwrap().data;

        let unparsed = df_plan_to_sql(&rewritten).unwrap();
        assert_eq!(expected_sql, unparsed);
    }
}
/// Checks `find_plan_time_window_bound` against a table of queries, then checks the SQL
/// obtained after adding the found bounds to the plan as a filter on the returned column.
#[tokio::test]
async fn test_plan_time_window_lower_bound() {
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
// Each case is:
// (input SQL, `current` timestamp, expected (column name, lower bound, upper bound),
//  expected SQL after the bounds are added as a filter)
let testcases = [
// same alias is not same column
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts GROUP BY ts;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394109000, TimeUnit::Millisecond)),
Some(Timestamp::new(1740394109001, TimeUnit::Millisecond)),
),
r#"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:29' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:48:29.001' AS TIMESTAMP))) GROUP BY numbers_with_ts.ts"#
),
// complex time window index
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394080, TimeUnit::Second)),
Some(Timestamp::new(1740394140, TimeUnit::Second)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// no time index
(
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",
Timestamp::new(23, TimeUnit::Millisecond),
("ts".to_string(), None, None),
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;"
),
// time index
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// on spot
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(0, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// different time unit
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23_000_000, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other fields
(
"SELECT sum(number) as sum_up, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number) AS sum_up, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other pks
(
"SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number"
),
// subquery
(
"SELECT number, time_window FROM (SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number);",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number)"
),
// cte
(
"with cte as (select number, date_bin('5 minutes', ts) as time_window from numbers_with_ts GROUP BY time_window, number) select number, time_window from cte;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT cte.number, cte.time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number) AS cte"
),
// complex subquery without alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number), numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window, bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) GROUP BY numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts), bucket_name"
),
// complex subquery alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) as cte GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(cte.number), cte.number, date_bin('5 minutes', cte.ts) AS time_window, cte.bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) AS cte GROUP BY cte.number, date_bin('5 minutes', cte.ts), cte.bucket_name"
),
];
for (sql, current, expected, expected_unparsed) in testcases {
// Step 1: find the bounds and compare them with the expected tuple.
// NOTE(review): the last `sql_to_df_plan` argument presumably toggles plan optimization
// (`true` here, `false` for the plan that is unparsed below) — confirm.
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, true)
.await
.unwrap();
let real =
find_plan_time_window_bound(&plan, current, ctx.clone(), query_engine.clone())
.await
.unwrap();
assert_eq!(expected, real);
// Step 2: rebuild the plan and, when bounds were found, add them as a filter.
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let (col_name, lower, upper) = real;
let new_sql = if lower.is_some() {
// turn a bound into a scalar value that can be passed to `lit`
let to_df_literal = |value| {
let value = Value::from(value);
value.try_to_scalar_value(&value.data_type()).unwrap()
};
let lower = to_df_literal(lower.unwrap());
let upper = to_df_literal(upper.unwrap());
// filter is `col >= lower AND col <= upper`
let expr = col(&col_name)
.gt_eq(lit(lower))
.and(col(&col_name).lt_eq(lit(upper)));
let mut add_filter = AddFilterRewriter::new(expr);
let plan = plan.rewrite(&mut add_filter).unwrap().data;
df_plan_to_sql(&plan).unwrap()
} else {
// no bounds found: the input SQL itself is compared with the expectation
sql.to_string()
};
assert_eq!(expected_unparsed, new_sql);
}
}
}

Some files were not shown because too many files have changed in this diff Show More