Compare commits

..

3 Commits

Author SHA1 Message Date
LFC
87d32fc255 fix: show tables too slow cherry pick (#7244)
v0.12.1

fix: `show tables` is too slow under large tables

Signed-off-by: luofucong <luofc@foxmail.com>
2025-11-18 12:29:40 +00:00
LFC
503585ba42 ci: update github runner on old branch (#7249)
* ci: update github runner (ubuntu-20.04 -> ubuntu-22.04)

Signed-off-by: luofucong <luofc@foxmail.com>
2025-11-18 19:52:03 +08:00
Lei, HUANG
ee2a7a5f10 fix: check if memtable is empty by stats (#5989)
fix/checking-memtable-empty-and-stats:
 - **Refactor timestamp updates**: Simplified timestamp range updates in `PartitionTreeMemtable` and `TimeSeriesMemtable` by replacing `update_timestamp_range` with `fetch_max` and `fetch_min` methods for `max_timestamp` and `min_timestamp`.
   - Affected files: `partition_tree.rs`, `time_series.rs`

 - **Remove unused code**: Deleted the `update_timestamp_range` method from `WriteMetrics` and removed unnecessary imports.
   - Affected file: `stats.rs`

 - **Optimize memtable filtering**: Streamlined the check for empty memtables in `ScanRegion` by directly using `time_range`.
   - Affected file: `scan_region.rs`

(cherry picked from commit 1a517ec8ac)
2025-06-04 11:36:42 +08:00
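A minimal sketch of the `fetch_max`/`fetch_min` pattern this commit message describes, with illustrative names only (the struct and method names below are not the crate's actual API): each write updates the min/max timestamps atomically, and a range that was never touched doubles as the stats-based "memtable is empty" signal.

```rust
use std::sync::atomic::{AtomicI64, Ordering};

// Hypothetical stand-in for per-memtable write statistics.
struct TimestampRange {
    min_timestamp: AtomicI64,
    max_timestamp: AtomicI64,
}

impl TimestampRange {
    fn new() -> Self {
        Self {
            // Sentinels: an untouched range stays at (MAX, MIN).
            min_timestamp: AtomicI64::new(i64::MAX),
            max_timestamp: AtomicI64::new(i64::MIN),
        }
    }

    /// Record one written timestamp; safe to call from concurrent writers.
    fn observe(&self, ts: i64) {
        self.min_timestamp.fetch_min(ts, Ordering::Relaxed);
        self.max_timestamp.fetch_max(ts, Ordering::Relaxed);
    }

    /// The memtable is empty iff no timestamp was ever observed.
    fn is_empty(&self) -> bool {
        self.min_timestamp.load(Ordering::Relaxed) == i64::MAX
    }
}

fn main() {
    let range = TimestampRange::new();
    assert!(range.is_empty());
    range.observe(42);
    range.observe(7);
    assert!(!range.is_empty());
    assert_eq!(range.min_timestamp.load(Ordering::Relaxed), 7);
    assert_eq!(range.max_timestamp.load(Ordering::Relaxed), 42);
}
```

Compared with a dedicated `update_timestamp_range` helper, `fetch_min`/`fetch_max` keep the update lock-free and avoid a compare-and-swap loop on the write path.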
150 changed files with 1214 additions and 5272 deletions

View File

@@ -24,4 +24,9 @@ runs:
--set auth.rbac.token.enabled=false \
--set persistence.size=2Gi \
--create-namespace \
--set global.security.allowInsecureImages=true \
--set image.registry=docker.io \
--set image.repository=greptime/etcd \
--set image.tag=3.6.1-debian-12-r3 \
--version 12.0.8 \
-n ${{ inputs.namespace }}

View File

@@ -51,7 +51,7 @@ runs:
run: |
helm upgrade \
--install my-greptimedb \
--set meta.etcdEndpoints=${{ inputs.etcd-endpoints }} \
--set meta.backendStorage.etcd.endpoints=${{ inputs.etcd-endpoints }} \
--set meta.enableRegionFailover=${{ inputs.enable-region-failover }} \
--set image.registry=${{ inputs.image-registry }} \
--set image.repository=${{ inputs.image-repository }} \

View File

@@ -23,4 +23,8 @@ runs:
--set listeners.controller.protocol=PLAINTEXT \
--set listeners.client.protocol=PLAINTEXT \
--create-namespace \
--set image.registry=docker.io \
--set image.repository=greptime/kafka \
--set image.tag=3.9.0-debian-12-r1 \
--version 31.0.0 \
-n ${{ inputs.namespace }}

34
.github/scripts/pull-test-deps-images.sh vendored Executable file
View File

@@ -0,0 +1,34 @@
#!/bin/bash

# This script pulls the test dependency images stored in the public ECR one by one to avoid rate limiting.

set -e

MAX_RETRIES=3

IMAGES=(
  "greptime/zookeeper:3.7"
  "greptime/kafka:3.9.0-debian-12-r1"
  "greptime/etcd:3.6.1-debian-12-r3"
  "greptime/minio:2024"
  "greptime/mysql:5.7"
)

for image in "${IMAGES[@]}"; do
  for ((attempt=1; attempt<=MAX_RETRIES; attempt++)); do
    if docker pull "$image"; then
      # Successfully pulled the image.
      break
    else
      # Back off between attempts to avoid rate limiting.
      if [ $attempt -lt $MAX_RETRIES ]; then
        sleep_seconds=$((attempt * 5))
        echo "Attempt $attempt failed for $image, waiting $sleep_seconds seconds"
        sleep $sleep_seconds # 5s, 10s delays
      else
        echo "Failed to pull $image after $MAX_RETRIES attempts"
        exit 1
      fi
    fi
  done
done

View File

@@ -14,7 +14,7 @@ name: Build API docs
jobs:
apidoc:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:

View File

@@ -16,11 +16,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -83,7 +83,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -218,7 +218,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }}
steps:
@@ -251,7 +251,7 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
continue-on-error: true
steps:
- uses: actions/checkout@v4
@@ -283,7 +283,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -309,7 +309,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -337,7 +337,7 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
permissions:
issues: write

View File

@@ -21,14 +21,13 @@ concurrency:
cancel-in-progress: true
jobs:
check-typos-and-docs:
name: Check typos and docs
runs-on: ubuntu-20.04
check-docs:
name: Check docs
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: crate-ci/typos@master
- name: Check the config docs
run: |
make config-docs && \
@@ -36,7 +35,7 @@ jobs:
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
license-header-check:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
name: Check License Header
steps:
- uses: actions/checkout@v4
@@ -49,7 +48,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
os: [ ubuntu-22.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -72,7 +71,7 @@ jobs:
toml:
name: Toml Check
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -89,7 +88,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
os: [ ubuntu-22.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -248,7 +247,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
os: [ ubuntu-22.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -568,7 +567,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
os: [ ubuntu-22.04 ]
mode:
- name: "Basic"
opts: ""
@@ -587,7 +586,8 @@ jobs:
- if: matrix.mode.kafka
name: Setup kafka server
working-directory: tests-integration/fixtures
run: docker compose up -d --wait kafka
run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait kafka
- name: Download pre-built binaries
uses: actions/download-artifact@v4
with:
@@ -607,7 +607,7 @@ jobs:
fmt:
name: Rustfmt
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -624,7 +624,7 @@ jobs:
clippy:
name: Clippy
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -685,7 +685,8 @@ jobs:
uses: taiki-e/install-action@nextest
- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait
- name: Run nextest cases
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend
env:
@@ -710,7 +711,7 @@ jobs:
coverage:
if: github.event_name == 'merge_group'
runs-on: ubuntu-20.04-8-cores
runs-on: ubuntu-22.04-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
@@ -737,7 +738,8 @@ jobs:
uses: taiki-e/install-action@cargo-llvm-cov
- name: Setup external services
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
run: ../../.github/scripts/pull-test-deps-images.sh && docker compose up -d --wait
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend
env:
@@ -770,7 +772,7 @@ jobs:
# compat:
# name: Compatibility Test
# needs: build
# runs-on: ubuntu-20.04
# runs-on: ubuntu-22.04
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v4

View File

@@ -3,13 +3,9 @@ on:
pull_request_target:
types: [opened, edited]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
docbot:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
permissions:
pull-requests: write
contents: read

View File

@@ -31,7 +31,7 @@ name: CI
jobs:
typos:
name: Spell Check with Typos
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
@@ -39,7 +39,7 @@ jobs:
- uses: crate-ci/typos@master
license-header-check:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
name: Check License Header
steps:
- uses: actions/checkout@v4
@@ -49,29 +49,29 @@ jobs:
check:
name: Check
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- run: 'echo "No action required"'
fmt:
name: Rustfmt
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- run: 'echo "No action required"'
clippy:
name: Clippy
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- run: 'echo "No action required"'
coverage:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- run: 'echo "No action required"'
test:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- run: 'echo "No action required"'
@@ -80,7 +80,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-20.04 ]
os: [ ubuntu-22.04 ]
mode:
- name: "Basic"
- name: "Remote WAL"

View File

@@ -14,11 +14,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -70,7 +70,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -182,7 +182,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps:
@@ -214,7 +214,7 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -249,7 +249,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -275,7 +275,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -303,7 +303,7 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
permissions:
issues: write
env:

View File

@@ -133,7 +133,7 @@ jobs:
name: Check status
needs: [sqlness-test, sqlness-windows, test-on-windows]
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
outputs:
check-result: ${{ steps.set-check-result.outputs.check-result }}
steps:
@@ -146,7 +146,7 @@ jobs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [check-status]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:

View File

@@ -29,7 +29,7 @@ jobs:
release-dev-builder-images:
name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
runs-on: ubuntu-20.04-16-cores
runs-on: ubuntu-22.04-16-cores
outputs:
version: ${{ steps.set-version.outputs.version }}
steps:
@@ -63,7 +63,7 @@ jobs:
release-dev-builder-images-ecr:
name: Release dev builder images to AWS ECR
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
release-dev-builder-images
]
@@ -148,7 +148,7 @@ jobs:
release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
release-dev-builder-images
]

View File

@@ -18,11 +18,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -97,7 +97,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -335,7 +335,7 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -377,7 +377,7 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
with:
@@ -396,7 +396,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -422,7 +422,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -448,7 +448,7 @@ jobs:
name: Bump doc version
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [allocate-runners]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
@@ -475,7 +475,7 @@ jobs:
build-macos-artifacts,
build-windows-artifacts,
]
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.

View File

@@ -7,13 +7,9 @@ on:
- reopened
- edited
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4

View File

@@ -3,28 +3,30 @@
## Individual Committers (in alphabetical order)
* [CookiePieWw](https://github.com/CookiePieWw)
* [KKould](https://github.com/KKould)
* [NiwakaDev](https://github.com/NiwakaDev)
* [etolbakov](https://github.com/etolbakov)
* [irenjj](https://github.com/irenjj)
* [KKould](https://github.com/KKould)
* [Lanqing Yang](https://github.com/lyang24)
* [NiwakaDev](https://github.com/NiwakaDev)
* [tisonkun](https://github.com/tisonkun)
* [Lanqing Yang](https://github.com/lyang24)
## Team Members (in alphabetical order)
* [Breeze-P](https://github.com/Breeze-P)
* [GrepTime](https://github.com/GrepTime)
* [MichaelScofield](https://github.com/MichaelScofield)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [ZonaHex](https://github.com/ZonaHex)
* [apdong2022](https://github.com/apdong2022)
* [beryl678](https://github.com/beryl678)
* [Breeze-P](https://github.com/Breeze-P)
* [daviderli614](https://github.com/daviderli614)
* [discord9](https://github.com/discord9)
* [evenyag](https://github.com/evenyag)
* [fengjiachun](https://github.com/fengjiachun)
* [fengys1996](https://github.com/fengys1996)
* [GrepTime](https://github.com/GrepTime)
* [holalengyu](https://github.com/holalengyu)
* [killme2008](https://github.com/killme2008)
* [MichaelScofield](https://github.com/MichaelScofield)
* [nicecui](https://github.com/nicecui)
* [paomian](https://github.com/paomian)
* [shuiyisong](https://github.com/shuiyisong)
@@ -32,14 +34,11 @@
* [sunng87](https://github.com/sunng87)
* [v0y4g3r](https://github.com/v0y4g3r)
* [waynexia](https://github.com/waynexia)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [xtang](https://github.com/xtang)
* [zhaoyingnan01](https://github.com/zhaoyingnan01)
* [zhongzc](https://github.com/zhongzc)
* [ZonaHex](https://github.com/ZonaHex)
* [zyy17](https://github.com/zyy17)
## All Contributors
To see the full list of contributors, please visit our [Contributors page](https://github.com/GreptimeTeam/greptimedb/graphs/contributors)
[![All Contributors](https://contrib.rocks/image?repo=GreptimeTeam/greptimedb)](https://github.com/GreptimeTeam/greptimedb/graphs/contributors)

221
Cargo.lock generated
View File

@@ -185,7 +185,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]]
name = "api"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"common-base",
"common-decimal",
@@ -432,7 +432,7 @@ dependencies = [
"arrow-schema",
"chrono",
"half",
"indexmap 2.7.1",
"indexmap 2.6.0",
"lexical-core",
"num",
"serde",
@@ -710,7 +710,7 @@ dependencies = [
[[package]]
name = "auth"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"async-trait",
@@ -1324,7 +1324,7 @@ dependencies = [
[[package]]
name = "cache"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"catalog",
"common-error",
@@ -1348,7 +1348,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "catalog"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arrow",
@@ -1475,7 +1475,7 @@ version = "0.13.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6026d8cd82ada8bbcfe337805dd1eb6afdc9e80fa4d57e977b3a36315e0c5525"
dependencies = [
"indexmap 2.7.1",
"indexmap 2.6.0",
"lazy_static",
"num-traits",
"regex",
@@ -1661,7 +1661,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "cli"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"auth",
@@ -1703,7 +1703,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.12.0",
"substrait 0.12.1",
"table",
"tempfile",
"tokio",
@@ -1712,7 +1712,7 @@ dependencies = [
[[package]]
name = "client"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arc-swap",
@@ -1739,7 +1739,7 @@ dependencies = [
"rand",
"serde_json",
"snafu 0.8.5",
"substrait 0.12.0",
"substrait 0.12.1",
"substrait 0.37.3",
"tokio",
"tokio-stream",
@@ -1780,7 +1780,7 @@ dependencies = [
[[package]]
name = "cmd"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"auth",
@@ -1841,7 +1841,7 @@ dependencies = [
"similar-asserts",
"snafu 0.8.5",
"store-api",
"substrait 0.12.0",
"substrait 0.12.1",
"table",
"temp-env",
"tempfile",
@@ -1887,7 +1887,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]]
name = "common-base"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"anymap2",
"async-trait",
@@ -1909,11 +1909,11 @@ dependencies = [
[[package]]
name = "common-catalog"
version = "0.12.0"
version = "0.12.1"
[[package]]
name = "common-config"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"common-base",
"common-error",
@@ -1938,7 +1938,7 @@ dependencies = [
[[package]]
name = "common-datasource"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"arrow",
"arrow-schema",
@@ -1974,7 +1974,7 @@ dependencies = [
[[package]]
name = "common-decimal"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"bigdecimal 0.4.5",
"common-error",
@@ -1987,7 +1987,7 @@ dependencies = [
[[package]]
name = "common-error"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"http 1.1.0",
"snafu 0.8.5",
@@ -1997,7 +1997,7 @@ dependencies = [
[[package]]
name = "common-frontend"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"common-error",
@@ -2007,14 +2007,12 @@ dependencies = [
[[package]]
name = "common-function"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"ahash 0.8.11",
"api",
"approx 0.5.1",
"arc-swap",
"async-trait",
"bincode",
"common-base",
"common-catalog",
"common-error",
@@ -2032,7 +2030,6 @@ dependencies = [
"geo-types",
"geohash",
"h3o",
"hyperloglogplus",
"jsonb",
"nalgebra 0.33.2",
"num",
@@ -2049,13 +2046,12 @@ dependencies = [
"store-api",
"table",
"tokio",
"uddsketch",
"wkt",
]
[[package]]
name = "common-greptimedb-telemetry"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"common-runtime",
@@ -2072,7 +2068,7 @@ dependencies = [
[[package]]
name = "common-grpc"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arrow-flight",
@@ -2100,7 +2096,7 @@ dependencies = [
[[package]]
name = "common-grpc-expr"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"common-base",
@@ -2119,7 +2115,7 @@ dependencies = [
[[package]]
name = "common-macro"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"arc-swap",
"common-query",
@@ -2133,7 +2129,7 @@ dependencies = [
[[package]]
name = "common-mem-prof"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"common-error",
"common-macro",
@@ -2146,7 +2142,7 @@ dependencies = [
[[package]]
name = "common-meta"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"anymap2",
"api",
@@ -2206,7 +2202,7 @@ dependencies = [
[[package]]
name = "common-options"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"common-grpc",
"humantime-serde",
@@ -2215,11 +2211,11 @@ dependencies = [
[[package]]
name = "common-plugins"
version = "0.12.0"
version = "0.12.1"
[[package]]
name = "common-pprof"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"common-error",
"common-macro",
@@ -2231,7 +2227,7 @@ dependencies = [
[[package]]
name = "common-procedure"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-stream",
"async-trait",
@@ -2258,7 +2254,7 @@ dependencies = [
[[package]]
name = "common-procedure-test"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"common-procedure",
@@ -2266,7 +2262,7 @@ dependencies = [
[[package]]
name = "common-query"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"async-trait",
@@ -2292,7 +2288,7 @@ dependencies = [
[[package]]
name = "common-recordbatch"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"arc-swap",
"common-error",
@@ -2311,7 +2307,7 @@ dependencies = [
[[package]]
name = "common-runtime"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -2341,7 +2337,7 @@ dependencies = [
[[package]]
name = "common-telemetry"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"atty",
"backtrace",
@@ -2369,7 +2365,7 @@ dependencies = [
[[package]]
name = "common-test-util"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"client",
"common-query",
@@ -2381,7 +2377,7 @@ dependencies = [
[[package]]
name = "common-time"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"arrow",
"chrono",
@@ -2399,7 +2395,7 @@ dependencies = [
[[package]]
name = "common-version"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"build-data",
"const_format",
@@ -2409,7 +2405,7 @@ dependencies = [
[[package]]
name = "common-wal"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"common-base",
"common-error",
@@ -2976,7 +2972,7 @@ dependencies = [
"chrono",
"half",
"hashbrown 0.14.5",
"indexmap 2.7.1",
"indexmap 2.6.0",
"libc",
"object_store",
"parquet",
@@ -3036,7 +3032,7 @@ dependencies = [
"datafusion-functions-aggregate-common",
"datafusion-functions-window-common",
"datafusion-physical-expr-common",
"indexmap 2.7.1",
"indexmap 2.6.0",
"paste",
"recursive",
"serde_json",
@@ -3158,7 +3154,7 @@ dependencies = [
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"half",
"indexmap 2.7.1",
"indexmap 2.6.0",
"log",
"parking_lot 0.12.3",
"paste",
@@ -3209,7 +3205,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
"indexmap 2.7.1",
"indexmap 2.6.0",
"itertools 0.13.0",
"log",
"recursive",
@@ -3234,7 +3230,7 @@ dependencies = [
"datafusion-physical-expr-common",
"half",
"hashbrown 0.14.5",
"indexmap 2.7.1",
"indexmap 2.6.0",
"itertools 0.13.0",
"log",
"paste",
@@ -3293,7 +3289,7 @@ dependencies = [
"futures",
"half",
"hashbrown 0.14.5",
"indexmap 2.7.1",
"indexmap 2.6.0",
"itertools 0.13.0",
"log",
"once_cell",
@@ -3313,7 +3309,7 @@ dependencies = [
"arrow-schema",
"datafusion-common",
"datafusion-expr",
"indexmap 2.7.1",
"indexmap 2.6.0",
"log",
"recursive",
"regex",
@@ -3340,7 +3336,7 @@ dependencies = [
[[package]]
name = "datanode"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arrow-flight",
@@ -3392,7 +3388,7 @@ dependencies = [
"session",
"snafu 0.8.5",
"store-api",
"substrait 0.12.0",
"substrait 0.12.1",
"table",
"tokio",
"toml 0.8.19",
@@ -3401,7 +3397,7 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"arrow",
"arrow-array",
@@ -4045,7 +4041,7 @@ dependencies = [
[[package]]
name = "file-engine"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"async-trait",
@@ -4155,7 +4151,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]]
name = "flow"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arrow",
@@ -4216,7 +4212,7 @@ dependencies = [
"snafu 0.8.5",
"store-api",
"strum 0.25.0",
"substrait 0.12.0",
"substrait 0.12.1",
"table",
"tokio",
"tonic 0.12.3",
@@ -4271,7 +4267,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]]
name = "frontend"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arc-swap",
@@ -4699,7 +4695,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=072ce580502e015df1a6b03a185b60309a7c2a7a#072ce580502e015df1a6b03a185b60309a7c2a7a"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=a25adc8a01340231121646d8f0a29d0e92f45461#a25adc8a01340231121646d8f0a29d0e92f45461"
dependencies = [
"prost 0.13.3",
"serde",
@@ -4722,7 +4718,7 @@ dependencies = [
"futures-sink",
"futures-util",
"http 0.2.12",
"indexmap 2.7.1",
"indexmap 2.6.0",
"slab",
"tokio",
"tokio-util",
@@ -4741,7 +4737,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http 1.1.0",
"indexmap 2.7.1",
"indexmap 2.6.0",
"slab",
"tokio",
"tokio-util",
@@ -5291,15 +5287,6 @@ dependencies = [
"tracing",
]
[[package]]
name = "hyperloglogplus"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3"
dependencies = [
"serde",
]
[[package]]
name = "i_float"
version = "1.3.1"
@@ -5539,7 +5526,7 @@ dependencies = [
[[package]]
name = "index"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"asynchronous-codec",
@@ -5588,9 +5575,9 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.7.1"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
@@ -5604,7 +5591,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
dependencies = [
"ahash 0.8.11",
"indexmap 2.7.1",
"indexmap 2.6.0",
"is-terminal",
"itoa",
"log",
@@ -5951,7 +5938,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee7893dab2e44ae5f9d0173f26ff4aa327c10b01b06a72b52dd9405b628640d"
dependencies = [
"indexmap 2.7.1",
"indexmap 2.6.0",
]
[[package]]
@@ -6331,7 +6318,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "log-query"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"chrono",
"common-error",
@@ -6343,7 +6330,7 @@ dependencies = [
[[package]]
name = "log-store"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-stream",
"async-trait",
@@ -6434,7 +6421,7 @@ dependencies = [
"cactus",
"cfgrammar",
"filetime",
"indexmap 2.7.1",
"indexmap 2.6.0",
"lazy_static",
"lrtable",
"num-traits",
@@ -6636,7 +6623,7 @@ dependencies = [
[[package]]
name = "meta-client"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"async-trait",
@@ -6663,7 +6650,7 @@ dependencies = [
[[package]]
name = "meta-srv"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"async-trait",
@@ -6749,7 +6736,7 @@ dependencies = [
[[package]]
name = "metric-engine"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"aquamarine",
@@ -6847,7 +6834,7 @@ dependencies = [
[[package]]
name = "mito2"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"aquamarine",
@@ -7544,7 +7531,7 @@ dependencies = [
[[package]]
name = "object-store"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"anyhow",
"bytes",
@@ -7675,7 +7662,7 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a"
dependencies = [
"futures-core",
"futures-sink",
"indexmap 2.7.1",
"indexmap 2.6.0",
"js-sys",
"once_cell",
"pin-project-lite",
@@ -7793,7 +7780,7 @@ dependencies = [
[[package]]
name = "operator"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"ahash 0.8.11",
"api",
@@ -7841,7 +7828,7 @@ dependencies = [
"sql",
"sqlparser 0.52.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=71dd86058d2af97b9925093d40c4e03360403170)",
"store-api",
"substrait 0.12.0",
"substrait 0.12.1",
"table",
"tokio",
"tokio-util",
@@ -8078,7 +8065,7 @@ dependencies = [
[[package]]
name = "partition"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"async-trait",
@@ -8247,7 +8234,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"indexmap 2.7.1",
"indexmap 2.6.0",
]
[[package]]
@@ -8346,7 +8333,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pipeline"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"ahash 0.8.11",
"api",
@@ -8486,7 +8473,7 @@ dependencies = [
[[package]]
name = "plugins"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"auth",
"clap 4.5.19",
@@ -8748,7 +8735,7 @@ dependencies = [
[[package]]
name = "promql"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"ahash 0.8.11",
"async-trait",
@@ -8993,7 +8980,7 @@ dependencies = [
[[package]]
name = "puffin"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-compression 0.4.13",
"async-trait",
@@ -9034,7 +9021,7 @@ dependencies = [
[[package]]
name = "query"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"ahash 0.8.11",
"api",
@@ -9099,7 +9086,7 @@ dependencies = [
"sqlparser 0.52.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=71dd86058d2af97b9925093d40c4e03360403170)",
"statrs",
"store-api",
"substrait 0.12.0",
"substrait 0.12.1",
"table",
"tokio",
"tokio-stream",
@@ -10338,7 +10325,7 @@ version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "930cfb6e6abf99298aaad7d29abbef7a9999a9a8806a40088f55f0dcec03146b"
dependencies = [
"indexmap 2.7.1",
"indexmap 2.6.0",
"itoa",
"memchr",
"ryu",
@@ -10409,7 +10396,7 @@ dependencies = [
"chrono",
"hex",
"indexmap 1.9.3",
"indexmap 2.7.1",
"indexmap 2.6.0",
"serde",
"serde_derive",
"serde_json",
@@ -10435,7 +10422,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap 2.7.1",
"indexmap 2.6.0",
"itoa",
"ryu",
"serde",
@@ -10444,7 +10431,7 @@ dependencies = [
[[package]]
name = "servers"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"ahash 0.8.11",
"api",
@@ -10496,7 +10483,6 @@ dependencies = [
"humantime",
"humantime-serde",
"hyper 1.4.1",
"indexmap 2.7.1",
"influxdb_line_protocol",
"itertools 0.10.5",
"json5",
@@ -10561,7 +10547,7 @@ dependencies = [
[[package]]
name = "session"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arc-swap",
@@ -10870,7 +10856,7 @@ dependencies = [
[[package]]
name = "sql"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"chrono",
@@ -10907,12 +10893,12 @@ dependencies = [
[[package]]
name = "sqlness"
version = "0.6.1"
source = "git+https://github.com/CeresDB/sqlness.git?rev=bb91f31ff58993e07ea89845791235138283a24c#bb91f31ff58993e07ea89845791235138283a24c"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308a7338f2211813d6e9da117e9b9b7aee5d072872d11a934002fd2bd4ab5276"
dependencies = [
"async-trait",
"derive_builder 0.11.2",
"duration-str",
"futures",
"minijinja",
"prettydiff",
"regex",
@@ -10924,7 +10910,7 @@ dependencies = [
[[package]]
name = "sqlness-runner"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"clap 4.5.19",
@@ -10938,7 +10924,6 @@ dependencies = [
"hex",
"local-ip-address",
"mysql",
"num_cpus",
"reqwest",
"serde",
"serde_json",
@@ -11038,7 +11023,7 @@ dependencies = [
"futures-util",
"hashbrown 0.15.2",
"hashlink",
"indexmap 2.7.1",
"indexmap 2.6.0",
"log",
"memchr",
"once_cell",
@@ -11241,7 +11226,7 @@ dependencies = [
[[package]]
name = "store-api"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"aquamarine",
@@ -11371,7 +11356,7 @@ dependencies = [
[[package]]
name = "substrait"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"async-trait",
"bytes",
@@ -11552,7 +11537,7 @@ dependencies = [
[[package]]
name = "table"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"async-trait",
@@ -11803,7 +11788,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]]
name = "tests-fuzz"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"arbitrary",
"async-trait",
@@ -11847,7 +11832,7 @@ dependencies = [
[[package]]
name = "tests-integration"
version = "0.12.0"
version = "0.12.1"
dependencies = [
"api",
"arrow-flight",
@@ -11913,7 +11898,7 @@ dependencies = [
"sql",
"sqlx",
"store-api",
"substrait 0.12.0",
"substrait 0.12.1",
"table",
"tempfile",
"time",
@@ -12334,7 +12319,7 @@ version = "0.19.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
dependencies = [
"indexmap 2.7.1",
"indexmap 2.6.0",
"toml_datetime",
"winnow 0.5.40",
]
@@ -12345,7 +12330,7 @@ version = "0.22.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5"
dependencies = [
"indexmap 2.7.1",
"indexmap 2.6.0",
"serde",
"serde_spanned",
"toml_datetime",
@@ -12483,7 +12468,7 @@ dependencies = [
"futures-core",
"futures-util",
"hdrhistogram",
"indexmap 2.7.1",
"indexmap 2.6.0",
"pin-project-lite",
"slab",
"sync_wrapper 1.0.1",
@@ -12971,14 +12956,6 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "uddsketch"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/timescaledb-toolkit.git?rev=84828fe8fb494a6a61412a3da96517fc80f7bb20#84828fe8fb494a6a61412a3da96517fc80f7bb20"
dependencies = [
"serde",
]
[[package]]
name = "unescaper"
version = "0.1.5"

View File

@@ -67,7 +67,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.12.0"
version = "0.12.1"
edition = "2021"
license = "Apache-2.0"
@@ -129,7 +129,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "072ce580502e015df1a6b03a185b60309a7c2a7a" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a25adc8a01340231121646d8f0a29d0e92f45461" }
hex = "0.4"
http = "1"
humantime = "2.1"

View File

@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::sync::{Arc, Weak};
use arrow_schema::SchemaRef as ArrowSchemaRef;
@@ -244,14 +243,17 @@ impl InformationSchemaTablesBuilder {
// TODO(dennis): `region_stats` API is not stable in distributed cluster because of network issue etc.
// But we don't want the statements such as `show tables` fail,
// so using `unwrap_or_else` here instead of `?` operator.
let region_stats = information_extension
.region_stats()
.await
.map_err(|e| {
error!(e; "Failed to call region_stats");
e
})
.unwrap_or_else(|_| vec![]);
let region_stats = {
let mut x = information_extension
.region_stats()
.await
.unwrap_or_else(|e| {
error!(e; "Failed to find region stats in information_schema, fallback to all empty");
vec![]
});
x.sort_unstable_by_key(|x| x.id);
x
};
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None);
@@ -262,16 +264,16 @@ impl InformationSchemaTablesBuilder {
// TODO(dennis): make it working for metric engine
let table_region_stats =
if table_info.meta.engine == MITO_ENGINE || table_info.is_physical_table() {
let region_ids = table_info
table_info
.meta
.region_numbers
.iter()
.map(|n| RegionId::new(table_info.ident.table_id, *n))
.collect::<HashSet<_>>();
region_stats
.iter()
.filter(|stat| region_ids.contains(&stat.id))
.flat_map(|region_id| {
region_stats
.binary_search_by_key(&region_id, |x| x.id)
.map(|i| &region_stats[i])
})
.collect::<Vec<_>>()
} else {
vec![]
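
The hunk above replaces a per-table `HashSet` build plus linear filter with a sort-once / binary-search lookup over the region stats. A standalone sketch of that pattern, using a hypothetical `RegionStat` struct rather than the actual GreptimeDB type:

```rust
struct RegionStat {
    id: u64,
    rows: u64,
}

// Look up stats for the given region ids in a slice pre-sorted by `id`.
fn stats_for_regions<'a>(
    sorted_stats: &'a [RegionStat],
    region_ids: impl IntoIterator<Item = u64>,
) -> Vec<&'a RegionStat> {
    region_ids
        .into_iter()
        // binary_search_by_key returns Err for missing ids; flat_map drops those.
        .flat_map(|id| {
            sorted_stats
                .binary_search_by_key(&id, |s| s.id)
                .map(|i| &sorted_stats[i])
        })
        .collect()
}

fn main() {
    let mut stats = vec![
        RegionStat { id: 3, rows: 10 },
        RegionStat { id: 1, rows: 42 },
    ];
    // Sort once up front so every lookup afterwards is O(log n).
    stats.sort_unstable_by_key(|s| s.id);
    let found = stats_for_regions(&stats, [1u64, 2, 3]);
    assert_eq!(found.len(), 2);
    assert_eq!(found[0].rows, 42);
}
```

Sorting once and binary-searching per table keeps `show tables` cheap even when the region stats list is large, which is the slowdown the first commit in this comparison addresses.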

View File

@@ -16,6 +16,7 @@
mod client;
pub mod client_manager;
#[cfg(feature = "testing")]
mod database;
pub mod error;
pub mod flow;
@@ -33,6 +34,7 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt;
pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database;
pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

View File

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};
@@ -317,8 +317,6 @@ impl StartCommand {
Arc::new(executor),
);
let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let flownode_builder = FlownodeBuilder::new(
opts,
@@ -326,7 +324,6 @@ impl StartCommand {
table_metadata_manager,
catalog_manager.clone(),
flow_metadata_manager,
Arc::new(frontend_client),
)
.with_heartbeat_task(heartbeat_task);

View File

@@ -54,10 +54,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{
FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
FrontendInvoker,
};
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
@@ -536,16 +533,12 @@ impl StartCommand {
flow: opts.flow.clone(),
..Default::default()
};
let fe_server_addr = fe_opts.grpc.bind_addr.clone();
let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
let flow_builder = FlownodeBuilder::new(
flownode_options,
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
flow_metadata_manager.clone(),
Arc::new(frontend_client),
);
let flownode = Arc::new(
flow_builder

View File

@@ -12,11 +12,9 @@ default = ["geo"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
[dependencies]
ahash = "0.8"
api.workspace = true
arc-swap = "1.0"
async-trait.workspace = true
bincode = "1.3"
common-base.workspace = true
common-catalog.workspace = true
common-error.workspace = true
@@ -34,7 +32,6 @@ geo = { version = "0.29", optional = true }
geo-types = { version = "0.7", optional = true }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
hyperloglogplus = "0.4"
jsonb.workspace = true
nalgebra.workspace = true
num = "0.4"
@@ -50,7 +47,6 @@ sql.workspace = true
statrs = "0.16"
store-api.workspace = true
table.workspace = true
uddsketch = { git = "https://github.com/GreptimeTeam/timescaledb-toolkit.git", rev = "84828fe8fb494a6a61412a3da96517fc80f7bb20" }
wkt = { version = "0.11", optional = true }
[dev-dependencies]

View File

@@ -1,20 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod hll;
mod uddsketch_state;
pub(crate) use hll::HllStateType;
pub use hll::{HllState, HLL_MERGE_NAME, HLL_NAME};
pub use uddsketch_state::{UddSketchState, UDDSKETCH_STATE_NAME};

View File

@@ -1,319 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_query::prelude::*;
use common_telemetry::trace;
use datafusion::arrow::array::ArrayRef;
use datafusion::common::cast::{as_binary_array, as_string_array};
use datafusion::common::not_impl_err;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::function::AccumulatorArgs;
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
use datafusion::prelude::create_udaf;
use datatypes::arrow::datatypes::DataType;
use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
use crate::utils::FixedRandomState;
pub const HLL_NAME: &str = "hll";
pub const HLL_MERGE_NAME: &str = "hll_merge";
const DEFAULT_PRECISION: u8 = 14;
pub(crate) type HllStateType = HyperLogLogPlus<String, FixedRandomState>;
pub struct HllState {
hll: HllStateType,
}
impl std::fmt::Debug for HllState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "HllState<Opaque>")
}
}
impl Default for HllState {
fn default() -> Self {
Self::new()
}
}
impl HllState {
pub fn new() -> Self {
Self {
// Safety: the DEFAULT_PRECISION is fixed and valid
hll: HllStateType::new(DEFAULT_PRECISION, FixedRandomState::new()).unwrap(),
}
}
/// Create a UDF for the `hll` function.
///
/// `hll` accepts a string column and aggregates the
/// values into a HyperLogLog state.
pub fn state_udf_impl() -> AggregateUDF {
create_udaf(
HLL_NAME,
vec![DataType::Utf8],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
/// Create a UDF for the `hll_merge` function.
///
/// `hll_merge` accepts a binary column of states generated by `hll`
/// and merges them into a single state.
pub fn merge_udf_impl() -> AggregateUDF {
create_udaf(
HLL_MERGE_NAME,
vec![DataType::Binary],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_merge_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
fn update(&mut self, value: &str) {
self.hll.insert(value);
}
fn merge(&mut self, raw: &[u8]) {
if let Ok(serialized) = bincode::deserialize::<HllStateType>(raw) {
if let Ok(()) = self.hll.merge(&serialized) {
return;
}
}
trace!("Warning: Failed to merge HyperLogLog from {:?}", raw);
}
fn create_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Utf8 => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_NAME} does not support data type: {other}"),
}
}
fn create_merge_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Binary => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_MERGE_NAME} does not support data type: {other}"),
}
}
}
impl DfAccumulator for HllState {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let array = &values[0];
match array.data_type() {
DataType::Utf8 => {
let string_array = as_string_array(array)?;
for value in string_array.iter().flatten() {
self.update(value);
}
}
DataType::Binary => {
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
}
_ => {
return not_impl_err!(
"HLL functions do not support data type: {}",
array.data_type()
)
}
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
Ok(ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
)))
}
fn size(&self) -> usize {
std::mem::size_of_val(&self.hll)
}
fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
Ok(vec![ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let array = &states[0];
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{BinaryArray, StringArray};
use super::*;
#[test]
fn test_hll_basic() {
let mut state = HllState::new();
state.update("1");
state.update("2");
state.update("3");
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_roundtrip() {
let mut state = HllState::new();
state.update("1");
state.update("2");
// Serialize
let serialized = state.evaluate().unwrap();
// Create new state and merge the serialized data
let mut new_state = HllState::new();
if let ScalarValue::Binary(Some(bytes)) = &serialized {
new_state.merge(bytes);
// Verify the merged state matches original
let result = new_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(new_bytes)) = result {
let mut original: HllStateType = bincode::deserialize(bytes).unwrap();
let mut merged: HllStateType = bincode::deserialize(&new_bytes).unwrap();
assert_eq!(original.count(), merged.count());
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_batch_update() {
let mut state = HllState::new();
// Test string values
let str_values = vec!["a", "b", "c", "d", "e", "f", "g", "h", "i"];
let str_array = Arc::new(StringArray::from(str_values)) as ArrayRef;
state.update_batch(&[str_array]).unwrap();
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 9);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_merge_batch() {
let mut state1 = HllState::new();
state1.update("1");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
let state2_binary = state2.evaluate().unwrap();
let mut merged_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merged_state.merge_batch(&[binary_array]).unwrap();
let result = merged_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 2);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
#[test]
fn test_hll_merge_function() {
// Create two HLL states with different values
let mut state1 = HllState::new();
state1.update("1");
state1.update("2");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
state2.update("3");
let state2_binary = state2.evaluate().unwrap();
// Create a merge state and merge both states
let mut merge_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merge_state.update_batch(&[binary_array]).unwrap();
let result = merge_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
// Should have 3 unique values: "1", "2", "3"
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
}

View File

@@ -1,307 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_query::prelude::*;
use common_telemetry::trace;
use datafusion::common::cast::{as_binary_array, as_primitive_array};
use datafusion::common::not_impl_err;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::function::AccumulatorArgs;
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
use datafusion::physical_plan::expressions::Literal;
use datafusion::prelude::create_udaf;
use datatypes::arrow::array::ArrayRef;
use datatypes::arrow::datatypes::{DataType, Float64Type};
use uddsketch::{SketchHashKey, UDDSketch};
pub const UDDSKETCH_STATE_NAME: &str = "uddsketch_state";
#[derive(Debug)]
pub struct UddSketchState {
uddsketch: UDDSketch,
}
impl UddSketchState {
pub fn new(bucket_size: u64, error_rate: f64) -> Self {
Self {
uddsketch: UDDSketch::new(bucket_size, error_rate),
}
}
pub fn udf_impl() -> AggregateUDF {
create_udaf(
UDDSKETCH_STATE_NAME,
vec![DataType::Int64, DataType::Float64, DataType::Float64],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(|args| {
let (bucket_size, error_rate) = downcast_accumulator_args(args)?;
Ok(Box::new(UddSketchState::new(bucket_size, error_rate)))
}),
Arc::new(vec![DataType::Binary]),
)
}
fn update(&mut self, value: f64) {
self.uddsketch.add_value(value);
}
fn merge(&mut self, raw: &[u8]) {
if let Ok(uddsketch) = bincode::deserialize::<UDDSketch>(raw) {
if uddsketch.count() != 0 {
self.uddsketch.merge_sketch(&uddsketch);
}
} else {
trace!("Warning: Failed to deserialize UDDSketch from {:?}", raw);
}
}
}
fn downcast_accumulator_args(args: AccumulatorArgs) -> DfResult<(u64, f64)> {
let bucket_size = match args.exprs[0]
.as_any()
.downcast_ref::<Literal>()
.map(|lit| lit.value())
{
Some(ScalarValue::Int64(Some(value))) => *value as u64,
_ => {
return not_impl_err!(
"{} not supported for bucket size: {}",
UDDSKETCH_STATE_NAME,
&args.exprs[0]
)
}
};
let error_rate = match args.exprs[1]
.as_any()
.downcast_ref::<Literal>()
.map(|lit| lit.value())
{
Some(ScalarValue::Float64(Some(value))) => *value,
_ => {
return not_impl_err!(
"{} not supported for error rate: {}",
UDDSKETCH_STATE_NAME,
&args.exprs[1]
)
}
};
Ok((bucket_size, error_rate))
}
impl DfAccumulator for UddSketchState {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let array = &values[2]; // the third column is data value
let f64_array = as_primitive_array::<Float64Type>(array)?;
for v in f64_array.iter().flatten() {
self.update(v);
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
Ok(ScalarValue::Binary(Some(
bincode::serialize(&self.uddsketch).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
})?,
)))
}
fn size(&self) -> usize {
// Base size of UDDSketch struct fields
let mut total_size = std::mem::size_of::<f64>() * 3 + // alpha, gamma, values_sum
std::mem::size_of::<u32>() + // compactions
std::mem::size_of::<u64>() * 2; // max_buckets, num_values
// Size of buckets (SketchHashMap)
// Each bucket entry contains:
// - SketchHashKey (enum with i64/Zero/Invalid variants)
// - SketchHashEntry (count: u64, next: SketchHashKey)
let bucket_entry_size = std::mem::size_of::<SketchHashKey>() + // key
std::mem::size_of::<u64>() + // count
std::mem::size_of::<SketchHashKey>(); // next
total_size += self.uddsketch.current_buckets_count() * bucket_entry_size;
total_size
}
fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
Ok(vec![ScalarValue::Binary(Some(
bincode::serialize(&self.uddsketch).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
})?,
))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let array = &states[0];
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{BinaryArray, Float64Array};
use super::*;
#[test]
fn test_uddsketch_state_basic() {
let mut state = UddSketchState::new(10, 0.01);
state.update(1.0);
state.update(2.0);
state.update(3.0);
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_roundtrip() {
let mut state = UddSketchState::new(10, 0.01);
state.update(1.0);
state.update(2.0);
// Serialize
let serialized = state.evaluate().unwrap();
// Create new state and merge the serialized data
let mut new_state = UddSketchState::new(10, 0.01);
if let ScalarValue::Binary(Some(bytes)) = &serialized {
new_state.merge(bytes);
// Verify the merged state matches original by comparing deserialized values
let original_sketch: UDDSketch = bincode::deserialize(bytes).unwrap();
let new_result = new_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(new_bytes)) = new_result {
let new_sketch: UDDSketch = bincode::deserialize(&new_bytes).unwrap();
assert_eq!(original_sketch.count(), new_sketch.count());
assert_eq!(original_sketch.sum(), new_sketch.sum());
assert_eq!(original_sketch.mean(), new_sketch.mean());
assert_eq!(original_sketch.max_error(), new_sketch.max_error());
// Compare a few quantiles to ensure statistical equivalence
for q in [0.1, 0.5, 0.9].iter() {
assert!(
(original_sketch.estimate_quantile(*q) - new_sketch.estimate_quantile(*q))
.abs()
< 1e-10,
"Quantile {} mismatch: original={}, new={}",
q,
original_sketch.estimate_quantile(*q),
new_sketch.estimate_quantile(*q)
);
}
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_batch_update() {
let mut state = UddSketchState::new(10, 0.01);
let values = vec![1.0f64, 2.0, 3.0];
let array = Arc::new(Float64Array::from(values)) as ArrayRef;
state
.update_batch(&[array.clone(), array.clone(), array])
.unwrap();
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_merge_batch() {
let mut state1 = UddSketchState::new(10, 0.01);
state1.update(1.0);
let state1_binary = state1.evaluate().unwrap();
let mut state2 = UddSketchState::new(10, 0.01);
state2.update(2.0);
let state2_binary = state2.evaluate().unwrap();
let mut merged_state = UddSketchState::new(10, 0.01);
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merged_state.merge_batch(&[binary_array]).unwrap();
let result = merged_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 2);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
#[test]
fn test_uddsketch_state_size() {
let mut state = UddSketchState::new(10, 0.01);
let initial_size = state.size();
// Add some values to create buckets
state.update(1.0);
state.update(2.0);
state.update(3.0);
let size_with_values = state.size();
assert!(
size_with_values > initial_size,
"Size should increase after adding values: initial={}, with_values={}",
initial_size,
size_with_values
);
// Verify size increases with more buckets
state.update(10.0); // This should create a new bucket
assert!(
state.size() > size_with_values,
"Size should increase after adding new bucket: prev={}, new={}",
size_with_values,
state.size()
);
}
}

View File

@@ -22,12 +22,10 @@ use crate::function::{AsyncFunctionRef, FunctionRef};
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::date::DateFunction;
use crate::scalars::expression::ExpressionFunction;
use crate::scalars::hll_count::HllCalcFunction;
use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::math::MathFunction;
use crate::scalars::timestamp::TimestampFunction;
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
use crate::scalars::vector::VectorFunction;
use crate::system::SystemFunction;
use crate::table::TableFunction;
@@ -107,8 +105,6 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
TimestampFunction::register(&function_registry);
DateFunction::register(&function_registry);
ExpressionFunction::register(&function_registry);
UddSketchCalcFunction::register(&function_registry);
HllCalcFunction::register(&function_registry);
// Aggregate functions
AggregateFunctions::register(&function_registry);

View File

@@ -21,7 +21,6 @@ pub mod scalars;
mod system;
mod table;
pub mod aggr;
pub mod function;
pub mod function_registry;
pub mod handlers;

View File

@@ -22,9 +22,7 @@ pub mod matches;
pub mod math;
pub mod vector;
pub(crate) mod hll_count;
#[cfg(test)]
pub(crate) mod test;
pub(crate) mod timestamp;
pub(crate) mod uddsketch_calc;
pub mod udf;

View File

@@ -1,175 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the scalar function `hll_count`.
use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::{DowncastVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::Vector;
use datatypes::scalars::{ScalarVector, ScalarVectorBuilder};
use datatypes::vectors::{BinaryVector, MutableVector, UInt64VectorBuilder, VectorRef};
use hyperloglogplus::HyperLogLog;
use snafu::OptionExt;
use crate::aggr::HllStateType;
use crate::function::{Function, FunctionContext};
use crate::function_registry::FunctionRegistry;
const NAME: &str = "hll_count";
/// HllCalcFunction implements the scalar function `hll_count`.
///
/// It accepts one argument:
/// 1. The serialized HyperLogLogPlus state, as produced by the aggregator (binary).
///
/// For each row, it deserializes the sketch and returns the estimated cardinality.
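///
/// Illustrative SQL usage (a sketch, not the definitive interface; `hll_state` is a
/// hypothetical column holding the binary state produced by the matching HLL aggregator):
/// `SELECT hll_count(hll_state) FROM sketches;`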
#[derive(Debug, Default)]
pub struct HllCalcFunction;
impl HllCalcFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(HllCalcFunction));
}
}
impl Display for HllCalcFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for HllCalcFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn signature(&self) -> Signature {
// Only argument: HyperLogLogPlus state (binary)
Signature::exact(
vec![ConcreteDataType::binary_datatype()],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
if columns.len() != 1 {
return InvalidFuncArgsSnafu {
err_msg: format!("hll_count expects 1 argument, got {}", columns.len()),
}
.fail();
}
let hll_vec = columns[0]
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!("expect BinaryVector, got {}", columns[0].vector_type_name()),
})?;
let len = hll_vec.len();
let mut builder = UInt64VectorBuilder::with_capacity(len);
for i in 0..len {
let hll_opt = hll_vec.get_data(i);
if hll_opt.is_none() {
builder.push_null();
continue;
}
let hll_bytes = hll_opt.unwrap();
// Deserialize the HyperLogLogPlus from its bincode representation
let mut hll: HllStateType = match bincode::deserialize(hll_bytes) {
Ok(h) => h,
Err(e) => {
common_telemetry::trace!("Failed to deserialize HyperLogLogPlus: {}", e);
builder.push_null();
continue;
}
};
builder.push(Some(hll.count().round() as u64));
}
Ok(builder.to_vector())
}
}
#[cfg(test)]
mod tests {
use datatypes::vectors::BinaryVector;
use super::*;
use crate::utils::FixedRandomState;
#[test]
fn test_hll_count_function() {
let function = HllCalcFunction;
assert_eq!("hll_count", function.name());
assert_eq!(
ConcreteDataType::uint64_datatype(),
function
.return_type(&[ConcreteDataType::uint64_datatype()])
.unwrap()
);
// Create a test HLL
let mut hll = HllStateType::new(14, FixedRandomState::new()).unwrap();
for i in 1..=10 {
hll.insert(&i.to_string());
}
let serialized_bytes = bincode::serialize(&hll).unwrap();
let args: Vec<VectorRef> = vec![Arc::new(BinaryVector::from(vec![Some(serialized_bytes)]))];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
// Test cardinality estimate
if let datatypes::value::Value::UInt64(v) = result.get(0) {
assert_eq!(v, 10);
} else {
panic!("Expected uint64 value");
}
}
#[test]
fn test_hll_count_function_errors() {
let function = HllCalcFunction;
// Test with invalid number of arguments
let args: Vec<VectorRef> = vec![];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("hll_count expects 1 argument"));
// Test with invalid binary data
let args: Vec<VectorRef> = vec![Arc::new(BinaryVector::from(vec![Some(vec![1, 2, 3])]))]; // Invalid binary data
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
assert!(matches!(result.get(0), datatypes::value::Value::Null));
}
}

View File

@@ -1,211 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the scalar function `uddsketch_calc`.
use std::fmt;
use std::fmt::Display;
use std::sync::Arc;
use common_query::error::{DowncastVectorSnafu, InvalidFuncArgsSnafu, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::Vector;
use datatypes::scalars::{ScalarVector, ScalarVectorBuilder};
use datatypes::vectors::{BinaryVector, Float64VectorBuilder, MutableVector, VectorRef};
use snafu::OptionExt;
use uddsketch::UDDSketch;
use crate::function::{Function, FunctionContext};
use crate::function_registry::FunctionRegistry;
const NAME: &str = "uddsketch_calc";
/// UddSketchCalcFunction implements the scalar function `uddsketch_calc`.
///
/// It accepts two arguments:
/// 1. A percentile (as f64) for which to compute the estimated quantile (e.g. 0.95 for p95).
/// 2. The serialized UDDSketch state, as produced by the aggregator (binary).
///
/// For each row, it deserializes the sketch and returns the computed quantile value.
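///
/// Illustrative SQL usage (a sketch, not the definitive interface; `sketch_state` is a
/// hypothetical column holding the binary state produced by the matching UDDSketch aggregator):
/// `SELECT uddsketch_calc(0.95, sketch_state) FROM sketches;`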
#[derive(Debug, Default)]
pub struct UddSketchCalcFunction;
impl UddSketchCalcFunction {
pub fn register(registry: &FunctionRegistry) {
registry.register(Arc::new(UddSketchCalcFunction));
}
}
impl Display for UddSketchCalcFunction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", NAME.to_ascii_uppercase())
}
}
impl Function for UddSketchCalcFunction {
fn name(&self) -> &str {
NAME
}
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::float64_datatype())
}
fn signature(&self) -> Signature {
// First argument: percentile (float64)
// Second argument: UDDSketch state (binary)
Signature::exact(
vec![
ConcreteDataType::float64_datatype(),
ConcreteDataType::binary_datatype(),
],
Volatility::Immutable,
)
}
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
if columns.len() != 2 {
return InvalidFuncArgsSnafu {
err_msg: format!("uddsketch_calc expects 2 arguments, got {}", columns.len()),
}
.fail();
}
let perc_vec = &columns[0];
let sketch_vec = columns[1]
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!("expect BinaryVector, got {}", columns[1].vector_type_name()),
})?;
let len = sketch_vec.len();
let mut builder = Float64VectorBuilder::with_capacity(len);
for i in 0..len {
let perc_opt = perc_vec.get(i).as_f64_lossy();
let sketch_opt = sketch_vec.get_data(i);
if sketch_opt.is_none() || perc_opt.is_none() {
builder.push_null();
continue;
}
let sketch_bytes = sketch_opt.unwrap();
let perc = perc_opt.unwrap();
// Deserialize the UDDSketch from its bincode representation
let sketch: UDDSketch = match bincode::deserialize(sketch_bytes) {
Ok(s) => s,
Err(e) => {
common_telemetry::trace!("Failed to deserialize UDDSketch: {}", e);
builder.push_null();
continue;
}
};
// Compute the estimated quantile from the sketch
let result = sketch.estimate_quantile(perc);
builder.push(Some(result));
}
Ok(builder.to_vector())
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use datatypes::vectors::{BinaryVector, Float64Vector};
use super::*;
#[test]
fn test_uddsketch_calc_function() {
let function = UddSketchCalcFunction;
assert_eq!("uddsketch_calc", function.name());
assert_eq!(
ConcreteDataType::float64_datatype(),
function
.return_type(&[ConcreteDataType::float64_datatype()])
.unwrap()
);
// Create a test sketch
let mut sketch = UDDSketch::new(128, 0.01);
sketch.add_value(10.0);
sketch.add_value(20.0);
sketch.add_value(30.0);
sketch.add_value(40.0);
sketch.add_value(50.0);
sketch.add_value(60.0);
sketch.add_value(70.0);
sketch.add_value(80.0);
sketch.add_value(90.0);
sketch.add_value(100.0);
// Get expected values directly from the sketch
let expected_p50 = sketch.estimate_quantile(0.5);
let expected_p90 = sketch.estimate_quantile(0.9);
let expected_p95 = sketch.estimate_quantile(0.95);
let serialized = bincode::serialize(&sketch).unwrap();
let percentiles = vec![0.5, 0.9, 0.95];
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(percentiles.clone())),
Arc::new(BinaryVector::from(vec![Some(serialized.clone()); 3])),
];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 3);
// Test median (p50)
assert!(
matches!(result.get(0), datatypes::value::Value::Float64(v) if (v - expected_p50).abs() < 1e-10)
);
// Test p90
assert!(
matches!(result.get(1), datatypes::value::Value::Float64(v) if (v - expected_p90).abs() < 1e-10)
);
// Test p95
assert!(
matches!(result.get(2), datatypes::value::Value::Float64(v) if (v - expected_p95).abs() < 1e-10)
);
}
#[test]
fn test_uddsketch_calc_function_errors() {
let function = UddSketchCalcFunction;
// Test with invalid number of arguments
let args: Vec<VectorRef> = vec![Arc::new(Float64Vector::from_vec(vec![0.95]))];
let result = function.eval(FunctionContext::default(), &args);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("uddsketch_calc expects 2 arguments"));
// Test with invalid binary data
let args: Vec<VectorRef> = vec![
Arc::new(Float64Vector::from_vec(vec![0.95])),
Arc::new(BinaryVector::from(vec![Some(vec![1, 2, 3])])), // Invalid binary data
];
let result = function.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(result.len(), 1);
assert!(matches!(result.get(0), datatypes::value::Value::Null));
}
}

View File

@@ -12,11 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::hash::BuildHasher;
use ahash::RandomState;
use serde::{Deserialize, Serialize};
/// Escapes special characters in the provided pattern string for `LIKE`.
///
/// Specifically, it prefixes the backslash (`\`), percent (`%`), and underscore (`_`)
@@ -37,71 +32,6 @@ pub fn escape_like_pattern(pattern: &str) -> String {
})
.collect::<String>()
}
/// A random state with fixed seeds.
///
/// This is used to ensure that the hash values are consistent across
/// different processes, and easy to serialize and deserialize.
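///
/// A minimal illustration of the guarantee (two independently constructed states
/// produce identical hashes for the same input), e.g.
/// `FixedRandomState::new().hash_one("key") == FixedRandomState::new().hash_one("key")`.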
#[derive(Debug)]
pub struct FixedRandomState {
state: RandomState,
}
impl FixedRandomState {
// some random seeds
const RANDOM_SEED_0: u64 = 0x517cc1b727220a95;
const RANDOM_SEED_1: u64 = 0x428a2f98d728ae22;
const RANDOM_SEED_2: u64 = 0x7137449123ef65cd;
const RANDOM_SEED_3: u64 = 0xb5c0fbcfec4d3b2f;
pub fn new() -> Self {
Self {
state: ahash::RandomState::with_seeds(
Self::RANDOM_SEED_0,
Self::RANDOM_SEED_1,
Self::RANDOM_SEED_2,
Self::RANDOM_SEED_3,
),
}
}
}
impl Default for FixedRandomState {
fn default() -> Self {
Self::new()
}
}
impl BuildHasher for FixedRandomState {
type Hasher = ahash::AHasher;
fn build_hasher(&self) -> Self::Hasher {
self.state.build_hasher()
}
fn hash_one<T: std::hash::Hash>(&self, x: T) -> u64 {
self.state.hash_one(x)
}
}
impl Serialize for FixedRandomState {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_unit()
}
}
impl<'de> Deserialize<'de> for FixedRandomState {
fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
Ok(Self::new())
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -16,6 +16,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use futures::future::BoxFuture;
use futures::TryStreamExt;
use moka::future::Cache;
use moka::ops::compute::Op;
use table::metadata::TableId;
@@ -53,13 +54,9 @@ fn init_factory(table_flow_manager: TableFlowManagerRef) -> Initializer<TableId,
Box::pin(async move {
table_flow_manager
.flows(table_id)
.map_ok(|(key, value)| (key.flownode_id(), value.peer))
.try_collect::<HashMap<_, _>>()
.await
.map(|flows| {
flows
.into_iter()
.map(|(key, value)| (key.flownode_id(), value.peer))
.collect::<HashMap<_, _>>()
})
// We must cache the `HashSet` even if it's empty,
// to avoid future requests to the remote storage next time;
// If the value is added to the remote storage,

View File

@@ -15,7 +15,6 @@
mod metadata;
use std::collections::BTreeMap;
use std::fmt;
use api::v1::flow::flow_request::Body as PbFlowRequest;
use api::v1::flow::{CreateRequest, FlowRequest, FlowRequestHeader};
@@ -29,6 +28,7 @@ use common_procedure::{
use common_telemetry::info;
use common_telemetry::tracing_context::TracingContext;
use futures::future::join_all;
use futures::TryStreamExt;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use snafu::{ensure, ResultExt};
@@ -77,7 +77,6 @@ impl CreateFlowProcedure {
query_context,
state: CreateFlowState::Prepare,
prev_flow_info_value: None,
flow_type: None,
},
}
}
@@ -105,7 +104,7 @@ impl CreateFlowProcedure {
if create_if_not_exists && or_replace {
// this is forbidden because it's not clear what that combination would mean exactly
return error::UnsupportedSnafu {
operation: "Create flow with both `IF NOT EXISTS` and `OR REPLACE`",
operation: "Create flow with both `IF NOT EXISTS` and `OR REPLACE`".to_string(),
}
.fail();
}
@@ -130,10 +129,9 @@ impl CreateFlowProcedure {
.flow_metadata_manager
.flow_route_manager()
.routes(flow_id)
.await?
.into_iter()
.map(|(_, value)| value.peer)
.collect::<Vec<_>>();
.map_ok(|(_, value)| value.peer)
.try_collect::<Vec<_>>()
.await?;
self.data.flow_id = Some(flow_id);
self.data.peers = peers;
info!("Replacing flow, flow_id: {}", flow_id);
@@ -177,8 +175,6 @@ impl CreateFlowProcedure {
self.allocate_flow_id().await?;
}
self.data.state = CreateFlowState::CreateFlows;
// determine flow type
self.data.flow_type = Some(determine_flow_type(&self.data.task));
Ok(Status::executing(true))
}
@@ -313,11 +309,6 @@ impl Procedure for CreateFlowProcedure {
}
}
pub fn determine_flow_type(_flow_task: &CreateFlowTask) -> FlowType {
// TODO(discord9): determine flow type
FlowType::RecordingRule
}
/// The state of [CreateFlowProcedure].
#[derive(Debug, Clone, Serialize, Deserialize, AsRefStr, PartialEq)]
pub enum CreateFlowState {
@@ -331,36 +322,6 @@ pub enum CreateFlowState {
CreateMetadata,
}
/// The type of flow.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum FlowType {
/// The flow is a recording rule task.
RecordingRule,
/// The flow is a streaming task.
Streaming,
}
impl FlowType {
pub const RECORDING_RULE: &str = "recording_rule";
pub const STREAMING: &str = "streaming";
pub const FLOW_TYPE_KEY: &str = "flow_type";
}
impl Default for FlowType {
fn default() -> Self {
Self::RecordingRule
}
}
impl fmt::Display for FlowType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FlowType::RecordingRule => write!(f, "{}", FlowType::RECORDING_RULE),
FlowType::Streaming => write!(f, "{}", FlowType::STREAMING),
}
}
}
/// The serializable data.
#[derive(Debug, Serialize, Deserialize)]
pub struct CreateFlowData {
@@ -374,7 +335,6 @@ pub struct CreateFlowData {
/// For verify if prev value is consistent when need to update flow metadata.
/// only set when `or_replace` is true.
pub(crate) prev_flow_info_value: Option<DeserializedValueWithBytes<FlowInfoValue>>,
pub(crate) flow_type: Option<FlowType>,
}
impl From<&CreateFlowData> for CreateRequest {
@@ -382,7 +342,7 @@ impl From<&CreateFlowData> for CreateRequest {
let flow_id = value.flow_id.unwrap();
let source_table_ids = &value.source_table_ids;
let mut req = CreateRequest {
CreateRequest {
flow_id: Some(api::v1::FlowId { id: flow_id }),
source_table_ids: source_table_ids
.iter()
@@ -396,12 +356,7 @@ impl From<&CreateFlowData> for CreateRequest {
comment: value.task.comment.clone(),
sql: value.task.sql.clone(),
flow_options: value.task.flow_options.clone(),
};
let flow_type = value.flow_type.unwrap_or_default().to_string();
req.flow_options
.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
req
}
}
}
@@ -414,7 +369,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
expire_after,
comment,
sql,
flow_options: mut options,
flow_options: options,
..
} = value.task.clone();
@@ -431,21 +386,19 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
.map(|(idx, peer)| (idx as u32, FlowRouteValue { peer: peer.clone() }))
.collect::<Vec<_>>();
let flow_type = value.flow_type.unwrap_or_default().to_string();
options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
let flow_info = FlowInfoValue {
source_table_ids: value.source_table_ids.clone(),
sink_table_name,
flownode_ids,
catalog_name,
flow_name,
raw_sql: sql,
expire_after,
comment,
options,
};
(flow_info, flow_routes)
(
FlowInfoValue {
source_table_ids: value.source_table_ids.clone(),
sink_table_name,
flownode_ids,
catalog_name,
flow_name,
raw_sql: sql,
expire_after,
comment,
options,
},
flow_routes,
)
}
}

View File

@@ -128,7 +128,7 @@ impl State for DropDatabaseExecutor {
.await?;
executor.invalidate_table_cache(ddl_ctx).await?;
executor
.on_drop_regions(ddl_ctx, &self.physical_region_routes, true)
.on_drop_regions(ddl_ctx, &self.physical_region_routes)
.await?;
info!("Table: {}({}) is dropped", self.table_name, self.table_id);

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use common_catalog::format_full_flow_name;
use futures::TryStreamExt;
use snafu::{ensure, OptionExt};
use crate::ddl::drop_flow::DropFlowProcedure;
@@ -38,10 +39,9 @@ impl DropFlowProcedure {
.flow_metadata_manager
.flow_route_manager()
.routes(self.data.task.flow_id)
.await?
.into_iter()
.map(|(_, value)| value)
.collect::<Vec<_>>();
.map_ok(|(_, value)| value)
.try_collect::<Vec<_>>()
.await?;
ensure!(
!flow_route_values.is_empty(),
error::FlowRouteNotFoundSnafu {

View File

@@ -156,7 +156,7 @@ impl DropTableProcedure {
pub async fn on_datanode_drop_regions(&mut self) -> Result<Status> {
self.executor
.on_drop_regions(&self.context, &self.data.physical_region_routes, false)
.on_drop_regions(&self.context, &self.data.physical_region_routes)
.await?;
self.data.state = DropTableState::DeleteTombstone;
Ok(Status::executing(true))

View File

@@ -214,7 +214,6 @@ impl DropTableExecutor {
&self,
ctx: &DdlContext,
region_routes: &[RegionRoute],
fast_path: bool,
) -> Result<()> {
let leaders = find_leaders(region_routes);
let mut drop_region_tasks = Vec::with_capacity(leaders.len());
@@ -237,7 +236,6 @@ impl DropTableExecutor {
}),
body: Some(region_request::Body::Drop(PbDropRegionRequest {
region_id: region_id.as_u64(),
fast_path,
})),
};
let datanode = datanode.clone();

View File

@@ -16,9 +16,9 @@ pub mod flow_info;
pub(crate) mod flow_name;
pub(crate) mod flow_route;
pub mod flow_state;
mod flownode_addr_helper;
pub(crate) mod flownode_flow;
pub(crate) mod table_flow;
use std::ops::Deref;
use std::sync::Arc;
@@ -506,6 +506,7 @@ mod tests {
let routes = flow_metadata_manager
.flow_route_manager()
.routes(flow_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(
@@ -537,6 +538,7 @@ mod tests {
let nodes = flow_metadata_manager
.table_flow_manager()
.flows(table_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(
@@ -725,6 +727,7 @@ mod tests {
let routes = flow_metadata_manager
.flow_route_manager()
.routes(flow_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(
@@ -756,6 +759,7 @@ mod tests {
let nodes = flow_metadata_manager
.table_flow_manager()
.flows(table_id)
.try_collect::<Vec<_>>()
.await
.unwrap();
assert_eq!(

View File

@@ -12,15 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use futures::TryStreamExt;
use futures::stream::BoxStream;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use crate::error::{self, Result};
use crate::key::flow::{flownode_addr_helper, FlowScoped};
use crate::key::node_address::NodeAddressKey;
use crate::key::flow::FlowScoped;
use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
@@ -168,7 +167,10 @@ impl FlowRouteManager {
}
/// Retrieves all [FlowRouteValue]s of the specified `flow_id`.
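///
/// The result is returned as a stream; call sites in this change collect it with
/// `try_collect`, e.g. `self.routes(flow_id).map_ok(|(_, v)| v.peer).try_collect::<Vec<_>>().await?`.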
pub async fn routes(&self, flow_id: FlowId) -> Result<Vec<(FlowRouteKey, FlowRouteValue)>> {
pub fn routes(
&self,
flow_id: FlowId,
) -> BoxStream<'static, Result<(FlowRouteKey, FlowRouteValue)>> {
let start_key = FlowRouteKey::range_start_key(flow_id);
let req = RangeRequest::new().with_prefix(start_key);
let stream = PaginationStream::new(
@@ -179,9 +181,7 @@ impl FlowRouteManager {
)
.into_stream();
let mut res = stream.try_collect::<Vec<_>>().await?;
self.remap_flow_route_addresses(&mut res).await?;
Ok(res)
Box::pin(stream)
}
/// Builds a create flow routes transaction.
@@ -203,28 +203,6 @@ impl FlowRouteManager {
Ok(Txn::new().and_then(txns))
}
async fn remap_flow_route_addresses(
&self,
flow_routes: &mut [(FlowRouteKey, FlowRouteValue)],
) -> Result<()> {
let keys = flow_routes
.iter()
.map(|(_, value)| NodeAddressKey::with_flownode(value.peer.id))
.collect();
let flow_node_addrs =
flownode_addr_helper::get_flownode_addresses(&self.kv_backend, keys).await?;
for (_, flow_route_value) in flow_routes.iter_mut() {
let flownode_id = flow_route_value.peer.id;
// If an id lacks a corresponding address in the `flow_node_addrs`,
// it means the old address in `table_flow_value` is still valid,
// which is expected.
if let Some(node_addr) = flow_node_addrs.get(&flownode_id) {
flow_route_value.peer.addr = node_addr.peer.addr.clone();
}
}
Ok(())
}
}
#[cfg(test)]

View File

@@ -1,47 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use crate::error::Result;
use crate::key::node_address::{NodeAddressKey, NodeAddressValue};
use crate::key::{MetadataKey, MetadataValue};
use crate::kv_backend::KvBackendRef;
use crate::rpc::store::BatchGetRequest;
/// Get the addresses of the flownodes.
/// The result is a map: node_id -> NodeAddressValue
pub(crate) async fn get_flownode_addresses(
kv_backend: &KvBackendRef,
keys: Vec<NodeAddressKey>,
) -> Result<HashMap<u64, NodeAddressValue>> {
if keys.is_empty() {
return Ok(HashMap::default());
}
let req = BatchGetRequest {
keys: keys.into_iter().map(|k| k.to_bytes()).collect(),
};
kv_backend
.batch_get(req)
.await?
.kvs
.into_iter()
.map(|kv| {
let key = NodeAddressKey::from_bytes(&kv.key)?;
let value = NodeAddressValue::try_from_raw_value(&kv.value)?;
Ok((key.node_id, value))
})
.collect()
}

View File

@@ -14,7 +14,7 @@
use std::sync::Arc;
use futures::TryStreamExt;
use futures::stream::BoxStream;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
@@ -22,8 +22,7 @@ use snafu::OptionExt;
use table::metadata::TableId;
use crate::error::{self, Result};
use crate::key::flow::{flownode_addr_helper, FlowScoped};
use crate::key::node_address::NodeAddressKey;
use crate::key::flow::FlowScoped;
use crate::key::{BytesAdapter, FlowId, FlowPartitionId, MetadataKey, MetadataValue};
use crate::kv_backend::txn::{Txn, TxnOp};
use crate::kv_backend::KvBackendRef;
@@ -197,7 +196,10 @@ impl TableFlowManager {
/// Retrieves all [TableFlowKey]s of the specified `table_id`.
///
/// TODO(discord9): add cache for it since range request does not support cache.
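///
/// Like `FlowRouteManager::routes`, this returns a stream; callers collect it, e.g.
/// `flows(table_id).map_ok(|(k, v)| (k.flownode_id(), v.peer)).try_collect::<HashMap<_, _>>().await`,
/// as done in the flownode-set cache initializer updated in this change.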
pub async fn flows(&self, table_id: TableId) -> Result<Vec<(TableFlowKey, TableFlowValue)>> {
pub fn flows(
&self,
table_id: TableId,
) -> BoxStream<'static, Result<(TableFlowKey, TableFlowValue)>> {
let start_key = TableFlowKey::range_start_key(table_id);
let req = RangeRequest::new().with_prefix(start_key);
let stream = PaginationStream::new(
@@ -208,9 +210,7 @@ impl TableFlowManager {
)
.into_stream();
let mut res = stream.try_collect::<Vec<_>>().await?;
self.remap_table_flow_addresses(&mut res).await?;
Ok(res)
Box::pin(stream)
}
/// Builds a create table flow transaction.
@@ -238,28 +238,6 @@ impl TableFlowManager {
Ok(Txn::new().and_then(txns))
}
async fn remap_table_flow_addresses(
&self,
table_flows: &mut [(TableFlowKey, TableFlowValue)],
) -> Result<()> {
let keys = table_flows
.iter()
.map(|(_, value)| NodeAddressKey::with_flownode(value.peer.id))
.collect::<Vec<_>>();
let flownode_addrs =
flownode_addr_helper::get_flownode_addresses(&self.kv_backend, keys).await?;
for (_, table_flow_value) in table_flows.iter_mut() {
let flownode_id = table_flow_value.peer.id;
// If an id lacks a corresponding address in the `flow_node_addrs`,
// it means the old address in `table_flow_value` is still valid,
// which is expected.
if let Some(flownode_addr) = flownode_addrs.get(&flownode_id) {
table_flow_value.peer.addr = flownode_addr.peer.addr.clone();
}
}
Ok(())
}
}
#[cfg(test)]

View File

@@ -39,10 +39,6 @@ impl NodeAddressKey {
pub fn with_datanode(node_id: u64) -> Self {
Self::new(Role::Datanode, node_id)
}
pub fn with_flownode(node_id: u64) -> Self {
Self::new(Role::Flownode, node_id)
}
}
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]

View File

@@ -1218,10 +1218,7 @@ mod tests {
);
let response = mock_region_server
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.unwrap();
assert_eq!(response.affected_rows, 0);
@@ -1313,10 +1310,7 @@ mod tests {
.insert(region_id, RegionEngineWithStatus::Ready(engine.clone()));
mock_region_server
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.unwrap_err();

View File

@@ -49,13 +49,12 @@ pub(crate) use crate::adapter::node_context::FlownodeContext;
use crate::adapter::refill::RefillTask;
use crate::adapter::table_source::ManagedTableSource;
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
pub(crate) use crate::adapter::worker::{create_worker, WorkerHandle};
pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
use crate::compute::ErrCollector;
use crate::df_optimizer::sql_to_flow_plan;
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
use crate::expr::Batch;
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
use crate::recording_rules::RecordingRuleEngine;
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
mod flownode_impl;
@@ -172,8 +171,6 @@ pub struct FlowWorkerManager {
flush_lock: RwLock<()>,
/// receive a oneshot sender to send state size report
state_report_handler: RwLock<Option<StateReportHandler>>,
/// engine for recording rule
rule_engine: RecordingRuleEngine,
}
/// Building FlownodeManager
@@ -188,7 +185,6 @@ impl FlowWorkerManager {
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
rule_engine: RecordingRuleEngine,
) -> Self {
let srv_map = ManagedTableSource::new(
table_meta.table_info_manager().clone(),
@@ -211,7 +207,6 @@ impl FlowWorkerManager {
node_id,
flush_lock: RwLock::new(()),
state_report_handler: RwLock::new(None),
rule_engine,
}
}
@@ -220,6 +215,25 @@ impl FlowWorkerManager {
self
}
/// Create a flownode manager together with `num_workers` workers
pub fn new_with_workers<'s>(
node_id: Option<u32>,
query_engine: Arc<dyn QueryEngine>,
table_meta: TableMetadataManagerRef,
num_workers: usize,
) -> (Self, Vec<Worker<'s>>) {
let mut zelf = Self::new(node_id, query_engine, table_meta);
let workers: Vec<_> = (0..num_workers)
.map(|_| {
let (handle, worker) = create_worker();
zelf.add_worker_handle(handle);
worker
})
.collect();
(zelf, workers)
}
/// Add a worker handle to the manager, meaning the corresponding worker is under its management
pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
self.worker_handles.push(handle);
@@ -737,11 +751,7 @@ pub struct CreateFlowArgs {
/// Create&Remove flow
impl FlowWorkerManager {
/// remove a flow by its id
#[allow(unreachable_code)]
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.remove_flow(flow_id).await;
for handle in self.worker_handles.iter() {
if handle.contains_flow(flow_id).await? {
handle.remove_flow(flow_id).await?;
@@ -757,10 +767,8 @@ impl FlowWorkerManager {
/// steps to create task:
/// 1. parse query into typed plan (and optionally parse the expire_after expr)
/// 2. render source/sink with output table id and used input table id
#[allow(clippy::too_many_arguments, unreachable_code)]
#[allow(clippy::too_many_arguments)]
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
// TODO(discord9): reroute some back to streaming engine later
return self.rule_engine.create_flow(args).await;
let CreateFlowArgs {
flow_id,
sink_table_name,

View File

@@ -153,10 +153,7 @@ impl Flownode for FlowWorkerManager {
}
}
#[allow(unreachable_code, unused)]
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
return Ok(Default::default());
// using try_read to ensure two things:
// 1. flush wouldn't happen until inserts before it is inserted
// 2. inserts happening concurrently with flush wouldn't be block by flush

View File

@@ -16,7 +16,6 @@
use std::any::Any;
use arrow_schema::ArrowError;
use common_error::ext::BoxedError;
use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
use common_macro::stack_trace_debug;
@@ -157,15 +156,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Arrow error: {raw:?} in context: {context}"))]
Arrow {
#[snafu(source)]
raw: ArrowError,
context: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
Datafusion {
#[snafu(source)]
@@ -240,7 +230,6 @@ impl ErrorExt for Error {
match self {
Self::Eval { .. }
| Self::JoinTask { .. }
| Self::Arrow { .. }
| Self::Datafusion { .. }
| Self::InsertIntoFlow { .. } => StatusCode::Internal,
Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,

View File

@@ -238,7 +238,6 @@ mod test {
for (sql, current, expected) in &testcases {
let plan = sql_to_substrait(engine.clone(), sql).await;
let mut ctx = create_test_ctx();
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan)
.await

View File

@@ -14,6 +14,7 @@
//! Send heartbeat from flownode to metasrv
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use api::v1::meta::{HeartbeatRequest, Peer};
@@ -24,7 +25,7 @@ use common_meta::heartbeat::handler::{
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage};
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_meta::key::flow::flow_state::FlowStat;
use common_telemetry::{debug, error, info};
use common_telemetry::{debug, error, info, warn};
use greptime_proto::v1::meta::NodeInfo;
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
use servers::addrs;
@@ -59,12 +60,13 @@ async fn query_flow_state(
#[derive(Clone)]
pub struct HeartbeatTask {
node_id: u64,
node_epoch: u64,
peer_addr: String,
meta_client: Arc<MetaClient>,
report_interval: Duration,
retry_interval: Duration,
resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
start_time_ms: u64,
running: Arc<AtomicBool>,
query_stat_size: Option<SizeReportSender>,
}
@@ -81,17 +83,26 @@ impl HeartbeatTask {
) -> Self {
Self {
node_id: opts.node_id.unwrap_or(0),
node_epoch: common_time::util::current_time_millis() as u64,
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
meta_client,
report_interval: heartbeat_opts.interval,
retry_interval: heartbeat_opts.retry_interval,
resp_handler_executor,
start_time_ms: common_time::util::current_time_millis() as u64,
running: Arc::new(AtomicBool::new(false)),
query_stat_size: None,
}
}
pub async fn start(&self) -> Result<(), Error> {
if self
.running
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
warn!("Heartbeat task started multiple times");
return Ok(());
}
info!("Start to establish the heartbeat connection to metasrv.");
let (req_sender, resp_stream) = self
.meta_client
@@ -114,6 +125,13 @@ impl HeartbeatTask {
pub fn shutdown(&self) {
info!("Close heartbeat task for flownode");
if self
.running
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
.is_err()
{
warn!("Call close heartbeat task multiple times");
}
}
fn new_heartbeat_request(
@@ -163,7 +181,7 @@ impl HeartbeatTask {
mut outgoing_rx: mpsc::Receiver<OutgoingMessage>,
) {
let report_interval = self.report_interval;
let node_epoch = self.node_epoch;
let start_time_ms = self.start_time_ms;
let self_peer = Some(Peer {
id: self.node_id,
addr: self.peer_addr.clone(),
@@ -180,8 +198,7 @@ impl HeartbeatTask {
let heartbeat_request = HeartbeatRequest {
peer: self_peer,
node_epoch,
info: Self::build_node_info(node_epoch),
info: Self::build_node_info(start_time_ms),
..Default::default()
};
@@ -213,8 +230,6 @@ impl HeartbeatTask {
// set the timeout to half of the report interval so that it wouldn't delay heartbeat if something went horribly wrong
latest_report = query_flow_state(&query_stat_size, report_interval / 2).await;
}
info!("flownode heartbeat task stopped.");
});
}

View File

@@ -33,7 +33,6 @@ mod expr;
pub mod heartbeat;
mod metrics;
mod plan;
mod recording_rules;
mod repr;
mod server;
mod transform;
@@ -44,5 +43,4 @@ mod test_utils;
pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
pub use error::{Error, Result};
pub use recording_rules::FrontendClient;
pub use server::{FlownodeBuilder, FlownodeInstance, FlownodeServer, FrontendInvoker};

View File

@@ -28,32 +28,6 @@ lazy_static! {
&["table_id"]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_query_time",
"flow rule engine query time",
&["flow_id"],
vec![
0.0,
1.,
3.,
5.,
10.,
20.,
30.,
60.,
2. * 60.,
5. * 60.,
10. * 60.
]
)
.unwrap();
pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
"greptime_flow_rule_engine_slow_query",
"flow rule engine slow query",
&["flow_id", "sql", "peer"],
vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
)
.unwrap();
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

View File

@@ -1,744 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Run a flow as a recording rule: a time-window-aware normal query triggered on every tick configured by the user
mod engine;
mod frontend_client;
use std::collections::HashSet;
use std::sync::Arc;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datafusion::error::Result as DfResult;
use datafusion::logical_expr::Expr;
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
use datafusion::prelude::SessionContext;
use datafusion::sql::unparser::Unparser;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
use datafusion_common::{Column, DFSchema, TableReference};
use datafusion_expr::LogicalPlan;
use datafusion_physical_expr::PhysicalExprRef;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::value::Value;
use datatypes::vectors::{
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
TimestampSecondVector, Vector,
};
pub use engine::RecordingRuleEngine;
pub use frontend_client::FrontendClient;
use query::parser::QueryLanguageParser;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use crate::df_optimizer::apply_df_optimizer;
use crate::error::{ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, UnexpectedSnafu};
use crate::Error;
/// Convert sql to datafusion logical plan
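///
/// e.g. `sql_to_df_plan(query_ctx, engine, "SELECT number FROM numbers_with_ts", false)`
/// returns the unoptimized plan; passing `optimize = true` additionally runs
/// `apply_df_optimizer` on it (see the tests in this module for concrete usage).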
pub async fn sql_to_df_plan(
query_ctx: QueryContextRef,
engine: QueryEngineRef,
sql: &str,
optimize: bool,
) -> Result<LogicalPlan, Error> {
let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = engine
.planner()
.plan(&stmt, query_ctx)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = if optimize {
apply_df_optimizer(plan).await?
} else {
plan
};
Ok(plan)
}
/// Find the nearest lower bound for time `current` in the given `plan` for the time window expr,
/// i.e. for a time window expr of `date_bin(INTERVAL '5 minutes', ts) as time_window` and `current="2021-07-01 00:01:01.000"`,
/// return `Some("2021-07-01 00:00:00.000")`;
/// if `plan` doesn't contain a time window expr over the `TIME INDEX` column, the returned bounds are `None`.
///
/// A time window expr is an expr that:
/// 1. refers only to a time index column
/// 2. is monotonically increasing
/// 3. shows up in the GROUP BY clause
///
/// Note that the plan should only contain one TableScan.
pub async fn find_plan_time_window_bound(
plan: &LogicalPlan,
current: Timestamp,
query_ctx: QueryContextRef,
engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
// TODO(discord9): find the expr that does the time window
let catalog_man = engine.engine_state().catalog_manager();
let mut table_name = None;
// first find the table source in the logical plan
plan.apply(|plan| {
let LogicalPlan::TableScan(table_scan) = plan else {
return Ok(TreeNodeRecursion::Continue);
};
table_name = Some(table_scan.table_name.clone());
Ok(TreeNodeRecursion::Stop)
})
.with_context(|_| DatafusionSnafu {
context: format!("Can't find table source in plan {plan:?}"),
})?;
let Some(table_name) = table_name else {
UnexpectedSnafu {
reason: format!("Can't find table source in plan {plan:?}"),
}
.fail()?
};
let current_schema = query_ctx.current_schema();
let catalog_name = table_name.catalog().unwrap_or(query_ctx.current_catalog());
let schema_name = table_name.schema().unwrap_or(&current_schema);
let table_name = table_name.table();
let Some(table_ref) = catalog_man
.table(catalog_name, schema_name, table_name, Some(&query_ctx))
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
else {
UnexpectedSnafu {
reason: format!(
"Can't find table {table_name:?} in catalog {catalog_name:?}/{schema_name:?}"
),
}
.fail()?
};
let schema = &table_ref.table_info().meta.schema;
let ts_index = schema.timestamp_column().context(UnexpectedSnafu {
reason: format!("Can't find timestamp column in table {table_name:?}"),
})?;
let ts_col_name = ts_index.name.clone();
let expected_time_unit = ts_index.data_type.as_timestamp().with_context(|| UnexpectedSnafu {
reason: format!(
"Expected timestamp column {ts_col_name:?} in table {table_name:?} to be timestamp, but got {ts_index:?}"
),
})?.unit();
let ts_columns: HashSet<_> = HashSet::from_iter(vec![
format!("{catalog_name}.{schema_name}.{table_name}.{ts_col_name}"),
format!("{schema_name}.{table_name}.{ts_col_name}"),
format!("{table_name}.{ts_col_name}"),
format!("{ts_col_name}"),
]);
let ts_columns: HashSet<_> = ts_columns
.into_iter()
.map(Column::from_qualified_name)
.collect();
let ts_columns_ref: HashSet<&Column> = ts_columns.iter().collect();
// find the time window expr which refers to the time index column
let mut time_window_expr: Option<Expr> = None;
let find_time_window_expr = |plan: &LogicalPlan| {
let LogicalPlan::Aggregate(aggregate) = plan else {
return Ok(TreeNodeRecursion::Continue);
};
for group_expr in &aggregate.group_expr {
let refs = group_expr.column_refs();
if refs.len() != 1 {
continue;
}
let ref_col = refs.iter().next().unwrap();
if ts_columns_ref.contains(ref_col) {
time_window_expr = Some(group_expr.clone());
break;
}
}
Ok(TreeNodeRecursion::Stop)
};
plan.apply(find_time_window_expr)
.with_context(|_| DatafusionSnafu {
context: format!("Can't find time window expr in plan {plan:?}"),
})?;
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
ts_col_name.clone(),
ts_index.data_type.as_arrow_type(),
false,
)]));
let df_schema = DFSchema::from_field_specific_qualified_schema(
vec![Some(TableReference::bare(table_name))],
&arrow_schema,
)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
})?;
// cast current to ts_index's type
let new_current = current
.convert_to(expected_time_unit)
.with_context(|| UnexpectedSnafu {
reason: format!("Failed to cast current timestamp {current:?} to {expected_time_unit}"),
})?;
// if no time_window_expr is found, return None
if let Some(time_window_expr) = time_window_expr {
let lower_bound =
find_expr_time_window_lower_bound(&time_window_expr, &df_schema, new_current)?;
let upper_bound =
find_expr_time_window_upper_bound(&time_window_expr, &df_schema, new_current)?;
Ok((ts_col_name, lower_bound, upper_bound))
} else {
Ok((ts_col_name, None, None))
}
}
/// Find the lower bound of the time window in the given `expr` for the `current` timestamp.
///
/// i.e. for `current="2021-07-01 00:01:01.000"` and `expr=date_bin(INTERVAL '5 minutes', ts) as time_window` and `ts_col=ts`,
/// return `Some("2021-07-01 00:00:00.000")` since it's the lower bound
/// of the current time window given the current timestamp.
///
/// If this returns `None`, the time window has no lower bound.
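///
/// Strategy (as implemented below): exponentially probe backwards from `current` until
/// the evaluated window value changes, then binary-search that range for the exact
/// boundary timestamp.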
fn find_expr_time_window_lower_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
use std::cmp::Ordering;
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
if cur_time_window == current {
return Ok(Some(current));
}
// search to find the lower bound
let mut offset: i64 = 1;
let lower_bound;
let mut upper_bound = Some(current);
// first, exponentially probe to find a range for the binary search
loop {
let Some(next_val) = current.value().checked_sub(offset) else {
// no lower bound
return Ok(None);
};
let prev_time_probe = common_time::Timestamp::new(next_val, current.unit());
let prev_time_window = eval_ts_to_ts(&phy_expr, df_schema, prev_time_probe)?;
match prev_time_window.cmp(&cur_time_window) {
Ordering::Less => {
lower_bound = Some(prev_time_probe);
break;
}
Ordering::Equal => {
upper_bound = Some(prev_time_probe);
}
Ordering::Greater => {
UnexpectedSnafu {
reason: format!(
"Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
),
}
.fail()?
}
}
let Some(new_offset) = offset.checked_mul(2) else {
// no lower bound
return Ok(None);
};
offset = new_offset;
}
// binary search for the exact lower bound
ensure!(lower_bound.map(|v| v.unit()) == upper_bound.map(|v| v.unit()), UnexpectedSnafu {
reason: format!("unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"),
});
let input_time_unit = lower_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.unit();
let mut low = lower_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.value();
let mut high = upper_bound
.context(UnexpectedSnafu {
reason: "should have upper bound",
})?
.value();
while low < high {
let mid = (low + high) / 2;
let mid_probe = common_time::Timestamp::new(mid, input_time_unit);
let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;
match mid_time_window.cmp(&cur_time_window) {
Ordering::Less => low = mid + 1,
Ordering::Equal => high = mid,
Ordering::Greater => UnexpectedSnafu {
reason: format!("Binary search failed for time window expression {expr:?}"),
}
.fail()?,
}
}
let final_lower_bound_for_time_window = common_time::Timestamp::new(low, input_time_unit);
Ok(Some(final_lower_bound_for_time_window))
}
/// Find the upper bound of the time window in the given `expr` for the `current` timestamp.
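///
/// Mirrors [`find_expr_time_window_lower_bound`]: exponentially probe forward from
/// `current` until the evaluated window value changes, then binary-search that range
/// for the first timestamp mapping to the next window.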
fn find_expr_time_window_upper_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
use std::cmp::Ordering;
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
// search to find the upper bound
let mut offset: i64 = 1;
let mut lower_bound = Some(current);
let upper_bound;
// first, exponentially probe to find a range for the binary search
loop {
let Some(next_val) = current.value().checked_add(offset) else {
// no upper bound if overflow
return Ok(None);
};
let next_time_probe = common_time::Timestamp::new(next_val, current.unit());
let next_time_window = eval_ts_to_ts(&phy_expr, df_schema, next_time_probe)?;
match next_time_window.cmp(&cur_time_window) {
Ordering::Less => {UnexpectedSnafu {
reason: format!(
"Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
),
}
.fail()?
}
Ordering::Equal => {
lower_bound = Some(next_time_probe);
}
Ordering::Greater => {
upper_bound = Some(next_time_probe);
break
}
}
let Some(new_offset) = offset.checked_mul(2) else {
// no upper bound if overflow
return Ok(None);
};
offset = new_offset;
}
// binary search for the exact upper bound
ensure!(lower_bound.map(|v| v.unit()) == upper_bound.map(|v| v.unit()), UnexpectedSnafu {
reason: format!("unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"),
});
let output_unit = upper_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.unit();
let mut low = lower_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.value();
let mut high = upper_bound
.context(UnexpectedSnafu {
reason: "should have upper bound",
})?
.value();
while low < high {
let mid = (low + high) / 2;
let mid_probe = common_time::Timestamp::new(mid, output_unit);
let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;
match mid_time_window.cmp(&cur_time_window) {
Ordering::Less => UnexpectedSnafu {
reason: format!("Binary search failed for time window expression {expr:?}"),
}
.fail()?,
Ordering::Equal => low = mid + 1,
Ordering::Greater => high = mid,
}
}
let final_upper_bound_for_time_window = common_time::Timestamp::new(high, output_unit);
Ok(Some(final_upper_bound_for_time_window))
}
fn eval_ts_to_ts(
phy: &PhysicalExprRef,
df_schema: &DFSchema,
input_value: Timestamp,
) -> Result<Timestamp, Error> {
let ts_vector = match input_value.unit() {
TimeUnit::Second => {
TimestampSecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Millisecond => {
TimestampMillisecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
};
let rb = DfRecordBatch::try_new(df_schema.inner().clone(), vec![ts_vector.clone()])
.with_context(|_| ArrowSnafu {
context: format!("Failed to create record batch from {df_schema:?} and {ts_vector:?}"),
})?;
let eval_res = phy.evaluate(&rb).with_context(|_| DatafusionSnafu {
context: format!("Failed to evaluate physical expression {phy:?} on {rb:?}"),
})?;
let val = match eval_res {
datafusion_expr::ColumnarValue::Array(array) => {
let ty = array.data_type();
let ty = ConcreteDataType::from_arrow_type(ty);
let time_unit = if let ConcreteDataType::Timestamp(ty) = ty {
ty.unit()
} else {
return UnexpectedSnafu {
reason: format!("Physical expression {phy:?} evaluated to non-timestamp type"),
}
.fail();
};
match time_unit {
TimeUnit::Second => TimestampSecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.get(0),
TimeUnit::Millisecond => {
TimestampMillisecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.get(0)
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.get(0)
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.get(0)
}
}
}
datafusion_expr::ColumnarValue::Scalar(scalar) => Value::try_from(scalar.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert scalar {scalar:?} to value"),
})?,
};
if let Value::Timestamp(ts) = val {
Ok(ts)
} else {
UnexpectedSnafu {
reason: format!("Expected timestamp in expression {phy:?} but got {val:?}"),
}
.fail()?
}
}
// TODO(discord9): a method to find out the precise time window
/// Find the `Filter` node corresponding to the outermost `WHERE` and AND a new filter expr into it
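///
/// e.g. rewriting `SELECT number FROM numbers_with_ts GROUP BY number` with the extra
/// filter `col("number").gt(lit(4u32))` yields
/// `SELECT ... FROM numbers_with_ts WHERE (number > 4) GROUP BY ...`
/// (see `test_add_filter` below); if no `Filter` node exists, one is added on top of the `TableScan`.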
#[derive(Debug)]
pub struct AddFilterRewriter {
extra_filter: Expr,
is_rewritten: bool,
}
impl AddFilterRewriter {
fn new(filter: Expr) -> Self {
Self {
extra_filter: filter,
is_rewritten: false,
}
}
}
impl TreeNodeRewriter for AddFilterRewriter {
type Node = LogicalPlan;
fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
if self.is_rewritten {
return Ok(Transformed::no(node));
}
match node {
LogicalPlan::Filter(mut filter) if !filter.having => {
filter.predicate = filter.predicate.and(self.extra_filter.clone());
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
LogicalPlan::TableScan(_) => {
// add a new filter
let filter =
datafusion_expr::Filter::try_new(self.extra_filter.clone(), Arc::new(node))?;
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
_ => Ok(Transformed::no(node)),
}
}
}
fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
let unparser = Unparser::default();
let sql = unparser
.plan_to_sql(plan)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to unparse logical plan {plan:?}"),
})?;
Ok(sql.to_string())
}
#[cfg(test)]
mod test {
use datafusion_common::tree_node::TreeNode;
use pretty_assertions::assert_eq;
use session::context::QueryContext;
use super::{sql_to_df_plan, *};
use crate::recording_rules::{df_plan_to_sql, AddFilterRewriter};
use crate::test_utils::create_test_query_engine;
#[tokio::test]
async fn test_add_filter() {
let testcases = vec![
(
"SELECT number FROM numbers_with_ts GROUP BY number","SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (number > 4) GROUP BY numbers_with_ts.number"
),
(
"SELECT number FROM numbers_with_ts WHERE number < 2 OR number >10",
"SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (((numbers_with_ts.number < 2) OR (numbers_with_ts.number > 10)) AND (number > 4))"
),
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window",
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE (number > 4) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
)
];
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
for (before, after) in testcases {
let sql = before;
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let mut add_filter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
let plan = plan.rewrite(&mut add_filter).unwrap().data;
let new_sql = df_plan_to_sql(&plan).unwrap();
assert_eq!(after, new_sql);
}
}
#[tokio::test]
async fn test_plan_time_window_lower_bound() {
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
let testcases = [
            // the same alias is not the same column
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts GROUP BY ts;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394109000, TimeUnit::Millisecond)),
Some(Timestamp::new(1740394109001, TimeUnit::Millisecond)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:29' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:48:29.001' AS TIMESTAMP))) GROUP BY numbers_with_ts.ts"
),
// complex time window index
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394080, TimeUnit::Second)),
Some(Timestamp::new(1740394140, TimeUnit::Second)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// no time index
(
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",
Timestamp::new(23, TimeUnit::Millisecond),
("ts".to_string(), None, None),
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;"
),
// time index
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// on spot
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(0, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// different time unit
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23_000_000, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other fields
(
"SELECT sum(number) as sum_up, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number) AS sum_up, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other pks
(
"SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number"
),
];
for (sql, current, expected, unparsed) in testcases {
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, true)
.await
.unwrap();
let real =
find_plan_time_window_bound(&plan, current, ctx.clone(), query_engine.clone())
.await
.unwrap();
assert_eq!(expected, real);
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let (col_name, lower, upper) = real;
let new_sql = if lower.is_some() {
let to_df_literal = |value| {
let value = Value::from(value);
value.try_to_scalar_value(&value.data_type()).unwrap()
};
let lower = to_df_literal(lower.unwrap());
let upper = to_df_literal(upper.unwrap());
let expr = col(&col_name)
.gt_eq(lit(lower))
.and(col(&col_name).lt_eq(lit(upper)));
let mut add_filter = AddFilterRewriter::new(expr);
let plan = plan.rewrite(&mut add_filter).unwrap().data;
df_plan_to_sql(&plan).unwrap()
} else {
sql.to_string()
};
assert_eq!(unparsed, new_sql);
}
}
}

View File

@@ -1,407 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::BTreeMap;
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use common_meta::ddl::create_flow::FlowType;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::Timestamp;
use datafusion_common::tree_node::TreeNode;
use datatypes::value::Value;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, ResultExt};
use tokio::sync::oneshot::error::TryRecvError;
use tokio::sync::{oneshot, RwLock};
use tokio::time::Instant;
use super::frontend_client::FrontendClient;
use super::{df_plan_to_sql, AddFilterRewriter};
use crate::adapter::{CreateFlowArgs, FlowId};
use crate::error::{DatafusionSnafu, DatatypesSnafu, FlowAlreadyExistSnafu, UnexpectedSnafu};
use crate::metrics::{METRIC_FLOW_RULE_ENGINE_QUERY_TIME, METRIC_FLOW_RULE_ENGINE_SLOW_QUERY};
use crate::recording_rules::{find_plan_time_window_bound, sql_to_df_plan};
use crate::Error;
/// TODO(discord9): make those constants configurable
/// The default rule engine query timeout is 10 minutes
pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60);
/// will output a warn log for any query that runs for more than 1 minute, and also every 1 minute while that query is still running
pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);
/// TODO(discord9): determine how to configure refresh rate
pub struct RecordingRuleEngine {
tasks: RwLock<BTreeMap<FlowId, RecordingRuleTask>>,
shutdown_txs: RwLock<BTreeMap<FlowId, oneshot::Sender<()>>>,
frontend_client: Arc<FrontendClient>,
engine: QueryEngineRef,
}
impl RecordingRuleEngine {
pub fn new(frontend_client: Arc<FrontendClient>, engine: QueryEngineRef) -> Self {
Self {
tasks: Default::default(),
shutdown_txs: Default::default(),
frontend_client,
engine,
}
}
}
const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);
impl RecordingRuleEngine {
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
let CreateFlowArgs {
flow_id,
sink_table_name,
source_table_ids: _,
create_if_not_exists,
or_replace,
expire_after,
comment: _,
sql,
flow_options,
query_ctx,
} = args;
// or replace logic
{
let is_exist = self.tasks.read().await.contains_key(&flow_id);
match (create_if_not_exists, or_replace, is_exist) {
// if replace, ignore that old flow exists
(_, true, true) => {
info!("Replacing flow with id={}", flow_id);
}
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
// already exists, and not replace, return None
(true, false, true) => {
info!("Flow with id={} already exists, do nothing", flow_id);
return Ok(None);
}
// continue as normal
(_, _, false) => (),
}
}
let flow_type = flow_options.get(FlowType::FLOW_TYPE_KEY);
ensure!(
flow_type == Some(&FlowType::RecordingRule.to_string()) || flow_type.is_none(),
UnexpectedSnafu {
reason: format!("Flow type is not RecordingRule nor None, got {flow_type:?}")
}
);
let Some(query_ctx) = query_ctx else {
UnexpectedSnafu {
reason: "Query context is None".to_string(),
}
.fail()?
};
let (tx, rx) = oneshot::channel();
let task = RecordingRuleTask::new(
flow_id,
&sql,
expire_after,
sink_table_name,
Arc::new(query_ctx),
rx,
);
let task_inner = task.clone();
let engine = self.engine.clone();
let frontend = self.frontend_client.clone();
        // TODO(discord9): also save the handle & use a time wheel or similar for better scheduling
let _handle = common_runtime::spawn_global(async move {
match task_inner.start_executing(engine, frontend).await {
Ok(()) => info!("Flow {} shutdown", task_inner.flow_id),
Err(err) => common_telemetry::error!(
"Flow {} encounter unrecoverable error: {err:?}",
task_inner.flow_id
),
}
});
// TODO(discord9): deal with replace logic
let replaced_old_task_opt = self.tasks.write().await.insert(flow_id, task);
drop(replaced_old_task_opt);
self.shutdown_txs.write().await.insert(flow_id, tx);
Ok(Some(flow_id))
}
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
if self.tasks.write().await.remove(&flow_id).is_none() {
warn!("Flow {flow_id} not found in tasks")
}
let Some(tx) = self.shutdown_txs.write().await.remove(&flow_id) else {
UnexpectedSnafu {
reason: format!("Can't found shutdown tx for flow {flow_id}"),
}
.fail()?
};
if tx.send(()).is_err() {
warn!("Fail to shutdown flow {flow_id} due to receiver already dropped, maybe flow {flow_id} is already dropped?")
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct RecordingRuleTask {
flow_id: FlowId,
query: String,
/// in seconds
expire_after: Option<i64>,
sink_table_name: [String; 3],
state: Arc<RwLock<RecordingRuleState>>,
}
impl RecordingRuleTask {
pub fn new(
flow_id: FlowId,
query: &str,
expire_after: Option<i64>,
sink_table_name: [String; 3],
query_ctx: QueryContextRef,
shutdown_rx: oneshot::Receiver<()>,
) -> Self {
Self {
flow_id,
query: query.to_string(),
expire_after,
sink_table_name,
state: Arc::new(RwLock::new(RecordingRuleState::new(query_ctx, shutdown_rx))),
}
}
}
impl RecordingRuleTask {
/// This should be called in a new tokio task
pub async fn start_executing(
&self,
engine: QueryEngineRef,
frontend_client: Arc<FrontendClient>,
) -> Result<(), Error> {
        // only the first query doesn't need an upper bound
let mut is_first = true;
loop {
            // FIXME(discord9): test whether requiring an upper bound also works
let new_query = self
.gen_query_with_time_window(engine.clone(), false)
.await?;
let insert_into = format!(
"INSERT INTO {}.{}.{} {}",
self.sink_table_name[0],
self.sink_table_name[1],
self.sink_table_name[2],
new_query
);
if is_first {
is_first = false;
}
let instant = Instant::now();
let flow_id = self.flow_id;
let db_client = frontend_client.get_database_client().await?;
let peer_addr = db_client.peer.addr;
debug!(
"Executing flow {flow_id}(expire_after={:?} secs) on {:?} with query {}",
self.expire_after, peer_addr, &insert_into
);
let timer = METRIC_FLOW_RULE_ENGINE_QUERY_TIME
.with_label_values(&[flow_id.to_string().as_str()])
.start_timer();
let res = db_client.database.sql(&insert_into).await;
drop(timer);
let elapsed = instant.elapsed();
if let Ok(res1) = &res {
debug!(
"Flow {flow_id} executed, result: {res1:?}, elapsed: {:?}",
elapsed
);
} else if let Err(res) = &res {
warn!(
"Failed to execute Flow {flow_id} on frontend {}, result: {res:?}, elapsed: {:?} with query: {}",
peer_addr, elapsed, &insert_into
);
}
// record slow query
if elapsed >= SLOW_QUERY_THRESHOLD {
warn!(
"Flow {flow_id} on frontend {} executed for {:?} before complete, query: {}",
peer_addr, elapsed, &insert_into
);
METRIC_FLOW_RULE_ENGINE_SLOW_QUERY
.with_label_values(&[flow_id.to_string().as_str(), &insert_into, &peer_addr])
.observe(elapsed.as_secs_f64());
}
self.state
.write()
.await
.after_query_exec(elapsed, res.is_ok());
let sleep_until = {
let mut state = self.state.write().await;
match state.shutdown_rx.try_recv() {
Ok(()) => break Ok(()),
Err(TryRecvError::Closed) => {
warn!("Unexpected shutdown flow {flow_id}, shutdown anyway");
break Ok(());
}
Err(TryRecvError::Empty) => (),
}
state.get_next_start_query_time(None)
};
tokio::time::sleep_until(sleep_until).await;
}
}
async fn gen_query_with_time_window(
&self,
engine: QueryEngineRef,
need_upper_bound: bool,
) -> Result<String, Error> {
let query_ctx = self.state.read().await.query_ctx.clone();
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let low_bound = self
.expire_after
.map(|e| since_the_epoch.as_secs() - e as u64);
let Some(low_bound) = low_bound else {
return Ok(self.query.clone());
};
let low_bound = Timestamp::new_second(low_bound as i64);
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, true).await?;
let (col_name, lower, upper) =
find_plan_time_window_bound(&plan, low_bound, query_ctx.clone(), engine.clone())
.await?;
let new_sql = {
let to_df_literal = |value| -> Result<_, Error> {
let value = Value::from(value);
let value = value
.try_to_scalar_value(&value.data_type())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert to scalar value: {}", value),
})?;
Ok(value)
};
let lower = lower.map(to_df_literal).transpose()?;
let upper = upper.map(to_df_literal).transpose()?.and_then(|u| {
if need_upper_bound {
Some(u)
} else {
None
}
});
let expr = {
use datafusion_expr::{col, lit};
match (lower, upper) {
(Some(l), Some(u)) => col(&col_name)
.gt_eq(lit(l))
.and(col(&col_name).lt_eq(lit(u))),
(Some(l), None) => col(&col_name).gt_eq(lit(l)),
(None, Some(u)) => col(&col_name).lt(lit(u)),
                    // no time window, return directly
(None, None) => return Ok(self.query.clone()),
}
};
let mut add_filter = AddFilterRewriter::new(expr);
            // make an unoptimized plan for clearer unparsing
let plan =
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, false).await?;
let plan = plan
.clone()
.rewrite(&mut add_filter)
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan {plan:?}"),
})?
.data;
df_plan_to_sql(&plan)?
};
Ok(new_sql)
}
}
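
Before the time-window analysis runs, the low bound above is simply `now - expire_after` in Unix seconds. An editor-added sketch of just that first step, using plain integer arithmetic instead of `common_time::Timestamp` (the helper name is made up):

use std::time::{SystemTime, UNIX_EPOCH};

// Editor-added sketch: compute the expiration lower bound in Unix seconds,
// mirroring the `expire_after` handling in gen_query_with_time_window.
fn expire_lower_bound_secs(expire_after_secs: Option<i64>) -> Option<i64> {
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("Time went backwards")
        .as_secs() as i64;
    // No expiration configured means no lower bound (the full query is used).
    expire_after_secs.map(|e| now - e)
}

fn main() {
    // With expire_after = 3600s the query only needs data from the last hour.
    println!("{:?}", expire_lower_bound_secs(Some(3600)));
}
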
#[derive(Debug)]
pub struct RecordingRuleState {
query_ctx: QueryContextRef,
    /// completion time of the last query
last_update_time: Instant,
    /// duration of the last query
last_query_duration: Duration,
exec_state: ExecState,
shutdown_rx: oneshot::Receiver<()>,
}
impl RecordingRuleState {
pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
Self {
query_ctx,
last_update_time: Instant::now(),
last_query_duration: Duration::from_secs(0),
exec_state: ExecState::Idle,
shutdown_rx,
}
}
    /// called after the last query is done
    /// `is_succ` indicates whether the last query was successful
pub fn after_query_exec(&mut self, elapsed: Duration, _is_succ: bool) {
self.exec_state = ExecState::Idle;
self.last_query_duration = elapsed;
self.last_update_time = Instant::now();
}
    /// wait for at least `last_query_duration`, and at most `max_timeout`, before starting the next query
pub fn get_next_start_query_time(&self, max_timeout: Option<Duration>) -> Instant {
let next_duration = max_timeout
.unwrap_or(self.last_query_duration)
.min(self.last_query_duration);
let next_duration = next_duration.max(MIN_REFRESH_DURATION);
self.last_update_time + next_duration
}
}
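
The scheduling rule above is pure `Duration` arithmetic: clamp by the last query's duration (and the optional timeout), then never go below `MIN_REFRESH_DURATION`. A small editor-added example of that computation with concrete numbers; `next_wait` is a made-up name for the same expression used by `get_next_start_query_time`:

use std::time::Duration;

const MIN_REFRESH_DURATION: Duration = Duration::from_secs(5);

// Editor-added sketch of the wait computed by get_next_start_query_time.
fn next_wait(last_query_duration: Duration, max_timeout: Option<Duration>) -> Duration {
    max_timeout
        .unwrap_or(last_query_duration)
        .min(last_query_duration)
        .max(MIN_REFRESH_DURATION)
}

fn main() {
    // A 2s query is refreshed after the 5s floor, a 90s query after 90s.
    assert_eq!(next_wait(Duration::from_secs(2), None), Duration::from_secs(5));
    assert_eq!(next_wait(Duration::from_secs(90), None), Duration::from_secs(90));
    // An explicit 30s timeout caps the wait for the 90s query.
    assert_eq!(
        next_wait(Duration::from_secs(90), Some(Duration::from_secs(30))),
        Duration::from_secs(30)
    );
}
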
#[derive(Debug, Clone)]
enum ExecState {
Idle,
Executing,
}

View File

@@ -1,150 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Frontend client to run a flow as a recording rule, which is a time-window-aware normal query triggered on every tick set by the user
use std::sync::Arc;
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
use meta_client::client::MetaClient;
use snafu::ResultExt;
use crate::error::{ExternalSnafu, UnexpectedSnafu};
use crate::recording_rules::engine::DEFAULT_RULE_ENGINE_QUERY_TIMEOUT;
use crate::Error;
fn default_channel_mgr() -> ChannelManager {
let cfg = ChannelConfig::new().timeout(DEFAULT_RULE_ENGINE_QUERY_TIMEOUT);
ChannelManager::with_config(cfg)
}
fn client_from_urls(addrs: Vec<String>) -> Client {
Client::with_manager_and_urls(default_channel_mgr(), addrs)
}
/// A simple frontend client able to execute sql using grpc protocol
#[derive(Debug)]
pub enum FrontendClient {
Distributed {
meta_client: Arc<MetaClient>,
},
Standalone {
        /// for the sake of simplicity, still use grpc even in standalone mode
        /// note that the clients here should all be lazy, so that the connection can be made after the frontend is booted
        /// TODO(discord9): avoid using grpc under standalone mode
database_client: DatabaseWithPeer,
},
}
#[derive(Debug, Clone)]
pub struct DatabaseWithPeer {
pub database: Database,
pub peer: Peer,
}
impl DatabaseWithPeer {
fn new(database: Database, peer: Peer) -> Self {
Self { database, peer }
}
}
impl FrontendClient {
pub fn from_meta_client(meta_client: Arc<MetaClient>) -> Self {
Self::Distributed { meta_client }
}
pub fn from_static_grpc_addr(addr: String) -> Self {
let peer = Peer {
id: 0,
addr: addr.clone(),
};
let client = client_from_urls(vec![addr]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Self::Standalone {
database_client: DatabaseWithPeer::new(database, peer),
}
}
}
impl FrontendClient {
async fn scan_for_frontend(&self) -> Result<Vec<(NodeInfoKey, NodeInfo)>, Error> {
let Self::Distributed { meta_client, .. } = self else {
return Ok(vec![]);
};
let cluster_client = meta_client
.cluster_client()
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let cluster_id = meta_client.id().0;
let prefix = NodeInfoKey::key_prefix_with_role(cluster_id, Role::Frontend);
let req = RangeRequest::new().with_prefix(prefix);
let resp = cluster_client
.range(req)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let mut res = Vec::with_capacity(resp.kvs.len());
for kv in resp.kvs {
let key = NodeInfoKey::try_from(kv.key)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let val = NodeInfo::try_from(kv.value)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
res.push((key, val));
}
Ok(res)
}
/// Get the database with max `last_activity_ts`
async fn get_last_active_frontend(&self) -> Result<DatabaseWithPeer, Error> {
if let Self::Standalone { database_client } = self {
return Ok(database_client.clone());
}
let frontends = self.scan_for_frontend().await?;
let mut last_activity_ts = i64::MIN;
let mut peer = None;
for (_key, val) in frontends.iter() {
if val.last_activity_ts > last_activity_ts {
last_activity_ts = val.last_activity_ts;
peer = Some(val.peer.clone());
}
}
let Some(peer) = peer else {
UnexpectedSnafu {
reason: format!("No frontend available: {:?}", frontends),
}
.fail()?
};
let client = client_from_urls(vec![peer.addr.clone()]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Ok(DatabaseWithPeer::new(database, peer))
}
/// Get a database client, and possibly update it before returning.
pub async fn get_database_client(&self) -> Result<DatabaseWithPeer, Error> {
match self {
Self::Standalone { database_client } => Ok(database_client.clone()),
Self::Distributed { meta_client: _ } => self.get_last_active_frontend().await,
}
}
}
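
Selecting the frontend boils down to a max-by on `last_activity_ts` over the scanned node infos. An editor-added sketch of the selection with a simplified stand-in type (`PeerInfo` and `last_active` are made up; the real code works on `NodeInfo`/`Peer`):

// Editor-added sketch: choose the peer with the largest last_activity_ts,
// as get_last_active_frontend does over the scanned frontend node infos.
#[derive(Debug, Clone, PartialEq)]
struct PeerInfo {
    addr: String,
    last_activity_ts: i64,
}

fn last_active(peers: &[PeerInfo]) -> Option<&PeerInfo> {
    peers.iter().max_by_key(|p| p.last_activity_ts)
}

fn main() {
    let peers = vec![
        PeerInfo { addr: "10.0.0.1:4001".into(), last_activity_ts: 100 },
        PeerInfo { addr: "10.0.0.2:4001".into(), last_activity_ts: 250 },
    ];
    assert_eq!(last_active(&peers).unwrap().addr, "10.0.0.2:4001");
}
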

View File

@@ -57,7 +57,6 @@ use crate::error::{
};
use crate::heartbeat::HeartbeatTask;
use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
use crate::recording_rules::{FrontendClient, RecordingRuleEngine};
use crate::transform::register_function_to_query_engine;
use crate::utils::{SizeReportSender, StateReportHandler};
use crate::{Error, FlowWorkerManager, FlownodeOptions};
@@ -246,7 +245,6 @@ impl FlownodeInstance {
self.server.shutdown().await.context(ShutdownServerSnafu)?;
if let Some(task) = &self.heartbeat_task {
info!("Close heartbeat task for flownode");
task.shutdown();
}
@@ -273,8 +271,6 @@ pub struct FlownodeBuilder {
heartbeat_task: Option<HeartbeatTask>,
/// receive a oneshot sender to send state size report
state_report_handler: Option<StateReportHandler>,
/// Client to send sql to frontend
frontend_client: Arc<FrontendClient>,
}
impl FlownodeBuilder {
@@ -285,7 +281,6 @@ impl FlownodeBuilder {
table_meta: TableMetadataManagerRef,
catalog_manager: CatalogManagerRef,
flow_metadata_manager: FlowMetadataManagerRef,
frontend_client: Arc<FrontendClient>,
) -> Self {
Self {
opts,
@@ -295,7 +290,6 @@ impl FlownodeBuilder {
flow_metadata_manager,
heartbeat_task: None,
state_report_handler: None,
frontend_client,
}
}
@@ -453,10 +447,7 @@ impl FlownodeBuilder {
let node_id = self.opts.node_id.map(|id| id as u32);
let rule_engine =
RecordingRuleEngine::new(self.frontend_client.clone(), query_engine.clone());
let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta, rule_engine);
let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta);
for worker_id in 0..num_workers {
let (tx, rx) = oneshot::channel();

View File

@@ -86,8 +86,7 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {
let schema = vec![
datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false),
datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false)
.with_time_index(true),
datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false),
];
let mut columns = vec![];
let numbers = (1..=10).collect_vec();

View File

@@ -237,13 +237,6 @@ impl Instance {
let output = match stmt {
Statement::Query(_) | Statement::Explain(_) | Statement::Delete(_) => {
// TODO: remove this when format is supported in datafusion
if let Statement::Explain(explain) = &stmt {
if let Some(format) = explain.format() {
query_ctx.set_explain_format(format.to_string());
}
}
let stmt = QueryStatement::Sql(stmt);
let plan = self
.statement_executor

View File

@@ -25,7 +25,7 @@ use crate::fulltext_index::create::{FulltextIndexCreator, TantivyFulltextIndexCr
use crate::fulltext_index::search::{FulltextIndexSearcher, RowId, TantivyFulltextIndexSearcher};
use crate::fulltext_index::{Analyzer, Config};
async fn new_bounded_stager(prefix: &str) -> (TempDir, Arc<BoundedStager<String>>) {
async fn new_bounded_stager(prefix: &str) -> (TempDir, Arc<BoundedStager>) {
let staging_dir = create_temp_dir(prefix);
let path = staging_dir.path().to_path_buf();
(
@@ -68,13 +68,13 @@ async fn test_search(
let file_accessor = Arc::new(MockFileAccessor::new(prefix));
let puffin_manager = FsPuffinManager::new(stager, file_accessor);
let file_name = "fulltext_index".to_string();
let blob_key = "fulltext_index".to_string();
let mut writer = puffin_manager.writer(&file_name).await.unwrap();
create_index(prefix, &mut writer, &blob_key, texts, config).await;
let file_name = "fulltext_index";
let blob_key = "fulltext_index";
let mut writer = puffin_manager.writer(file_name).await.unwrap();
create_index(prefix, &mut writer, blob_key, texts, config).await;
let reader = puffin_manager.reader(&file_name).await.unwrap();
let index_dir = reader.dir(&blob_key).await.unwrap();
let reader = puffin_manager.reader(file_name).await.unwrap();
let index_dir = reader.dir(blob_key).await.unwrap();
let searcher = TantivyFulltextIndexSearcher::new(index_dir.path()).unwrap();
let results = searcher.search(query).await.unwrap();

View File

@@ -112,7 +112,6 @@ impl MetaClientBuilder {
.enable_store()
.enable_heartbeat()
.enable_procedure()
.enable_access_cluster_info()
}
pub fn enable_heartbeat(self) -> Self {

View File

@@ -198,13 +198,13 @@ impl Inner {
}
);
let leader_addr = self
let leader = self
.ask_leader
.as_ref()
.unwrap()
.get_leader()
.context(error::NoLeaderSnafu)?;
let mut leader = self.make_client(&leader_addr)?;
let mut leader = self.make_client(leader)?;
let (sender, receiver) = mpsc::channel::<HeartbeatRequest>(128);
@@ -236,11 +236,7 @@ impl Inner {
.await
.map_err(error::Error::from)?
.context(error::CreateHeartbeatStreamSnafu)?;
info!(
"Success to create heartbeat stream to server: {}, response: {:#?}",
leader_addr, res
);
info!("Success to create heartbeat stream to server: {:#?}", res);
Ok((
HeartbeatSender::new(self.id, self.role, sender),

View File

@@ -44,7 +44,6 @@ use mailbox_handler::MailboxHandler;
use on_leader_start_handler::OnLeaderStartHandler;
use publish_heartbeat_handler::PublishHeartbeatHandler;
use region_lease_handler::RegionLeaseHandler;
use remap_flow_peer_handler::RemapFlowPeerHandler;
use response_header_handler::ResponseHeaderHandler;
use snafu::{OptionExt, ResultExt};
use store_api::storage::RegionId;
@@ -72,7 +71,6 @@ pub mod mailbox_handler;
pub mod on_leader_start_handler;
pub mod publish_heartbeat_handler;
pub mod region_lease_handler;
pub mod remap_flow_peer_handler;
pub mod response_header_handler;
#[async_trait::async_trait]
@@ -575,7 +573,6 @@ impl HeartbeatHandlerGroupBuilder {
self.add_handler_last(publish_heartbeat_handler);
}
self.add_handler_last(CollectStatsHandler::new(self.flush_stats_factor));
self.add_handler_last(RemapFlowPeerHandler::default());
if let Some(flow_state_handler) = self.flow_state_handler.take() {
self.add_handler_last(flow_state_handler);
@@ -856,7 +853,7 @@ mod tests {
.unwrap();
let handlers = group.handlers;
assert_eq!(13, handlers.len());
assert_eq!(12, handlers.len());
let names = [
"ResponseHeaderHandler",
@@ -871,7 +868,6 @@ mod tests {
"MailboxHandler",
"FilterInactiveRegionStatsHandler",
"CollectStatsHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -892,7 +888,7 @@ mod tests {
let group = builder.build().unwrap();
let handlers = group.handlers;
assert_eq!(14, handlers.len());
assert_eq!(13, handlers.len());
let names = [
"ResponseHeaderHandler",
@@ -908,7 +904,6 @@ mod tests {
"CollectStatsHandler",
"FilterInactiveRegionStatsHandler",
"CollectStatsHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -926,7 +921,7 @@ mod tests {
let group = builder.build().unwrap();
let handlers = group.handlers;
assert_eq!(14, handlers.len());
assert_eq!(13, handlers.len());
let names = [
"CollectStatsHandler",
@@ -942,7 +937,6 @@ mod tests {
"MailboxHandler",
"FilterInactiveRegionStatsHandler",
"CollectStatsHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -960,7 +954,7 @@ mod tests {
let group = builder.build().unwrap();
let handlers = group.handlers;
assert_eq!(14, handlers.len());
assert_eq!(13, handlers.len());
let names = [
"ResponseHeaderHandler",
@@ -976,7 +970,6 @@ mod tests {
"CollectStatsHandler",
"FilterInactiveRegionStatsHandler",
"CollectStatsHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -994,7 +987,7 @@ mod tests {
let group = builder.build().unwrap();
let handlers = group.handlers;
assert_eq!(14, handlers.len());
assert_eq!(13, handlers.len());
let names = [
"ResponseHeaderHandler",
@@ -1010,7 +1003,6 @@ mod tests {
"FilterInactiveRegionStatsHandler",
"CollectStatsHandler",
"ResponseHeaderHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -1028,7 +1020,7 @@ mod tests {
let group = builder.build().unwrap();
let handlers = group.handlers;
assert_eq!(13, handlers.len());
assert_eq!(12, handlers.len());
let names = [
"ResponseHeaderHandler",
@@ -1043,7 +1035,6 @@ mod tests {
"CollectStatsHandler",
"FilterInactiveRegionStatsHandler",
"CollectStatsHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -1061,7 +1052,7 @@ mod tests {
let group = builder.build().unwrap();
let handlers = group.handlers;
assert_eq!(13, handlers.len());
assert_eq!(12, handlers.len());
let names = [
"ResponseHeaderHandler",
@@ -1076,7 +1067,6 @@ mod tests {
"MailboxHandler",
"FilterInactiveRegionStatsHandler",
"ResponseHeaderHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {
@@ -1094,7 +1084,7 @@ mod tests {
let group = builder.build().unwrap();
let handlers = group.handlers;
assert_eq!(13, handlers.len());
assert_eq!(12, handlers.len());
let names = [
"CollectStatsHandler",
@@ -1109,7 +1099,6 @@ mod tests {
"MailboxHandler",
"FilterInactiveRegionStatsHandler",
"CollectStatsHandler",
"RemapFlowPeerHandler",
];
for (handler, name) in handlers.iter().zip(names.into_iter()) {

View File

@@ -23,8 +23,8 @@ pub struct CheckLeaderHandler;
#[async_trait::async_trait]
impl HeartbeatHandler for CheckLeaderHandler {
fn is_acceptable(&self, _role: Role) -> bool {
true
fn is_acceptable(&self, role: Role) -> bool {
role == Role::Datanode
}
async fn handle(

View File

@@ -21,7 +21,7 @@ use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue};
use common_meta::key::{MetadataKey, MetadataValue};
use common_meta::peer::Peer;
use common_meta::rpc::store::PutRequest;
use common_telemetry::{error, info, warn};
use common_telemetry::{error, warn};
use dashmap::DashMap;
use snafu::ResultExt;
@@ -185,10 +185,6 @@ async fn rewrite_node_address(ctx: &mut Context, stat: &Stat) {
match ctx.leader_cached_kv_backend.put(put).await {
Ok(_) => {
info!(
"Successfully updated datanode `NodeAddressValue`: {:?}",
peer
);
// broadcast invalidating cache
let cache_idents = stat
.table_ids()
@@ -204,14 +200,11 @@ async fn rewrite_node_address(ctx: &mut Context, stat: &Stat) {
}
}
Err(e) => {
error!(e; "Failed to update datanode `NodeAddressValue`: {:?}", peer);
error!(e; "Failed to update NodeAddressValue: {:?}", peer);
}
}
} else {
warn!(
"Failed to serialize datanode `NodeAddressValue`: {:?}",
peer
);
warn!("Failed to serialize NodeAddressValue: {:?}", peer);
}
}

View File

@@ -1,92 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::v1::meta::{HeartbeatRequest, Peer, Role};
use common_meta::key::node_address::{NodeAddressKey, NodeAddressValue};
use common_meta::key::{MetadataKey, MetadataValue};
use common_meta::rpc::store::PutRequest;
use common_telemetry::{error, info, warn};
use dashmap::DashMap;
use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler};
use crate::metasrv::Context;
use crate::Result;
#[derive(Debug, Default)]
pub struct RemapFlowPeerHandler {
/// flow_node_id -> epoch
epoch_cache: DashMap<u64, u64>,
}
#[async_trait::async_trait]
impl HeartbeatHandler for RemapFlowPeerHandler {
fn is_acceptable(&self, role: Role) -> bool {
role == Role::Flownode
}
async fn handle(
&self,
req: &HeartbeatRequest,
ctx: &mut Context,
_acc: &mut HeartbeatAccumulator,
) -> Result<HandleControl> {
let Some(peer) = req.peer.as_ref() else {
return Ok(HandleControl::Continue);
};
let current_epoch = req.node_epoch;
let flow_node_id = peer.id;
let refresh = if let Some(mut epoch) = self.epoch_cache.get_mut(&flow_node_id) {
if current_epoch > *epoch.value() {
*epoch.value_mut() = current_epoch;
true
} else {
false
}
} else {
self.epoch_cache.insert(flow_node_id, current_epoch);
true
};
if refresh {
rewrite_node_address(ctx, peer).await;
}
Ok(HandleControl::Continue)
}
}
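
The epoch cache above only triggers an address rewrite when a flownode reports a strictly newer `node_epoch` or is seen for the first time. An editor-added sketch of that check using the standard `HashMap` entry API instead of `DashMap` (`should_refresh` is a made-up name):

use std::collections::hash_map::Entry;
use std::collections::HashMap;

// Editor-added sketch of the "refresh only on newer epoch" check in RemapFlowPeerHandler.
fn should_refresh(epoch_cache: &mut HashMap<u64, u64>, node_id: u64, current_epoch: u64) -> bool {
    match epoch_cache.entry(node_id) {
        Entry::Occupied(mut e) => {
            if current_epoch > *e.get() {
                e.insert(current_epoch);
                true
            } else {
                false
            }
        }
        Entry::Vacant(e) => {
            e.insert(current_epoch);
            true
        }
    }
}

fn main() {
    let mut cache = HashMap::new();
    assert!(should_refresh(&mut cache, 1, 10)); // first heartbeat: refresh
    assert!(!should_refresh(&mut cache, 1, 10)); // same epoch: skip
    assert!(should_refresh(&mut cache, 1, 11)); // node restarted with newer epoch: refresh
}
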
async fn rewrite_node_address(ctx: &mut Context, peer: &Peer) {
let key = NodeAddressKey::with_flownode(peer.id).to_bytes();
if let Ok(value) = NodeAddressValue::new(peer.clone().into()).try_as_raw_value() {
let put = PutRequest {
key,
value,
prev_kv: false,
};
match ctx.leader_cached_kv_backend.put(put).await {
Ok(_) => {
info!("Successfully updated flow `NodeAddressValue`: {:?}", peer);
// TODO(discord): broadcast invalidating cache to all frontends
}
Err(e) => {
error!(e; "Failed to update flow `NodeAddressValue`: {:?}", peer);
}
}
} else {
warn!("Failed to serialize flow `NodeAddressValue`: {:?}", peer);
}
}

View File

@@ -17,15 +17,13 @@ use std::time::Duration;
use api::v1::meta::{
procedure_service_server, DdlTaskRequest as PbDdlTaskRequest,
DdlTaskResponse as PbDdlTaskResponse, Error, MigrateRegionRequest, MigrateRegionResponse,
DdlTaskResponse as PbDdlTaskResponse, MigrateRegionRequest, MigrateRegionResponse,
ProcedureDetailRequest, ProcedureDetailResponse, ProcedureStateResponse, QueryProcedureRequest,
ResponseHeader,
};
use common_meta::ddl::ExecutorContext;
use common_meta::rpc::ddl::{DdlTask, SubmitDdlTaskRequest};
use common_meta::rpc::procedure;
use common_telemetry::warn;
use snafu::{OptionExt, ResultExt};
use snafu::{ensure, OptionExt, ResultExt};
use tonic::{Request, Response};
use super::GrpcResult;
@@ -39,16 +37,6 @@ impl procedure_service_server::ProcedureService for Metasrv {
&self,
request: Request<QueryProcedureRequest>,
) -> GrpcResult<ProcedureStateResponse> {
if !self.is_leader() {
let resp = ProcedureStateResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `query procedure state` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
let QueryProcedureRequest { header, pid, .. } = request.into_inner();
let _header = header.context(error::MissingRequestHeaderSnafu)?;
let pid = pid.context(error::MissingRequiredParameterSnafu { param: "pid" })?;
@@ -69,16 +57,6 @@ impl procedure_service_server::ProcedureService for Metasrv {
}
async fn ddl(&self, request: Request<PbDdlTaskRequest>) -> GrpcResult<PbDdlTaskResponse> {
if !self.is_leader() {
let resp = PbDdlTaskResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `ddl` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
let PbDdlTaskRequest {
header,
query_context,
@@ -121,15 +99,12 @@ impl procedure_service_server::ProcedureService for Metasrv {
&self,
request: Request<MigrateRegionRequest>,
) -> GrpcResult<MigrateRegionResponse> {
if !self.is_leader() {
let resp = MigrateRegionResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `migrate` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
ensure!(
self.meta_peer_client().is_leader(),
error::UnexpectedSnafu {
violated: "Trying to submit a region migration procedure to non-leader meta server"
}
);
let MigrateRegionRequest {
header,
@@ -175,16 +150,6 @@ impl procedure_service_server::ProcedureService for Metasrv {
&self,
request: Request<ProcedureDetailRequest>,
) -> GrpcResult<ProcedureDetailResponse> {
if !self.is_leader() {
let resp = ProcedureDetailResponse {
header: Some(ResponseHeader::failed(0, Error::is_not_leader())),
..Default::default()
};
warn!("The current meta is not leader, but a `procedure details` request have reached the meta. Detail: {:?}.", request);
return Ok(Response::new(resp));
}
let ProcedureDetailRequest { header } = request.into_inner();
let _header = header.context(error::MissingRequestHeaderSnafu)?;
let metas = self

View File

@@ -142,7 +142,6 @@ impl DataRegion {
c.column_id = new_column_id_start + delta as u32;
c.column_schema.set_nullable();
match index_options {
IndexOptions::None => {}
IndexOptions::Inverted => {
c.column_schema.set_inverted_index(true);
}

View File

@@ -21,7 +21,7 @@ use api::v1::SemanticType;
use common_telemetry::info;
use common_time::{Timestamp, FOREVER};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, SkippingIndexOptions};
use datatypes::schema::ColumnSchema;
use datatypes::value::Value;
use mito2::engine::MITO_ENGINE_NAME;
use object_store::util::join_dir;
@@ -55,8 +55,6 @@ use crate::error::{
use crate::metrics::PHYSICAL_REGION_COUNT;
use crate::utils::{self, to_data_region_id, to_metadata_region_id};
const DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY: u32 = 1024;
impl MetricEngineInner {
pub async fn create_regions(
&self,
@@ -442,7 +440,6 @@ impl MetricEngineInner {
///
/// Return `[table_id_col, tsid_col]`
fn internal_column_metadata() -> [ColumnMetadata; 2] {
// Safety: BloomFilter is a valid skipping index type
let metric_name_col = ColumnMetadata {
column_id: ReservedColumnId::table_id(),
semantic_type: SemanticType::Tag,
@@ -451,11 +448,7 @@ impl MetricEngineInner {
ConcreteDataType::uint32_datatype(),
false,
)
.with_skipping_options(SkippingIndexOptions {
granularity: DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY,
index_type: datatypes::schema::SkippingIndexType::BloomFilter,
})
.unwrap(),
.with_inverted_index(true),
};
let tsid_col = ColumnMetadata {
column_id: ReservedColumnId::tsid(),

View File

@@ -30,10 +30,9 @@ impl MetricEngineInner {
pub async fn drop_region(
&self,
region_id: RegionId,
req: RegionDropRequest,
_req: RegionDropRequest,
) -> Result<AffectedRows> {
let data_region_id = utils::to_data_region_id(region_id);
let fast_path = req.fast_path;
// enclose the guard in a block to prevent the guard from polluting the async context
let (is_physical_region, is_physical_region_busy) = {
@@ -53,7 +52,7 @@ impl MetricEngineInner {
if is_physical_region {
// check if there is no logical region relates to this physical region
if is_physical_region_busy && !fast_path {
if is_physical_region_busy {
// reject if there is any present logical region
return Err(PhysicalRegionBusySnafu {
region_id: data_region_id,
@@ -61,21 +60,9 @@ impl MetricEngineInner {
.build());
}
return self.drop_physical_region(data_region_id).await;
}
if fast_path {
// for fast path, we don't delete the metadata in the metadata region.
// it only remove the logical region from the engine state.
//
// The drop database procedure will ensure the metadata region and data region are dropped eventually.
self.state
.write()
.unwrap()
.remove_logical_region(region_id)?;
Ok(0)
self.drop_physical_region(data_region_id).await
} else {
// cannot merge these two `if` otherwise the stupid type checker will complain
let metadata_region_id = self
.state
.read()
@@ -100,16 +87,13 @@ impl MetricEngineInner {
// Since the physical regions are going to be dropped, we don't need to
// update the contents in metadata region.
self.mito
.handle_request(
data_region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.handle_request(data_region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;
self.mito
.handle_request(
metadata_region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
RegionRequest::Drop(RegionDropRequest {}),
)
.await
.with_context(|_| CloseMitoRegionSnafu { region_id })?;

View File

@@ -40,7 +40,6 @@ pub struct PhysicalRegionOptions {
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum IndexOptions {
#[default]
None,
Inverted,
Skipping {
granularity: u32,

View File

@@ -146,14 +146,11 @@ impl AccessLayer {
} else {
// Write cache is disabled.
let store = self.object_store.clone();
let path_provider = RegionFilePathFactory::new(self.region_dir.clone());
let indexer_builder = IndexerBuilderImpl {
op_type: request.op_type,
metadata: request.metadata.clone(),
row_group_size: write_opts.row_group_size,
puffin_manager: self
.puffin_manager_factory
.build(store, path_provider.clone()),
puffin_manager: self.puffin_manager_factory.build(store),
intermediate_manager: self.intermediate_manager.clone(),
index_options: request.index_options,
inverted_index_config: request.inverted_index_config,
@@ -164,7 +161,9 @@ impl AccessLayer {
self.object_store.clone(),
request.metadata,
indexer_builder,
path_provider,
RegionFilePathFactory {
region_dir: self.region_dir.clone(),
},
)
.await;
writer
@@ -249,18 +248,8 @@ pub trait FilePathProvider: Send + Sync {
/// Path provider that builds paths in local write cache.
#[derive(Clone)]
pub(crate) struct WriteCachePathProvider {
region_id: RegionId,
file_cache: FileCacheRef,
}
impl WriteCachePathProvider {
/// Creates a new `WriteCachePathProvider` instance.
pub fn new(region_id: RegionId, file_cache: FileCacheRef) -> Self {
Self {
region_id,
file_cache,
}
}
pub(crate) region_id: RegionId,
pub(crate) file_cache: FileCacheRef,
}
impl FilePathProvider for WriteCachePathProvider {
@@ -278,14 +267,7 @@ impl FilePathProvider for WriteCachePathProvider {
/// Path provider that builds paths in region storage path.
#[derive(Clone, Debug)]
pub(crate) struct RegionFilePathFactory {
region_dir: String,
}
impl RegionFilePathFactory {
/// Creates a new `RegionFilePathFactory` instance.
pub fn new(region_dir: String) -> Self {
Self { region_dir }
}
pub(crate) region_dir: String,
}
impl FilePathProvider for RegionFilePathFactory {

View File

@@ -187,12 +187,9 @@ impl FileCache {
}
/// Removes a file from the cache explicitly.
/// It always tries to remove the file from the local store because we may not have the file
    /// in the memory index if the upload failed.
pub(crate) async fn remove(&self, key: IndexKey) {
let file_path = self.cache_file_path(key);
self.memory_index.remove(&key).await;
// Always delete the file from the local store.
if let Err(e) = self.local_store.delete(&file_path).await {
warn!(e; "Failed to delete a cached file {}", file_path);
}

View File

@@ -22,7 +22,6 @@ use common_telemetry::{debug, info};
use futures::AsyncWriteExt;
use object_store::ObjectStore;
use snafu::ResultExt;
use store_api::storage::RegionId;
use crate::access_layer::{
new_fs_cache_store, FilePathProvider, RegionFilePathFactory, SstInfoArray, SstWriteRequest,
@@ -115,14 +114,15 @@ impl WriteCache {
let region_id = write_request.metadata.region_id;
let store = self.file_cache.local_store();
let path_provider = WriteCachePathProvider::new(region_id, self.file_cache.clone());
let path_provider = WriteCachePathProvider {
file_cache: self.file_cache.clone(),
region_id,
};
let indexer = IndexerBuilderImpl {
op_type: write_request.op_type,
metadata: write_request.metadata.clone(),
row_group_size: write_opts.row_group_size,
puffin_manager: self
.puffin_manager_factory
.build(store, path_provider.clone()),
puffin_manager: self.puffin_manager_factory.build(store),
intermediate_manager: self.intermediate_manager.clone(),
index_options: write_request.index_options,
inverted_index_config: write_request.inverted_index_config,
@@ -150,41 +150,24 @@ impl WriteCache {
return Ok(sst_info);
}
let mut upload_tracker = UploadTracker::new(region_id);
let mut err = None;
let remote_store = &upload_request.remote_store;
for sst in &sst_info {
let parquet_key = IndexKey::new(region_id, sst.file_id, FileType::Parquet);
let parquet_path = upload_request
.dest_path_provider
.build_sst_file_path(sst.file_id);
if let Err(e) = self.upload(parquet_key, &parquet_path, remote_store).await {
err = Some(e);
break;
}
upload_tracker.push_uploaded_file(parquet_path);
self.upload(parquet_key, &parquet_path, remote_store)
.await?;
if sst.index_metadata.file_size > 0 {
let puffin_key = IndexKey::new(region_id, sst.file_id, FileType::Puffin);
let puffin_path = upload_request
let puffin_path = &upload_request
.dest_path_provider
.build_index_file_path(sst.file_id);
if let Err(e) = self.upload(puffin_key, &puffin_path, remote_store).await {
err = Some(e);
break;
}
upload_tracker.push_uploaded_file(puffin_path);
self.upload(puffin_key, puffin_path, remote_store).await?;
}
}
if let Some(err) = err {
// Cleans files on failure.
upload_tracker
.clean(&sst_info, &self.file_cache, remote_store)
.await;
return Err(err);
}
Ok(sst_info)
}
@@ -350,61 +333,6 @@ pub struct SstUploadRequest {
pub remote_store: ObjectStore,
}
/// A structs to track files to upload and clean them if upload failed.
struct UploadTracker {
/// Id of the region to track.
region_id: RegionId,
/// Paths of files uploaded successfully.
files_uploaded: Vec<String>,
}
impl UploadTracker {
/// Creates a new instance of `UploadTracker` for a given region.
fn new(region_id: RegionId) -> Self {
Self {
region_id,
files_uploaded: Vec::new(),
}
}
/// Add a file path to the list of uploaded files.
fn push_uploaded_file(&mut self, path: String) {
self.files_uploaded.push(path);
}
/// Cleans uploaded files and files in the file cache at best effort.
async fn clean(
&self,
sst_info: &SstInfoArray,
file_cache: &FileCacheRef,
remote_store: &ObjectStore,
) {
common_telemetry::info!(
"Start cleaning files on upload failure, region: {}, num_ssts: {}",
self.region_id,
sst_info.len()
);
// Cleans files in the file cache first.
for sst in sst_info {
let parquet_key = IndexKey::new(self.region_id, sst.file_id, FileType::Parquet);
file_cache.remove(parquet_key).await;
if sst.index_metadata.file_size > 0 {
let puffin_key = IndexKey::new(self.region_id, sst.file_id, FileType::Puffin);
file_cache.remove(puffin_key).await;
}
}
// Cleans uploaded files.
for file_path in &self.files_uploaded {
if let Err(e) = remote_store.delete(file_path).await {
common_telemetry::error!(e; "Failed to delete file {}", file_path);
}
}
}
}
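
The removed tracker implements a common pattern: record each successfully uploaded path as you go, and on the first failure delete everything recorded so far, best effort. An editor-added, storage-agnostic sketch of that pattern, with a made-up `delete` callback standing in for the object store:

// Editor-added sketch of the track-then-clean-on-failure pattern used by UploadTracker.
struct Tracker {
    uploaded: Vec<String>,
}

impl Tracker {
    fn new() -> Self {
        Self { uploaded: Vec::new() }
    }

    /// Record a file that made it to remote storage.
    fn push(&mut self, path: String) {
        self.uploaded.push(path);
    }

    /// Best-effort cleanup: try to delete everything recorded so far.
    fn clean(&self, delete: impl Fn(&str) -> Result<(), String>) {
        for path in &self.uploaded {
            if let Err(e) = delete(path.as_str()) {
                eprintln!("Failed to delete {path}: {e}");
            }
        }
    }
}

fn main() {
    let mut tracker = Tracker::new();
    tracker.push("region/1/sst/a.parquet".to_string());
    // Pretend the second upload failed, so we roll back the first one.
    tracker.clean(|path: &str| {
        println!("deleting {path}");
        Ok(())
    });
}
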
#[cfg(test)]
mod tests {
use common_test_util::temp_dir::create_temp_dir;
@@ -427,7 +355,9 @@ mod tests {
// and now just use local file system to mock.
let mut env = TestEnv::new();
let mock_store = env.init_object_store_manager();
let path_provider = RegionFilePathFactory::new("test".to_string());
let path_provider = RegionFilePathFactory {
region_dir: "test".to_string(),
};
let local_dir = create_temp_dir("");
let local_store = new_fs_store(local_dir.path().to_str().unwrap());
@@ -558,7 +488,9 @@ mod tests {
..Default::default()
};
let upload_request = SstUploadRequest {
dest_path_provider: RegionFilePathFactory::new(data_home.clone()),
dest_path_provider: RegionFilePathFactory {
region_dir: data_home.clone(),
},
remote_store: mock_store.clone(),
};

View File

@@ -56,10 +56,7 @@ async fn test_engine_drop_region() {
// It's okay to drop a region doesn't exist.
engine
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.unwrap_err();
@@ -89,10 +86,7 @@ async fn test_engine_drop_region() {
// drop the created region.
engine
.handle_request(
region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.handle_request(region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.unwrap();
assert!(!engine.is_region_exists(region_id));
@@ -198,10 +192,7 @@ async fn test_engine_drop_region_for_custom_store() {
// Drop the custom region.
engine
.handle_request(
custom_region_id,
RegionRequest::Drop(RegionDropRequest { fast_path: false }),
)
.handle_request(custom_region_id, RegionRequest::Drop(RegionDropRequest {}))
.await
.unwrap();
assert!(!engine.is_region_exists(custom_region_id));

View File

@@ -301,7 +301,10 @@ impl PartitionTreeMemtable {
fn update_stats(&self, metrics: &WriteMetrics) {
        // Only let the tracker track value bytes.
self.alloc_tracker.on_allocation(metrics.value_bytes);
metrics.update_timestamp_range(&self.max_timestamp, &self.min_timestamp);
self.max_timestamp
.fetch_max(metrics.max_ts, Ordering::SeqCst);
self.min_timestamp
.fetch_min(metrics.min_ts, Ordering::SeqCst);
}
}

View File

@@ -14,8 +14,6 @@
//! Internal metrics of the memtable.
use std::sync::atomic::{AtomicI64, Ordering};
/// Metrics of writing memtables.
pub(crate) struct WriteMetrics {
/// Size allocated by keys.
@@ -28,51 +26,6 @@ pub(crate) struct WriteMetrics {
pub(crate) max_ts: i64,
}
impl WriteMetrics {
/// Update the min/max timestamp range according to current write metric.
pub(crate) fn update_timestamp_range(&self, prev_max_ts: &AtomicI64, prev_min_ts: &AtomicI64) {
loop {
let current_min = prev_min_ts.load(Ordering::Relaxed);
if self.min_ts >= current_min {
break;
}
let Err(updated) = prev_min_ts.compare_exchange(
current_min,
self.min_ts,
Ordering::Relaxed,
Ordering::Relaxed,
) else {
break;
};
if updated == self.min_ts {
break;
}
}
loop {
let current_max = prev_max_ts.load(Ordering::Relaxed);
if self.max_ts <= current_max {
break;
}
let Err(updated) = prev_max_ts.compare_exchange(
current_max,
self.max_ts,
Ordering::Relaxed,
Ordering::Relaxed,
) else {
break;
};
if updated == self.max_ts {
break;
}
}
}
}
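
The hand-rolled compare-exchange loops removed here are exactly what `AtomicI64::fetch_min`/`fetch_max` provide, which is what the memtables now call directly (see the commit message above). An editor-added sketch showing the replacement reaches the same end state; `update_range` is a made-up helper name:

use std::sync::atomic::{AtomicI64, Ordering};

// Editor-added sketch: fetch_min/fetch_max collapse the removed CAS loops
// into single atomic operations with the same end state.
fn update_range(min_ts: &AtomicI64, max_ts: &AtomicI64, batch_min: i64, batch_max: i64) {
    min_ts.fetch_min(batch_min, Ordering::SeqCst);
    max_ts.fetch_max(batch_max, Ordering::SeqCst);
}

fn main() {
    let min_ts = AtomicI64::new(i64::MAX);
    let max_ts = AtomicI64::new(i64::MIN);
    update_range(&min_ts, &max_ts, 10, 50);
    update_range(&min_ts, &max_ts, 5, 20);
    assert_eq!(min_ts.load(Ordering::SeqCst), 5);
    assert_eq!(max_ts.load(Ordering::SeqCst), 50);
}
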
impl Default for WriteMetrics {
fn default() -> Self {
Self {

View File

@@ -146,7 +146,8 @@ impl TimeSeriesMemtable {
fn update_stats(&self, stats: WriteMetrics) {
self.alloc_tracker
.on_allocation(stats.key_bytes + stats.value_bytes);
stats.update_timestamp_range(&self.max_timestamp, &self.min_timestamp);
self.max_timestamp.fetch_max(stats.max_ts, Ordering::SeqCst);
self.min_timestamp.fetch_min(stats.min_ts, Ordering::SeqCst);
}
fn write_key_value(&self, kv: KeyValue, stats: &mut WriteMetrics) -> Result<()> {

View File

@@ -32,6 +32,7 @@ use tokio::sync::{mpsc, Semaphore};
use tokio_stream::wrappers::ReceiverStream;
use crate::access_layer::AccessLayerRef;
use crate::cache::file_cache::FileCacheRef;
use crate::cache::CacheStrategy;
use crate::config::DEFAULT_SCAN_CHANNEL_SIZE;
use crate::error::Result;
@@ -310,13 +311,10 @@ impl ScanRegion {
let memtables: Vec<_> = memtables
.into_iter()
.filter(|mem| {
if mem.is_empty() {
// check if memtable is empty by reading stats.
let Some((start, end)) = mem.stats().time_range() else {
return false;
}
let stats = mem.stats();
// Safety: the memtable is not empty.
let (start, end) = stats.time_range().unwrap();
};
// The time range of the memtable is inclusive.
let memtable_range = TimestampRange::new_inclusive(Some(start), Some(end));
memtable_range.intersects(&time_range)
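
The new check treats `stats().time_range()` returning `None` as an empty memtable and otherwise keeps the memtable only when its inclusive range overlaps the scan range. An editor-added sketch of that filter with plain integer pairs standing in for the `Timestamp`/`TimestampRange` types (`Memtable`, `overlaps`, and `prune` are made-up names):

// Editor-added sketch of the memtable pruning: skip memtables with no stats
// (empty) and keep the rest only if their inclusive time range overlaps the scan.
struct Memtable {
    /// None means the memtable is empty.
    time_range: Option<(i64, i64)>,
}

fn overlaps(a: (i64, i64), b: (i64, i64)) -> bool {
    a.0 <= b.1 && b.0 <= a.1
}

fn prune<'a>(memtables: &'a [Memtable], scan: (i64, i64)) -> Vec<&'a Memtable> {
    memtables
        .iter()
        .filter(|m| match m.time_range {
            None => false, // empty memtable, nothing to scan
            Some(range) => overlaps(range, scan),
        })
        .collect()
}

fn main() {
    let memtables = [
        Memtable { time_range: None },
        Memtable { time_range: Some((0, 100)) },
        Memtable { time_range: Some((500, 900)) },
    ];
    // Only the second memtable overlaps [50, 200].
    assert_eq!(prune(&memtables, (50, 200)).len(), 1);
}
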
@@ -426,7 +424,12 @@ impl ScanRegion {
return None;
}
let file_cache = self.cache_strategy.write_cache().map(|w| w.file_cache());
let file_cache = || -> Option<FileCacheRef> {
let write_cache = self.cache_strategy.write_cache()?;
let file_cache = write_cache.file_cache();
Some(file_cache)
}();
let inverted_index_cache = self.cache_strategy.inverted_index_cache().cloned();
let puffin_metadata_cache = self.cache_strategy.puffin_metadata_cache().cloned();
@@ -461,8 +464,14 @@ impl ScanRegion {
return None;
}
let file_cache = self.cache_strategy.write_cache().map(|w| w.file_cache());
let file_cache = || -> Option<FileCacheRef> {
let write_cache = self.cache_strategy.write_cache()?;
let file_cache = write_cache.file_cache();
Some(file_cache)
}();
let bloom_filter_index_cache = self.cache_strategy.bloom_filter_index_cache().cloned();
let puffin_metadata_cache = self.cache_strategy.puffin_metadata_cache().cloned();
BloomFilterIndexApplierBuilder::new(
@@ -487,18 +496,12 @@ impl ScanRegion {
return None;
}
let file_cache = self.cache_strategy.write_cache().map(|w| w.file_cache());
let puffin_metadata_cache = self.cache_strategy.puffin_metadata_cache().cloned();
FulltextIndexApplierBuilder::new(
self.access_layer.region_dir().to_string(),
self.version.metadata.region_id,
self.access_layer.object_store().clone(),
self.access_layer.puffin_manager_factory().clone(),
self.version.metadata.as_ref(),
)
.with_file_cache(file_cache)
.with_puffin_metadata_cache(puffin_metadata_cache)
.build(&self.request.filters)
.inspect_err(|err| warn!(err; "Failed to build fulltext index applier"))
.ok()

View File

@@ -35,8 +35,8 @@ use store_api::metadata::{ColumnMetadata, RegionMetadata, RegionMetadataRef};
use store_api::region_engine::{SetRegionRoleStateResponse, SettableRegionRoleState};
use store_api::region_request::{
AffectedRows, RegionAlterRequest, RegionCatchupRequest, RegionCloseRequest,
RegionCompactRequest, RegionCreateRequest, RegionFlushRequest, RegionOpenRequest,
RegionRequest, RegionTruncateRequest,
RegionCompactRequest, RegionCreateRequest, RegionDropRequest, RegionFlushRequest,
RegionOpenRequest, RegionRequest, RegionTruncateRequest,
};
use store_api::storage::{RegionId, SequenceNumber};
use tokio::sync::oneshot::{self, Receiver, Sender};
@@ -624,10 +624,10 @@ impl WorkerRequest {
sender: sender.into(),
request: DdlRequest::Create(v),
}),
RegionRequest::Drop(_) => WorkerRequest::Ddl(SenderDdlRequest {
RegionRequest::Drop(v) => WorkerRequest::Ddl(SenderDdlRequest {
region_id,
sender: sender.into(),
request: DdlRequest::Drop,
request: DdlRequest::Drop(v),
}),
RegionRequest::Open(v) => WorkerRequest::Ddl(SenderDdlRequest {
region_id,
@@ -690,7 +690,7 @@ impl WorkerRequest {
#[derive(Debug)]
pub(crate) enum DdlRequest {
Create(RegionCreateRequest),
Drop,
Drop(RegionDropRequest),
Open((RegionOpenRequest, Option<WalEntryReceiver>)),
Close(RegionCloseRequest),
Alter(RegionAlterRequest),

View File

@@ -174,8 +174,31 @@ impl FileMeta {
.contains(&IndexType::BloomFilterIndex)
}
pub fn index_file_size(&self) -> u64 {
self.index_file_size
/// Returns the size of the inverted index file
pub fn inverted_index_size(&self) -> Option<u64> {
if self.available_indexes.len() == 1 && self.inverted_index_available() {
Some(self.index_file_size)
} else {
None
}
}
/// Returns the size of the fulltext index file
pub fn fulltext_index_size(&self) -> Option<u64> {
if self.available_indexes.len() == 1 && self.fulltext_index_available() {
Some(self.index_file_size)
} else {
None
}
}
/// Returns the size of the bloom filter index file
pub fn bloom_filter_index_size(&self) -> Option<u64> {
if self.available_indexes.len() == 1 && self.bloom_filter_index_available() {
Some(self.index_file_size)
} else {
None
}
}
}
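
All three accessors share one rule: the single `index_file_size` can only be attributed to a specific index kind when that kind is the only index present in the puffin file. An editor-added sketch of that rule with a simplified stand-in for `FileMeta` (`Meta` and `size_of` are made up):

// Editor-added sketch of the "attribute the size only if it is the sole index" rule.
#[derive(Debug, Clone, Copy, PartialEq)]
enum IndexType {
    Inverted,
    Fulltext,
    BloomFilter,
}

struct Meta {
    available_indexes: Vec<IndexType>,
    index_file_size: u64,
}

impl Meta {
    fn size_of(&self, kind: IndexType) -> Option<u64> {
        // With more than one index in the same file the per-index size is unknown.
        if self.available_indexes.len() == 1 && self.available_indexes[0] == kind {
            Some(self.index_file_size)
        } else {
            None
        }
    }
}

fn main() {
    let only_inverted = Meta { available_indexes: vec![IndexType::Inverted], index_file_size: 42 };
    assert_eq!(only_inverted.size_of(IndexType::Inverted), Some(42));
    assert_eq!(only_inverted.size_of(IndexType::Fulltext), None);

    let mixed = Meta {
        available_indexes: vec![IndexType::Inverted, IndexType::BloomFilter],
        index_file_size: 42,
    };
    assert_eq!(mixed.size_of(IndexType::Inverted), None);
}
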

View File

@@ -113,9 +113,11 @@ impl FilePurger for LocalFilePurger {
}
// Purges index content in the stager.
let puffin_file_name =
crate::sst::location::index_file_path(sst_layer.region_dir(), file_meta.file_id);
if let Err(e) = sst_layer
.puffin_manager_factory()
.purge_stager(file_meta.file_id)
.purge_stager(&puffin_file_name)
.await
{
error!(e; "Failed to purge stager with index file, file_id: {}, region: {}",

View File

@@ -103,6 +103,7 @@ pub type BloomFilterOutput = IndexBaseOutput;
#[derive(Default)]
pub struct Indexer {
file_id: FileId,
file_path: String,
region_id: RegionId,
puffin_manager: Option<SstPuffinManager>,
inverted_indexer: Option<InvertedIndexer>,
@@ -169,7 +170,7 @@ impl Indexer {
#[async_trait::async_trait]
pub trait IndexerBuilder {
/// Builds indexer of given file id to [index_file_path].
async fn build(&self, file_id: FileId) -> Indexer;
async fn build(&self, file_id: FileId, index_file_path: String) -> Indexer;
}
pub(crate) struct IndexerBuilderImpl {
@@ -187,9 +188,10 @@ pub(crate) struct IndexerBuilderImpl {
#[async_trait::async_trait]
impl IndexerBuilder for IndexerBuilderImpl {
/// Sanity check for arguments and create a new [Indexer] if arguments are valid.
async fn build(&self, file_id: FileId) -> Indexer {
async fn build(&self, file_id: FileId, index_file_path: String) -> Indexer {
let mut indexer = Indexer {
file_id,
file_path: index_file_path,
region_id: self.metadata.region_id,
..Default::default()
};
@@ -390,31 +392,30 @@ mod tests {
use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder};
use super::*;
use crate::access_layer::FilePathProvider;
use crate::config::{FulltextIndexConfig, Mode};
struct MetaConfig {
with_inverted: bool,
with_tag: bool,
with_fulltext: bool,
with_skipping_bloom: bool,
}
fn mock_region_metadata(
MetaConfig {
with_inverted,
with_tag,
with_fulltext,
with_skipping_bloom,
}: MetaConfig,
) -> RegionMetadataRef {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 2));
let mut column_schema = ColumnSchema::new("a", ConcreteDataType::int64_datatype(), false);
if with_inverted {
column_schema = column_schema.with_inverted_index(true);
}
builder
.push_column_metadata(ColumnMetadata {
column_schema,
semantic_type: SemanticType::Field,
column_schema: ColumnSchema::new("a", ConcreteDataType::int64_datatype(), false),
semantic_type: if with_tag {
SemanticType::Tag
} else {
SemanticType::Field
},
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
@@ -432,6 +433,10 @@ mod tests {
column_id: 3,
});
if with_tag {
builder.primary_key(vec![1]);
}
if with_fulltext {
let column_schema =
ColumnSchema::new("text", ConcreteDataType::string_datatype(), true)
@@ -479,18 +484,6 @@ mod tests {
IntermediateManager::init_fs(path).await.unwrap()
}
struct NoopPathProvider;
impl FilePathProvider for NoopPathProvider {
fn build_index_file_path(&self, _file_id: FileId) -> String {
unreachable!()
}
fn build_sst_file_path(&self, _file_id: FileId) -> String {
unreachable!()
}
}
#[tokio::test]
async fn test_build_indexer_basic() {
let (dir, factory) =
@@ -498,7 +491,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_inverted: true,
with_tag: true,
with_fulltext: true,
with_skipping_bloom: true,
});
@@ -506,14 +499,14 @@ mod tests {
op_type: OperationType::Flush,
metadata,
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager,
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
fulltext_index_config: FulltextIndexConfig::default(),
bloom_filter_index_config: BloomFilterConfig::default(),
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_some());
@@ -528,7 +521,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_inverted: true,
with_tag: true,
with_fulltext: true,
with_skipping_bloom: true,
});
@@ -536,7 +529,7 @@ mod tests {
op_type: OperationType::Flush,
metadata: metadata.clone(),
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager.clone(),
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig {
@@ -546,7 +539,7 @@ mod tests {
fulltext_index_config: FulltextIndexConfig::default(),
bloom_filter_index_config: BloomFilterConfig::default(),
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_none());
@@ -557,7 +550,7 @@ mod tests {
op_type: OperationType::Compact,
metadata: metadata.clone(),
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager.clone(),
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
@@ -567,7 +560,7 @@ mod tests {
},
bloom_filter_index_config: BloomFilterConfig::default(),
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_some());
@@ -578,7 +571,7 @@ mod tests {
op_type: OperationType::Compact,
metadata,
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager,
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
@@ -588,7 +581,7 @@ mod tests {
..Default::default()
},
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_some());
@@ -603,7 +596,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_inverted: false,
with_tag: false,
with_fulltext: true,
with_skipping_bloom: true,
});
@@ -611,14 +604,14 @@ mod tests {
op_type: OperationType::Flush,
metadata: metadata.clone(),
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager.clone(),
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
fulltext_index_config: FulltextIndexConfig::default(),
bloom_filter_index_config: BloomFilterConfig::default(),
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_none());
@@ -626,7 +619,7 @@ mod tests {
assert!(indexer.bloom_filter_indexer.is_some());
let metadata = mock_region_metadata(MetaConfig {
with_inverted: true,
with_tag: true,
with_fulltext: false,
with_skipping_bloom: true,
});
@@ -634,14 +627,14 @@ mod tests {
op_type: OperationType::Flush,
metadata: metadata.clone(),
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager.clone(),
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
fulltext_index_config: FulltextIndexConfig::default(),
bloom_filter_index_config: BloomFilterConfig::default(),
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_some());
@@ -649,7 +642,7 @@ mod tests {
assert!(indexer.bloom_filter_indexer.is_some());
let metadata = mock_region_metadata(MetaConfig {
with_inverted: true,
with_tag: true,
with_fulltext: true,
with_skipping_bloom: false,
});
@@ -657,14 +650,14 @@ mod tests {
op_type: OperationType::Flush,
metadata: metadata.clone(),
row_group_size: 1024,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager,
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
fulltext_index_config: FulltextIndexConfig::default(),
bloom_filter_index_config: BloomFilterConfig::default(),
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_some());
@@ -679,7 +672,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_inverted: true,
with_tag: true,
with_fulltext: true,
with_skipping_bloom: true,
});
@@ -687,14 +680,14 @@ mod tests {
op_type: OperationType::Flush,
metadata,
row_group_size: 0,
puffin_manager: factory.build(mock_object_store(), NoopPathProvider),
puffin_manager: factory.build(mock_object_store()),
intermediate_manager: intm_manager,
index_options: IndexOptions::default(),
inverted_index_config: InvertedIndexConfig::default(),
fulltext_index_config: FulltextIndexConfig::default(),
bloom_filter_index_config: BloomFilterConfig::default(),
}
.build(FileId::random())
.build(FileId::random(), "test".to_string())
.await;
assert!(indexer.inverted_indexer.is_none());


@@ -28,7 +28,6 @@ use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader};
use snafu::ResultExt;
use store_api::storage::{ColumnId, RegionId};
use crate::access_layer::{RegionFilePathFactory, WriteCachePathProvider};
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::cache::index::bloom_filter_index::{
BloomFilterIndexCacheRef, CachedBloomFilterIndexBlobReader,
@@ -44,6 +43,7 @@ use crate::sst::index::bloom_filter::applier::builder::Predicate;
use crate::sst::index::bloom_filter::INDEX_BLOB_TYPE;
use crate::sst::index::puffin_manager::{BlobReader, PuffinManagerFactory};
use crate::sst::index::TYPE_BLOOM_FILTER_INDEX;
use crate::sst::location;
pub(crate) type BloomFilterIndexApplierRef = Arc<BloomFilterIndexApplier>;
@@ -247,12 +247,11 @@ impl BloomFilterIndexApplier {
return Ok(None);
};
let puffin_manager = self.puffin_manager_factory.build(
file_cache.local_store(),
WriteCachePathProvider::new(self.region_id, file_cache.clone()),
);
let puffin_manager = self.puffin_manager_factory.build(file_cache.local_store());
let puffin_file_name = file_cache.cache_file_path(index_key);
let reader = puffin_manager
.reader(&file_id)
.reader(&puffin_file_name)
.await
.context(PuffinBuildReaderSnafu)?
.with_file_size_hint(file_size_hint)
@@ -279,14 +278,12 @@ impl BloomFilterIndexApplier {
) -> Result<BlobReader> {
let puffin_manager = self
.puffin_manager_factory
.build(
self.object_store.clone(),
RegionFilePathFactory::new(self.region_dir.clone()),
)
.build(self.object_store.clone())
.with_puffin_metadata_cache(self.puffin_metadata_cache.clone());
let file_path = location::index_file_path(&self.region_dir, file_id);
puffin_manager
.reader(&file_id)
.reader(&file_path)
.await
.context(PuffinBuildReaderSnafu)?
.with_file_size_hint(file_size_hint)
@@ -450,6 +447,7 @@ mod tests {
let memory_usage_threshold = Some(1024);
let file_id = FileId::random();
let region_dir = "region_dir".to_string();
let path = location::index_file_path(&region_dir, file_id);
let mut indexer =
BloomFilterIndexer::new(file_id, &region_metadata, intm_mgr, memory_usage_threshold)
@@ -462,12 +460,9 @@ mod tests {
let mut batch = new_batch("tag2", 10..20);
indexer.update(&mut batch).await.unwrap();
let puffin_manager = factory.build(
object_store.clone(),
RegionFilePathFactory::new(region_dir.clone()),
);
let puffin_manager = factory.build(object_store.clone());
let mut puffin_writer = puffin_manager.writer(&file_id).await.unwrap();
let mut puffin_writer = puffin_manager.writer(&path).await.unwrap();
indexer.finish(&mut puffin_writer).await.unwrap();
puffin_writer.finish().await.unwrap();


@@ -356,7 +356,6 @@ pub(crate) mod tests {
use store_api::storage::RegionId;
use super::*;
use crate::access_layer::FilePathProvider;
use crate::read::BatchColumn;
use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt};
use crate::sst::index::puffin_manager::PuffinManagerFactory;
@@ -369,18 +368,6 @@ pub(crate) mod tests {
IntermediateManager::init_fs(path).await.unwrap()
}
pub struct TestPathProvider;
impl FilePathProvider for TestPathProvider {
fn build_index_file_path(&self, file_id: FileId) -> String {
file_id.to_string()
}
fn build_sst_file_path(&self, file_id: FileId) -> String {
file_id.to_string()
}
}
/// tag_str:
/// - type: string
/// - index: bloom filter
@@ -496,16 +483,16 @@ pub(crate) mod tests {
indexer.update(&mut batch).await.unwrap();
let (_d, factory) = PuffinManagerFactory::new_for_test_async(prefix).await;
let puffin_manager = factory.build(object_store, TestPathProvider);
let puffin_manager = factory.build(object_store);
let file_id = FileId::random();
let mut puffin_writer = puffin_manager.writer(&file_id).await.unwrap();
let index_file_name = "index_file";
let mut puffin_writer = puffin_manager.writer(index_file_name).await.unwrap();
let (row_count, byte_count) = indexer.finish(&mut puffin_writer).await.unwrap();
assert_eq!(row_count, 20);
assert!(byte_count > 0);
puffin_writer.finish().await.unwrap();
let puffin_reader = puffin_manager.reader(&file_id).await.unwrap();
let puffin_reader = puffin_manager.reader(index_file_name).await.unwrap();
// tag_str
{


@@ -15,22 +15,19 @@
use std::collections::BTreeSet;
use std::sync::Arc;
use common_telemetry::warn;
use index::fulltext_index::search::{FulltextIndexSearcher, RowId, TantivyFulltextIndexSearcher};
use object_store::ObjectStore;
use puffin::puffin_manager::cache::PuffinMetadataCacheRef;
use puffin::puffin_manager::{DirGuard, PuffinManager, PuffinReader};
use snafu::ResultExt;
use store_api::storage::{ColumnId, RegionId};
use store_api::storage::ColumnId;
use crate::access_layer::{RegionFilePathFactory, WriteCachePathProvider};
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::error::{ApplyFulltextIndexSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result};
use crate::metrics::INDEX_APPLY_ELAPSED;
use crate::sst::file::FileId;
use crate::sst::index::fulltext_index::INDEX_BLOB_TYPE_TANTIVY;
use crate::sst::index::puffin_manager::{PuffinManagerFactory, SstPuffinDir};
use crate::sst::index::TYPE_FULLTEXT_INDEX;
use crate::sst::location;
pub mod builder;
@@ -39,9 +36,6 @@ pub struct FulltextIndexApplier {
/// The root directory of the region.
region_dir: String,
/// The region ID.
region_id: RegionId,
/// Queries to apply to the index.
queries: Vec<(ColumnId, String)>,
@@ -50,12 +44,6 @@ pub struct FulltextIndexApplier {
/// Store responsible for accessing index files.
store: ObjectStore,
/// File cache to be used by the `FulltextIndexApplier`.
file_cache: Option<FileCacheRef>,
/// The puffin metadata cache.
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
}
pub type FulltextIndexApplierRef = Arc<FulltextIndexApplier>;
@@ -64,43 +52,20 @@ impl FulltextIndexApplier {
/// Creates a new `FulltextIndexApplier`.
pub fn new(
region_dir: String,
region_id: RegionId,
store: ObjectStore,
queries: Vec<(ColumnId, String)>,
puffin_manager_factory: PuffinManagerFactory,
) -> Self {
Self {
region_dir,
region_id,
store,
queries,
puffin_manager_factory,
file_cache: None,
puffin_metadata_cache: None,
}
}
/// Sets the file cache.
pub fn with_file_cache(mut self, file_cache: Option<FileCacheRef>) -> Self {
self.file_cache = file_cache;
self
}
/// Sets the puffin metadata cache.
pub fn with_puffin_metadata_cache(
mut self,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
self.puffin_metadata_cache = puffin_metadata_cache;
self
}
/// Applies the queries to the fulltext index of the specified SST file.
pub async fn apply(
&self,
file_id: FileId,
file_size_hint: Option<u64>,
) -> Result<BTreeSet<RowId>> {
pub async fn apply(&self, file_id: FileId) -> Result<BTreeSet<RowId>> {
let _timer = INDEX_APPLY_ELAPSED
.with_label_values(&[TYPE_FULLTEXT_INDEX])
.start_timer();
@@ -109,9 +74,7 @@ impl FulltextIndexApplier {
let mut row_ids = BTreeSet::new();
for (column_id, query) in &self.queries {
let dir = self
.index_dir_path(file_id, *column_id, file_size_hint)
.await?;
let dir = self.index_dir_path(file_id, *column_id).await?;
let path = match &dir {
Some(dir) => dir.path(),
None => {
@@ -147,74 +110,15 @@ impl FulltextIndexApplier {
&self,
file_id: FileId,
column_id: ColumnId,
file_size_hint: Option<u64>,
) -> Result<Option<SstPuffinDir>> {
let blob_key = format!("{INDEX_BLOB_TYPE_TANTIVY}-{column_id}");
let puffin_manager = self.puffin_manager_factory.build(self.store.clone());
let file_path = location::index_file_path(&self.region_dir, file_id);
// FAST PATH: Try to read the index from the file cache.
if let Some(file_cache) = &self.file_cache {
let index_key = IndexKey::new(self.region_id, file_id, FileType::Puffin);
if file_cache.get(index_key).await.is_some() {
match self
.get_index_from_file_cache(file_cache, file_id, file_size_hint, &blob_key)
.await
{
Ok(dir) => return Ok(dir),
Err(err) => {
warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.")
}
}
}
}
// SLOW PATH: Try to read the index from the remote file.
self.get_index_from_remote_file(file_id, file_size_hint, &blob_key)
.await
}
async fn get_index_from_file_cache(
&self,
file_cache: &FileCacheRef,
file_id: FileId,
file_size_hint: Option<u64>,
blob_key: &str,
) -> Result<Option<SstPuffinDir>> {
match self
.puffin_manager_factory
.build(
file_cache.local_store(),
WriteCachePathProvider::new(self.region_id, file_cache.clone()),
)
.reader(&file_id)
match puffin_manager
.reader(&file_path)
.await
.context(PuffinBuildReaderSnafu)?
.with_file_size_hint(file_size_hint)
.dir(blob_key)
.await
{
Ok(dir) => Ok(Some(dir)),
Err(puffin::error::Error::BlobNotFound { .. }) => Ok(None),
Err(err) => Err(err).context(PuffinReadBlobSnafu),
}
}
async fn get_index_from_remote_file(
&self,
file_id: FileId,
file_size_hint: Option<u64>,
blob_key: &str,
) -> Result<Option<SstPuffinDir>> {
match self
.puffin_manager_factory
.build(
self.store.clone(),
RegionFilePathFactory::new(self.region_dir.clone()),
)
.reader(&file_id)
.await
.context(PuffinBuildReaderSnafu)?
.with_file_size_hint(file_size_hint)
.dir(blob_key)
.dir(&format!("{INDEX_BLOB_TYPE_TANTIVY}-{column_id}"))
.await
{
Ok(dir) => Ok(Some(dir)),


@@ -15,11 +15,9 @@
use datafusion_common::ScalarValue;
use datafusion_expr::Expr;
use object_store::ObjectStore;
use puffin::puffin_manager::cache::PuffinMetadataCacheRef;
use store_api::metadata::RegionMetadata;
use store_api::storage::{ColumnId, ConcreteDataType, RegionId};
use store_api::storage::{ColumnId, ConcreteDataType};
use crate::cache::file_cache::FileCacheRef;
use crate::error::Result;
use crate::sst::index::fulltext_index::applier::FulltextIndexApplier;
use crate::sst::index::puffin_manager::PuffinManagerFactory;
@@ -27,49 +25,27 @@ use crate::sst::index::puffin_manager::PuffinManagerFactory;
/// `FulltextIndexApplierBuilder` is a builder for `FulltextIndexApplier`.
pub struct FulltextIndexApplierBuilder<'a> {
region_dir: String,
region_id: RegionId,
store: ObjectStore,
puffin_manager_factory: PuffinManagerFactory,
metadata: &'a RegionMetadata,
file_cache: Option<FileCacheRef>,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
}
impl<'a> FulltextIndexApplierBuilder<'a> {
/// Creates a new `FulltextIndexApplierBuilder`.
pub fn new(
region_dir: String,
region_id: RegionId,
store: ObjectStore,
puffin_manager_factory: PuffinManagerFactory,
metadata: &'a RegionMetadata,
) -> Self {
Self {
region_dir,
region_id,
store,
puffin_manager_factory,
metadata,
file_cache: None,
puffin_metadata_cache: None,
}
}
/// Sets the file cache to be used by the `FulltextIndexApplier`.
pub fn with_file_cache(mut self, file_cache: Option<FileCacheRef>) -> Self {
self.file_cache = file_cache;
self
}
/// Sets the puffin metadata cache to be used by the `FulltextIndexApplier`.
pub fn with_puffin_metadata_cache(
mut self,
puffin_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
self.puffin_metadata_cache = puffin_metadata_cache;
self
}
/// Builds `SstIndexApplier` from the given expressions.
pub fn build(self, exprs: &[Expr]) -> Result<Option<FulltextIndexApplier>> {
let mut queries = Vec::with_capacity(exprs.len());
@@ -82,13 +58,10 @@ impl<'a> FulltextIndexApplierBuilder<'a> {
Ok((!queries.is_empty()).then(|| {
FulltextIndexApplier::new(
self.region_dir,
self.region_id,
self.store,
queries,
self.puffin_manager_factory,
)
.with_file_cache(self.file_cache)
.with_puffin_metadata_cache(self.puffin_metadata_cache)
}))
}


@@ -350,11 +350,11 @@ mod tests {
use store_api::storage::{ConcreteDataType, RegionId};
use super::*;
use crate::access_layer::RegionFilePathFactory;
use crate::read::{Batch, BatchColumn};
use crate::sst::file::FileId;
use crate::sst::index::fulltext_index::applier::FulltextIndexApplier;
use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::sst::location;
fn mock_object_store() -> ObjectStore {
ObjectStore::new(Memory::default()).unwrap().finish()
@@ -494,6 +494,7 @@ mod tests {
let (d, factory) = PuffinManagerFactory::new_for_test_async(prefix).await;
let region_dir = "region0".to_string();
let sst_file_id = FileId::random();
let file_path = location::index_file_path(&region_dir, sst_file_id);
let object_store = mock_object_store();
let region_metadata = mock_region_metadata();
let intm_mgr = new_intm_mgr(d.path().to_string_lossy()).await;
@@ -513,11 +514,8 @@ mod tests {
let mut batch = new_batch(rows);
indexer.update(&mut batch).await.unwrap();
let puffin_manager = factory.build(
object_store.clone(),
RegionFilePathFactory::new(region_dir.clone()),
);
let mut writer = puffin_manager.writer(&sst_file_id).await.unwrap();
let puffin_manager = factory.build(object_store.clone());
let mut writer = puffin_manager.writer(&file_path).await.unwrap();
let _ = indexer.finish(&mut writer).await.unwrap();
writer.finish().await.unwrap();
@@ -525,7 +523,6 @@ mod tests {
let _d = &d;
let applier = FulltextIndexApplier::new(
region_dir.clone(),
region_metadata.region_id,
object_store.clone(),
queries
.into_iter()
@@ -534,7 +531,7 @@ mod tests {
factory.clone(),
);
async move { applier.apply(sst_file_id, None).await.unwrap() }.boxed()
async move { applier.apply(sst_file_id).await.unwrap() }.boxed()
}
}


@@ -62,7 +62,7 @@ impl Indexer {
async fn build_puffin_writer(&mut self) -> Option<SstPuffinWriter> {
let puffin_manager = self.puffin_manager.take()?;
let err = match puffin_manager.writer(&self.file_id).await {
let err = match puffin_manager.writer(&self.file_path).await {
Ok(writer) => return Some(writer),
Err(err) => err,
};


@@ -28,7 +28,6 @@ use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader};
use snafu::ResultExt;
use store_api::storage::RegionId;
use crate::access_layer::{RegionFilePathFactory, WriteCachePathProvider};
use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey};
use crate::cache::index::inverted_index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef};
use crate::error::{
@@ -39,6 +38,7 @@ use crate::sst::file::FileId;
use crate::sst::index::inverted_index::INDEX_BLOB_TYPE;
use crate::sst::index::puffin_manager::{BlobReader, PuffinManagerFactory};
use crate::sst::index::TYPE_INVERTED_INDEX;
use crate::sst::location;
/// `InvertedIndexApplier` is responsible for applying predicates to the provided SST files
/// and returning the relevant row group ids for further scan.
@@ -172,14 +172,12 @@ impl InvertedIndexApplier {
return Ok(None);
};
let puffin_manager = self.puffin_manager_factory.build(
file_cache.local_store(),
WriteCachePathProvider::new(self.region_id, file_cache.clone()),
);
let puffin_manager = self.puffin_manager_factory.build(file_cache.local_store());
let puffin_file_name = file_cache.cache_file_path(index_key);
// Adds file size hint to the puffin reader to avoid extra metadata read.
let reader = puffin_manager
.reader(&file_id)
.reader(&puffin_file_name)
.await
.context(PuffinBuildReaderSnafu)?
.with_file_size_hint(file_size_hint)
@@ -200,14 +198,12 @@ impl InvertedIndexApplier {
) -> Result<BlobReader> {
let puffin_manager = self
.puffin_manager_factory
.build(
self.store.clone(),
RegionFilePathFactory::new(self.region_dir.clone()),
)
.build(self.store.clone())
.with_puffin_metadata_cache(self.puffin_metadata_cache.clone());
let file_path = location::index_file_path(&self.region_dir, file_id);
puffin_manager
.reader(&file_id)
.reader(&file_path)
.await
.context(PuffinBuildReaderSnafu)?
.with_file_size_hint(file_size_hint)
@@ -243,12 +239,10 @@ mod tests {
let object_store = ObjectStore::new(Memory::default()).unwrap().finish();
let file_id = FileId::random();
let region_dir = "region_dir".to_string();
let path = location::index_file_path(&region_dir, file_id);
let puffin_manager = puffin_manager_factory.build(
object_store.clone(),
RegionFilePathFactory::new(region_dir.clone()),
);
let mut writer = puffin_manager.writer(&file_id).await.unwrap();
let puffin_manager = puffin_manager_factory.build(object_store.clone());
let mut writer = puffin_manager.writer(&path).await.unwrap();
writer
.put_blob(INDEX_BLOB_TYPE, Cursor::new(vec![]), Default::default())
.await
@@ -291,12 +285,10 @@ mod tests {
let object_store = ObjectStore::new(Memory::default()).unwrap().finish();
let file_id = FileId::random();
let region_dir = "region_dir".to_string();
let path = location::index_file_path(&region_dir, file_id);
let puffin_manager = puffin_manager_factory.build(
object_store.clone(),
RegionFilePathFactory::new(region_dir.clone()),
);
let mut writer = puffin_manager.writer(&file_id).await.unwrap();
let puffin_manager = puffin_manager_factory.build(object_store.clone());
let mut writer = puffin_manager.writer(&path).await.unwrap();
writer
.put_blob("invalid_blob_type", Cursor::new(vec![]), Default::default())
.await


@@ -336,13 +336,13 @@ mod tests {
use store_api::storage::RegionId;
use super::*;
use crate::access_layer::RegionFilePathFactory;
use crate::cache::index::inverted_index::InvertedIndexCache;
use crate::metrics::CACHE_BYTES;
use crate::read::BatchColumn;
use crate::row_converter::{DensePrimaryKeyCodec, PrimaryKeyCodecExt};
use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder;
use crate::sst::index::puffin_manager::PuffinManagerFactory;
use crate::sst::location;
fn mock_object_store() -> ObjectStore {
ObjectStore::new(Memory::default()).unwrap().finish()
@@ -438,6 +438,7 @@ mod tests {
let (d, factory) = PuffinManagerFactory::new_for_test_async(prefix).await;
let region_dir = "region0".to_string();
let sst_file_id = FileId::random();
let file_path = location::index_file_path(&region_dir, sst_file_id);
let object_store = mock_object_store();
let region_metadata = mock_region_metadata();
let intm_mgr = new_intm_mgr(d.path().to_string_lossy()).await;
@@ -459,11 +460,8 @@ mod tests {
creator.update(&mut batch).await.unwrap();
}
let puffin_manager = factory.build(
object_store.clone(),
RegionFilePathFactory::new(region_dir.clone()),
);
let mut writer = puffin_manager.writer(&sst_file_id).await.unwrap();
let puffin_manager = factory.build(object_store.clone());
let mut writer = puffin_manager.writer(&file_path).await.unwrap();
let (row_count, _) = creator.finish(&mut writer).await.unwrap();
assert_eq!(row_count, rows.len() * segment_row_count);
writer.finish().await.unwrap();


@@ -26,20 +26,18 @@ use puffin::puffin_manager::stager::{BoundedStager, Stager};
use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader};
use snafu::ResultExt;
use crate::access_layer::FilePathProvider;
use crate::error::{PuffinInitStagerSnafu, PuffinPurgeStagerSnafu, Result};
use crate::metrics::{
StagerMetrics, INDEX_PUFFIN_FLUSH_OP_TOTAL, INDEX_PUFFIN_READ_BYTES_TOTAL,
INDEX_PUFFIN_READ_OP_TOTAL, INDEX_PUFFIN_WRITE_BYTES_TOTAL, INDEX_PUFFIN_WRITE_OP_TOTAL,
};
use crate::sst::file::FileId;
use crate::sst::index::store::{self, InstrumentedStore};
type InstrumentedRangeReader = store::InstrumentedRangeReader<'static>;
type InstrumentedAsyncWrite = store::InstrumentedAsyncWrite<'static, FuturesAsyncWriter>;
pub(crate) type SstPuffinManager =
FsPuffinManager<Arc<BoundedStager<FileId>>, ObjectStorePuffinFileAccessor>;
FsPuffinManager<Arc<BoundedStager>, ObjectStorePuffinFileAccessor>;
pub(crate) type SstPuffinReader = <SstPuffinManager as PuffinManager>::Reader;
pub(crate) type SstPuffinWriter = <SstPuffinManager as PuffinManager>::Writer;
pub(crate) type SstPuffinBlob = <SstPuffinReader as PuffinReader>::Blob;
@@ -52,7 +50,7 @@ const STAGING_DIR: &str = "staging";
#[derive(Clone)]
pub struct PuffinManagerFactory {
/// The stager used by the puffin manager.
stager: Arc<BoundedStager<FileId>>,
stager: Arc<BoundedStager>,
/// The size of the write buffer used to create object store.
write_buffer_size: Option<usize>,
@@ -81,20 +79,15 @@ impl PuffinManagerFactory {
})
}
pub(crate) fn build(
&self,
store: ObjectStore,
path_provider: impl FilePathProvider + 'static,
) -> SstPuffinManager {
pub(crate) fn build(&self, store: ObjectStore) -> SstPuffinManager {
let store = InstrumentedStore::new(store).with_write_buffer_size(self.write_buffer_size);
let puffin_file_accessor =
ObjectStorePuffinFileAccessor::new(store, Arc::new(path_provider));
let puffin_file_accessor = ObjectStorePuffinFileAccessor::new(store);
SstPuffinManager::new(self.stager.clone(), puffin_file_accessor)
}
pub(crate) async fn purge_stager(&self, file_id: FileId) -> Result<()> {
pub(crate) async fn purge_stager(&self, puffin_file_name: &str) -> Result<()> {
self.stager
.purge(&file_id)
.purge(puffin_file_name)
.await
.context(PuffinPurgeStagerSnafu)
}
@@ -126,15 +119,11 @@ impl PuffinManagerFactory {
#[derive(Clone)]
pub(crate) struct ObjectStorePuffinFileAccessor {
object_store: InstrumentedStore,
path_provider: Arc<dyn FilePathProvider>,
}
impl ObjectStorePuffinFileAccessor {
pub fn new(object_store: InstrumentedStore, path_provider: Arc<dyn FilePathProvider>) -> Self {
Self {
object_store,
path_provider,
}
pub fn new(object_store: InstrumentedStore) -> Self {
Self { object_store }
}
}
@@ -142,13 +131,11 @@ impl ObjectStorePuffinFileAccessor {
impl PuffinFileAccessor for ObjectStorePuffinFileAccessor {
type Reader = InstrumentedRangeReader;
type Writer = InstrumentedAsyncWrite;
type FileHandle = FileId;
async fn reader(&self, handle: &FileId) -> PuffinResult<Self::Reader> {
let file_path = self.path_provider.build_index_file_path(*handle);
async fn reader(&self, puffin_file_name: &str) -> PuffinResult<Self::Reader> {
self.object_store
.range_reader(
&file_path,
puffin_file_name,
&INDEX_PUFFIN_READ_BYTES_TOTAL,
&INDEX_PUFFIN_READ_OP_TOTAL,
)
@@ -157,11 +144,10 @@ impl PuffinFileAccessor for ObjectStorePuffinFileAccessor {
.context(puffin_error::ExternalSnafu)
}
async fn writer(&self, handle: &FileId) -> PuffinResult<Self::Writer> {
let file_path = self.path_provider.build_index_file_path(*handle);
async fn writer(&self, puffin_file_name: &str) -> PuffinResult<Self::Writer> {
self.object_store
.writer(
&file_path,
puffin_file_name,
&INDEX_PUFFIN_WRITE_BYTES_TOTAL,
&INDEX_PUFFIN_WRITE_OP_TOTAL,
&INDEX_PUFFIN_FLUSH_OP_TOTAL,
@@ -183,32 +169,20 @@ mod tests {
use super::*;
struct TestFilePathProvider;
impl FilePathProvider for TestFilePathProvider {
fn build_index_file_path(&self, file_id: FileId) -> String {
file_id.to_string()
}
fn build_sst_file_path(&self, file_id: FileId) -> String {
file_id.to_string()
}
}
#[tokio::test]
async fn test_puffin_manager_factory() {
let (_dir, factory) =
PuffinManagerFactory::new_for_test_async("test_puffin_manager_factory_").await;
let object_store = ObjectStore::new(Memory::default()).unwrap().finish();
let manager = factory.build(object_store, TestFilePathProvider);
let manager = factory.build(object_store);
let file_id = FileId::random();
let file_name = "my-puffin-file";
let blob_key = "blob-key";
let dir_key = "dir-key";
let raw_data = b"hello world!";
let mut writer = manager.writer(&file_id).await.unwrap();
let mut writer = manager.writer(file_name).await.unwrap();
writer
.put_blob(blob_key, Cursor::new(raw_data), PutOptions::default())
.await
@@ -229,7 +203,7 @@ mod tests {
.unwrap();
writer.finish().await.unwrap();
let reader = manager.reader(&file_id).await.unwrap();
let reader = manager.reader(file_name).await.unwrap();
let blob_guard = reader.blob(blob_key).await.unwrap();
let blob_reader = blob_guard.reader().await.unwrap();
let meta = blob_reader.metadata().await.unwrap();


@@ -131,7 +131,7 @@ mod tests {
#[async_trait::async_trait]
impl IndexerBuilder for NoopIndexBuilder {
async fn build(&self, _file_id: FileId) -> Indexer {
async fn build(&self, _file_id: FileId, _path: String) -> Indexer {
Indexer::default()
}
}


@@ -387,11 +387,7 @@ impl ParquetReaderBuilder {
return false;
}
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let apply_res = match index_applier
.apply(self.file_handle.file_id(), Some(file_size_hint))
.await
{
let apply_res = match index_applier.apply(self.file_handle.file_id()).await {
Ok(res) => res,
Err(err) => {
if cfg!(any(test, feature = "test")) {
@@ -471,9 +467,9 @@ impl ParquetReaderBuilder {
if !self.file_handle.meta_ref().inverted_index_available() {
return false;
}
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let file_size_hint = self.file_handle.meta_ref().inverted_index_size();
let apply_output = match index_applier
.apply(self.file_handle.file_id(), Some(file_size_hint))
.apply(self.file_handle.file_id(), file_size_hint)
.await
{
Ok(output) => output,
@@ -582,11 +578,11 @@ impl ParquetReaderBuilder {
return false;
}
let file_size_hint = self.file_handle.meta_ref().index_file_size();
let file_size_hint = self.file_handle.meta_ref().bloom_filter_index_size();
let apply_output = match index_applier
.apply(
self.file_handle.file_id(),
Some(file_size_hint),
file_size_hint,
parquet_meta
.row_groups()
.iter()


@@ -121,7 +121,8 @@ where
path_provider: P,
) -> ParquetWriter<F, I, P> {
let init_file = FileId::random();
let indexer = indexer_builder.build(init_file).await;
let index_file_path = path_provider.build_index_file_path(init_file);
let indexer = indexer_builder.build(init_file, index_file_path).await;
ParquetWriter {
path_provider,
@@ -139,7 +140,11 @@ where
match self.current_indexer {
None => {
self.current_file = FileId::random();
let indexer = self.indexer_builder.build(self.current_file).await;
let index_file_path = self.path_provider.build_index_file_path(self.current_file);
let indexer = self
.indexer_builder
.build(self.current_file, index_file_path)
.await;
self.current_indexer = Some(indexer);
// safety: self.current_indexer already set above.
self.current_indexer.as_mut().unwrap()


@@ -47,8 +47,7 @@ pub fn sst_region_metadata() -> RegionMetadata {
"tag_0".to_string(),
ConcreteDataType::string_datatype(),
true,
)
.with_inverted_index(true),
),
semantic_type: SemanticType::Tag,
column_id: 0,
})


@@ -836,7 +836,7 @@ impl<S: LogStore> RegionWorkerLoop<S> {
for ddl in ddl_requests.drain(..) {
let res = match ddl.request {
DdlRequest::Create(req) => self.handle_create_request(ddl.region_id, req).await,
DdlRequest::Drop => self.handle_drop_request(ddl.region_id).await,
DdlRequest::Drop(_) => self.handle_drop_request(ddl.region_id).await,
DdlRequest::Open((req, wal_entry_receiver)) => {
self.handle_open_request(ddl.region_id, req, wal_entry_receiver, ddl.sender)
.await;


@@ -21,7 +21,7 @@ use common_meta::node_manager::NodeManagerRef;
use common_query::error::Result;
use common_telemetry::tracing_context::TracingContext;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use futures::{StreamExt, TryStreamExt};
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
@@ -81,6 +81,7 @@ impl FlowServiceOperator {
.flow_metadata_manager
.flow_route_manager()
.routes(id)
.try_collect::<Vec<_>>()
.await
.map_err(BoxedError::new)
.context(common_query::error::ExecuteSnafu)?;

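Note: `flow_route_manager().routes(id)` now yields a stream, so the call site gathers the results with `TryStreamExt::try_collect` before mapping the error. A small example of the same pattern using only the `futures` crate (the stream contents here are made up):

```rust
use futures::executor::block_on;
use futures::stream::{self, TryStreamExt};

fn main() -> Result<(), String> {
    block_on(async {
        // A fallible stream standing in for `flow_route_manager().routes(id)`.
        let routes = stream::iter(vec![Ok::<_, String>("peer-1"), Ok("peer-2")]);

        // `try_collect` short-circuits on the first Err, otherwise gathers every item.
        let collected: Vec<&str> = routes.try_collect().await?;
        assert_eq!(collected, vec!["peer-1", "peer-2"]);
        Ok(())
    })
}
```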

@@ -68,7 +68,6 @@ pub struct Inserter {
catalog_manager: CatalogManagerRef,
partition_manager: PartitionRuleManagerRef,
node_manager: NodeManagerRef,
#[allow(unused)]
table_flownode_set_cache: TableFlownodeSetCacheRef,
}
@@ -336,11 +335,9 @@ impl Inserter {
let InstantAndNormalInsertRequests {
normal_requests,
instant_requests: _,
instant_requests,
} = requests;
// TODO(discord9): mirror some
/*
// Mirror requests for source table to flownode asynchronously
let flow_mirror_task = FlowMirrorTask::new(
&self.table_flownode_set_cache,
@@ -350,7 +347,7 @@ impl Inserter {
.chain(instant_requests.requests.iter()),
)
.await?;
flow_mirror_task.detach(self.node_manager.clone())?;*/
flow_mirror_task.detach(self.node_manager.clone())?;
// Write requests to datanode and wait for response
let write_tasks = self
@@ -820,14 +817,12 @@ struct CreateAlterTableResult {
table_infos: HashMap<TableId, Arc<TableInfo>>,
}
#[allow(unused)]
struct FlowMirrorTask {
requests: HashMap<Peer, RegionInsertRequests>,
num_rows: usize,
}
impl FlowMirrorTask {
#[allow(unused)]
async fn new(
cache: &TableFlownodeSetCacheRef,
requests: impl Iterator<Item = &RegionInsertRequest>,
@@ -901,7 +896,6 @@ impl FlowMirrorTask {
})
}
#[allow(unused)]
fn detach(self, node_manager: NodeManagerRef) -> Result<()> {
crate::metrics::DIST_MIRROR_PENDING_ROW_COUNT.add(self.num_rows as i64);
for (peer, inserts) in self.requests {


@@ -36,13 +36,12 @@ use crate::file_metadata::FileMetadata;
pub trait PuffinManager {
type Reader: PuffinReader;
type Writer: PuffinWriter;
type FileHandle: ToString + Clone + Send + Sync;
/// Creates a `PuffinReader` for the specified `handle`.
async fn reader(&self, handle: &Self::FileHandle) -> Result<Self::Reader>;
/// Creates a `PuffinReader` for the specified `puffin_file_name`.
async fn reader(&self, puffin_file_name: &str) -> Result<Self::Reader>;
/// Creates a `PuffinWriter` for the specified `handle`.
async fn writer(&self, handle: &Self::FileHandle) -> Result<Self::Writer>;
/// Creates a `PuffinWriter` for the specified `puffin_file_name`.
async fn writer(&self, puffin_file_name: &str) -> Result<Self::Writer>;
}
/// The `PuffinWriter` trait provides methods for writing blobs and directories to a Puffin file.


@@ -27,13 +27,12 @@ use crate::error::Result;
pub trait PuffinFileAccessor: Send + Sync + 'static {
type Reader: SizeAwareRangeReader + Sync;
type Writer: AsyncWrite + Unpin + Send;
type FileHandle: ToString + Clone + Send + Sync;
/// Opens a reader for the given puffin file handle.
async fn reader(&self, handle: &Self::FileHandle) -> Result<Self::Reader>;
/// Opens a reader for the given puffin file.
async fn reader(&self, puffin_file_name: &str) -> Result<Self::Reader>;
/// Creates a writer for the given puffin file handle.
async fn writer(&self, handle: &Self::FileHandle) -> Result<Self::Writer>;
/// Creates a writer for the given puffin file.
async fn writer(&self, puffin_file_name: &str) -> Result<Self::Writer>;
}
pub struct MockFileAccessor {
@@ -51,16 +50,15 @@ impl MockFileAccessor {
impl PuffinFileAccessor for MockFileAccessor {
type Reader = FileReader;
type Writer = Compat<File>;
type FileHandle = String;
async fn reader(&self, handle: &String) -> Result<Self::Reader> {
Ok(FileReader::new(self.tempdir.path().join(handle))
async fn reader(&self, puffin_file_name: &str) -> Result<Self::Reader> {
Ok(FileReader::new(self.tempdir.path().join(puffin_file_name))
.await
.unwrap())
}
async fn writer(&self, handle: &String) -> Result<Self::Writer> {
let p = self.tempdir.path().join(handle);
async fn writer(&self, puffin_file_name: &str) -> Result<Self::Writer> {
let p = self.tempdir.path().join(puffin_file_name);
if let Some(p) = p.parent() {
if !tokio::fs::try_exists(p).await.unwrap() {
tokio::fs::create_dir_all(p).await.unwrap();


@@ -61,26 +61,25 @@ impl<S, F> FsPuffinManager<S, F> {
#[async_trait]
impl<S, F> PuffinManager for FsPuffinManager<S, F>
where
S: Stager + Clone + 'static,
F: PuffinFileAccessor + Clone,
S: Stager<FileHandle = F::FileHandle> + Clone + 'static,
{
type Reader = FsPuffinReader<S, F>;
type Writer = FsPuffinWriter<S, F::Writer>;
type FileHandle = F::FileHandle;
async fn reader(&self, handle: &Self::FileHandle) -> Result<Self::Reader> {
async fn reader(&self, puffin_file_name: &str) -> Result<Self::Reader> {
Ok(FsPuffinReader::new(
handle.clone(),
puffin_file_name.to_string(),
self.stager.clone(),
self.puffin_file_accessor.clone(),
self.puffin_metadata_cache.clone(),
))
}
async fn writer(&self, handle: &Self::FileHandle) -> Result<Self::Writer> {
let writer = self.puffin_file_accessor.writer(handle).await?;
async fn writer(&self, puffin_file_name: &str) -> Result<Self::Writer> {
let writer = self.puffin_file_accessor.writer(puffin_file_name).await?;
Ok(FsPuffinWriter::new(
handle.clone(),
puffin_file_name.to_string(),
self.stager.clone(),
writer,
))


@@ -39,13 +39,9 @@ use crate::puffin_manager::stager::{BoxWriter, DirWriterProviderRef, Stager};
use crate::puffin_manager::{BlobGuard, PuffinReader};
/// `FsPuffinReader` is a `PuffinReader` that provides fs readers for puffin files.
pub struct FsPuffinReader<S, F>
where
S: Stager + 'static,
F: PuffinFileAccessor + Clone,
{
/// The handle of the puffin file.
handle: F::FileHandle,
pub struct FsPuffinReader<S, F> {
/// The name of the puffin file.
puffin_file_name: String,
/// The file size hint.
file_size_hint: Option<u64>,
@@ -60,19 +56,15 @@ where
puffin_file_metadata_cache: Option<PuffinMetadataCacheRef>,
}
impl<S, F> FsPuffinReader<S, F>
where
S: Stager + 'static,
F: PuffinFileAccessor + Clone,
{
impl<S, F> FsPuffinReader<S, F> {
pub(crate) fn new(
handle: F::FileHandle,
puffin_file_name: String,
stager: S,
puffin_file_accessor: F,
puffin_file_metadata_cache: Option<PuffinMetadataCacheRef>,
) -> Self {
Self {
handle,
puffin_file_name,
file_size_hint: None,
stager,
puffin_file_accessor,
@@ -84,8 +76,8 @@ where
#[async_trait]
impl<S, F> PuffinReader for FsPuffinReader<S, F>
where
S: Stager + 'static,
F: PuffinFileAccessor + Clone,
S: Stager<FileHandle = F::FileHandle> + 'static,
{
type Blob = Either<RandomReadBlob<F>, S::Blob>;
type Dir = S::Dir;
@@ -96,13 +88,19 @@ where
}
async fn metadata(&self) -> Result<Arc<FileMetadata>> {
let reader = self.puffin_file_accessor.reader(&self.handle).await?;
let reader = self
.puffin_file_accessor
.reader(&self.puffin_file_name)
.await?;
let mut file = PuffinFileReader::new(reader);
self.get_puffin_file_metadata(&mut file).await
}
async fn blob(&self, key: &str) -> Result<Self::Blob> {
let mut reader = self.puffin_file_accessor.reader(&self.handle).await?;
let mut reader = self
.puffin_file_accessor
.reader(&self.puffin_file_name)
.await?;
if let Some(file_size_hint) = self.file_size_hint {
reader.with_file_size_hint(file_size_hint);
}
@@ -119,7 +117,7 @@ where
let blob = if blob_metadata.compression_codec.is_none() {
// If the blob is not compressed, we can directly read it from the puffin file.
Either::L(RandomReadBlob {
handle: self.handle.clone(),
file_name: self.puffin_file_name.clone(),
accessor: self.puffin_file_accessor.clone(),
blob_metadata,
})
@@ -128,7 +126,7 @@ where
let staged_blob = self
.stager
.get_blob(
&self.handle,
self.puffin_file_name.as_str(),
key,
Box::new(|writer| {
Box::pin(Self::init_blob_to_stager(file, blob_metadata, writer))
@@ -145,18 +143,17 @@ where
async fn dir(&self, key: &str) -> Result<Self::Dir> {
self.stager
.get_dir(
&self.handle,
self.puffin_file_name.as_str(),
key,
Box::new(|writer_provider| {
let accessor = self.puffin_file_accessor.clone();
let handle = self.handle.clone();
let puffin_file_name = self.puffin_file_name.clone();
let key = key.to_string();
Box::pin(Self::init_dir_to_stager(
handle,
puffin_file_name,
key,
writer_provider,
accessor,
self.file_size_hint,
))
}),
)
@@ -173,16 +170,15 @@ where
&self,
reader: &mut PuffinFileReader<F::Reader>,
) -> Result<Arc<FileMetadata>> {
let id = self.handle.to_string();
if let Some(cache) = self.puffin_file_metadata_cache.as_ref() {
if let Some(metadata) = cache.get_metadata(&id) {
if let Some(metadata) = cache.get_metadata(&self.puffin_file_name) {
return Ok(metadata);
}
}
let metadata = Arc::new(reader.metadata().await?);
if let Some(cache) = self.puffin_file_metadata_cache.as_ref() {
cache.put_metadata(id, metadata.clone());
cache.put_metadata(self.puffin_file_name.to_string(), metadata.clone());
}
Ok(metadata)
}
@@ -200,16 +196,12 @@ where
}
async fn init_dir_to_stager(
handle: F::FileHandle,
puffin_file_name: String,
key: String,
writer_provider: DirWriterProviderRef,
accessor: F,
file_size_hint: Option<u64>,
) -> Result<u64> {
let mut reader = accessor.reader(&handle).await?;
if let Some(file_size_hint) = file_size_hint {
reader.with_file_size_hint(file_size_hint);
}
let reader = accessor.reader(&puffin_file_name).await?;
let mut file = PuffinFileReader::new(reader);
let puffin_metadata = file.metadata().await?;
@@ -245,7 +237,7 @@ where
}
);
let reader = accessor.reader(&handle).await?;
let reader = accessor.reader(&puffin_file_name).await?;
let writer = writer_provider.writer(&file_meta.relative_path).await?;
let task = common_runtime::spawn_global(async move {
let reader = PuffinFileReader::new(reader).into_blob_reader(&blob_meta);
@@ -292,8 +284,8 @@ where
}
/// `RandomReadBlob` is a `BlobGuard` that directly reads the blob from the puffin file.
pub struct RandomReadBlob<F: PuffinFileAccessor> {
handle: F::FileHandle,
pub struct RandomReadBlob<F> {
file_name: String,
accessor: F,
blob_metadata: BlobMetadata,
}
@@ -310,7 +302,7 @@ impl<F: PuffinFileAccessor + Clone> BlobGuard for RandomReadBlob<F> {
}
);
let reader = self.accessor.reader(&self.handle).await?;
let reader = self.accessor.reader(&self.file_name).await?;
let blob_reader = PuffinFileReader::new(reader).into_blob_reader(&self.blob_metadata);
Ok(blob_reader)
}

Some files were not shown because too many files have changed in this diff.