Compare commits

..

18 Commits

Author SHA1 Message Date
evenyag
0d5b423eb7 feat: opendal metrics
Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-29 18:23:41 +08:00
evenyag
26bdb6a413 fix: disable on compaction
Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-26 17:40:00 +08:00
evenyag
2fe21469f8 chore: also print infos
Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-26 17:29:37 +08:00
evenyag
3aa67c7af4 feat: add series num to metrics
Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-26 17:08:27 +08:00
evenyag
e0d3e6ae97 chore: disable fulltext index
Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-26 17:01:53 +08:00
evenyag
2ce476dc42 feat: add prof-file flag to get flamegraph
Signed-off-by: evenyag <realevenyag@gmail.com>
2025-09-26 15:36:15 +08:00
Lei, HUANG
69a816fa0c feat/objbench:
### Update Metrics and Command Output

 - **`objbench.rs`**:
   - Renamed "Write time" to "Total time" in output.
   - Enhanced metrics output to include a sum of all metrics.

 - **`access_layer.rs`**:
   - Split `index` duration into `index_update` and `index_finish`.
   - Added a `sum` method to `Metrics` to calculate the total duration.

 - **`writer.rs`**:
   - Updated metrics to use `index_update` and `index_finish` for more granular tracking of index operations.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-24 20:46:27 +08:00
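A minimal sketch of the kind of `Metrics` struct and `sum()` helper this commit message describes (the field names and overall shape are assumptions based on the message, not the actual `access_layer.rs` code):

```rust
use std::time::Duration;

/// Per-SST-write metrics in the spirit of the commit message;
/// the real struct in access_layer.rs may track different phases.
#[derive(Default, Debug)]
struct Metrics {
    convert: Duration,
    index_update: Duration,
    index_finish: Duration,
    write: Duration,
}

impl Metrics {
    /// Total duration across all tracked phases, e.g. for a "Total time" report.
    fn sum(&self) -> Duration {
        self.convert + self.index_update + self.index_finish + self.write
    }
}

fn main() {
    let m = Metrics {
        convert: Duration::from_millis(12),
        index_update: Duration::from_millis(30),
        index_finish: Duration::from_millis(5),
        write: Duration::from_millis(80),
    };
    println!("total: {:?}", m.sum()); // prints: total: 127ms
}
```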
Lei, HUANG
dcf5a62014 feat/objbench:
### Add Metrics for Indexing and Conversion in `access_layer.rs` and `writer.rs`

 - **Enhancements in `access_layer.rs`:**
   - Added new metrics `convert` and `index` to the `Metrics` struct to track conversion and indexing durations.

 - **Updates in `writer.rs`:**
   - Implemented tracking of indexing duration by measuring the time taken for `update` in the indexer.
   - Added measurement of conversion duration for `convert_batch` to enhance performance monitoring.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-24 18:14:00 +08:00
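The accumulate-into-a-`Duration` timing pattern described above might look roughly like this sketch; the `time_into` helper and the stand-in workloads are hypothetical, and `convert_batch` and the indexer `update` call are only named in the commit message:

```rust
use std::time::{Duration, Instant};

/// Run `f`, add its elapsed time to `slot`, and return its result.
fn time_into<T>(slot: &mut Duration, f: impl FnOnce() -> T) -> T {
    let start = Instant::now();
    let out = f();
    *slot += start.elapsed();
    out
}

fn main() {
    let mut convert = Duration::ZERO;
    let mut index_update = Duration::ZERO;

    // Stand-ins for the timed `convert_batch` and indexer `update` calls.
    let converted = time_into(&mut convert, || (0..1_000_000u64).sum::<u64>());
    let _indexed = time_into(&mut index_update, || converted.to_string());

    println!("convert: {convert:?}, index_update: {index_update:?}");
}
```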
Lei, HUANG
f3aa967aae fix storage config
Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-24 16:01:19 +08:00
Lei, HUANG
93e8510b2a pretty print
2025-09-23 16:01:50 +08:00
Lei, HUANG
53c58494fd feat/objbench:
### Add verbose logging and file deletion in `objbench.rs`

 - **Verbose Logging**: Introduced a `--verbose` flag in `Command` to enable detailed logging using `common_telemetry::init_default_ut_logging()`.
 - **File Deletion**: Implemented automatic deletion of the destination file after processing in `Command::run()`.

 ### Update tests in `parquet.rs`

 - Removed unused parameters in test functions to streamline the code.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-23 15:42:21 +08:00
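A rough sketch of what the described `--verbose` flag and post-run cleanup could look like, assuming a clap-style derive; the real `Command` in `objbench.rs` is not shown here, and `dest` is a placeholder argument:

```rust
use clap::Parser;
use std::path::PathBuf;

#[derive(Parser)]
struct Command {
    /// Destination file produced by the benchmark run (placeholder).
    #[arg(long)]
    dest: PathBuf,
    /// Enable detailed logging.
    #[arg(long)]
    verbose: bool,
}

impl Command {
    fn run(&self) -> std::io::Result<()> {
        if self.verbose {
            // The commit message names common_telemetry::init_default_ut_logging() here.
        }
        // ... benchmark work writing to `self.dest` ...
        // Delete the destination file once processing finishes.
        std::fs::remove_file(&self.dest)
    }
}

fn main() -> std::io::Result<()> {
    Command::parse().run()
}
```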
Lei, HUANG
741c5e2fb1 feat/objbench:
### Update `objbench.rs` and `parquet.rs` for Improved File Handling

 - **`objbench.rs`:**
   - Simplified target access layer initialization by directly using `self.target`.
   - Added assertion to ensure single file info and constructed destination file path for reporting.
   - Enhanced logging to include destination file path in write completion message.

 - **`parquet.rs`:**
   - Updated test cases to include `None` for additional parameter in function calls.

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-23 15:26:09 +08:00
Lei, HUANG
d68215dc88 feat/objbench:
### Add `objbench` Binary and Enhance Metrics Collection

 - **New Binary**: Introduced a new binary `objbench` in `src/cmd/src/bin/objbench.rs` for benchmarking object store operations.
 - **Metrics Collection**: Enhanced metrics collection by adding a `Metrics` struct in `access_layer.rs` and integrating it into SST writing processes across multiple files, including `write_cache.rs`, `compactor.rs`, `flush

Signed-off-by: Lei, HUANG <mrsatangel@gmail.com>
2025-09-23 14:49:33 +08:00
Yingwen
bcd63fdb87 chore: cherry pick #6821 and bump version to v0.12.2 (#6832)
* fix: correct heartbeat stream handling logic  (#6821)

* fix: correct heartbeat stream handling logic

Signed-off-by: WenyXu <wenymedia@gmail.com>

* Update src/meta-srv/src/service/heartbeat.rs

Co-authored-by: jeremyhi <jiachun_feng@proton.me>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: bump version to v0.12.2

Signed-off-by: evenyag <realevenyag@gmail.com>

* chore: fix typos

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: WenyXu <wenymedia@gmail.com>
Signed-off-by: evenyag <realevenyag@gmail.com>
Co-authored-by: Weny Xu <wenymedia@gmail.com>
Co-authored-by: jeremyhi <jiachun_feng@proton.me>
2025-08-27 08:56:09 +00:00
Yingwen
f4c527cddf chore: cherry pick #5625 to v0.12 branch (#6831)
* ci: update 0.12 release ci

Signed-off-by: evenyag <realevenyag@gmail.com>

* fix: out of bound during bloom search (#5625)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
Co-authored-by: Zhenchi <zhongzc_arch@outlook.com>
2025-08-27 16:12:25 +08:00
Yingwen
8da5949fc5 ci: update 0.12 ci to latest (#6376)
* ci: update 0.12 ci to latest

Except:
- Remove mysql_backend
- Remove workflows/grafana.json

Signed-off-by: evenyag <realevenyag@gmail.com>

* ci: update typos

Signed-off-by: evenyag <realevenyag@gmail.com>

* ci: ignore more words

Signed-off-by: evenyag <realevenyag@gmail.com>

---------

Signed-off-by: evenyag <realevenyag@gmail.com>
2025-06-21 18:20:43 +08:00
Ruihang Xia
db6a63ef6c chore: bump version to 0.12.1
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
2025-06-21 16:52:28 +08:00
Yingwen
f166b93b02 feat: expose virtual_host_style config for s3 storage (#5696)
* feat: expose enable_virtual_host_style for s3 storage

* docs: update examples

* test: fix config test
2025-06-21 16:34:15 +08:00
78 changed files with 1916 additions and 2372 deletions

2
.github/CODEOWNERS vendored
View File

@@ -4,7 +4,7 @@
 * @GreptimeTeam/db-approver
-## [Module] Databse Engine
+## [Module] Database Engine
 /src/index @zhongzc
 /src/mito2 @evenyag @v0y4g3r @waynexia
 /src/query @evenyag

View File

@@ -41,7 +41,14 @@ runs:
username: ${{ inputs.dockerhub-image-registry-username }} username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }} password: ${{ inputs.dockerhub-image-registry-token }}
- name: Build and push dev-builder-ubuntu image - name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64,linux/arm64
# The latest version will lead to segmentation fault.
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Build and push dev-builder-ubuntu image # Build image for amd64 and arm64 platform.
shell: bash shell: bash
if: ${{ inputs.build-dev-builder-ubuntu == 'true' }} if: ${{ inputs.build-dev-builder-ubuntu == 'true' }}
run: | run: |
@@ -52,7 +59,7 @@ runs:
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \ IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-centos image - name: Build and push dev-builder-centos image # Only build image for amd64 platform.
shell: bash shell: bash
if: ${{ inputs.build-dev-builder-centos == 'true' }} if: ${{ inputs.build-dev-builder-centos == 'true' }}
run: | run: |
@@ -69,8 +76,7 @@ runs:
run: | run: |
make dev-builder \ make dev-builder \
BASE_IMAGE=android \ BASE_IMAGE=android \
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \ IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \ IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \ DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

View File

@@ -52,7 +52,7 @@ runs:
 uses: ./.github/actions/build-greptime-binary
 with:
 base-image: ubuntu
-features: servers/dashboard,pg_kvbackend
+features: servers/dashboard
 cargo-profile: ${{ inputs.cargo-profile }}
 artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
 version: ${{ inputs.version }}
@@ -70,7 +70,7 @@
 if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64.
 with:
 base-image: centos
-features: servers/dashboard,pg_kvbackend
+features: servers/dashboard
 cargo-profile: ${{ inputs.cargo-profile }}
 artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
 version: ${{ inputs.version }}

View File

@@ -47,7 +47,6 @@ runs:
 shell: pwsh
 run: make test sqlness-test
 env:
-RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
 RUST_BACKTRACE: 1
 SQLNESS_OPTS: "--preserve-state"

View File

@@ -64,11 +64,11 @@ inputs:
 upload-max-retry-times:
 description: Max retry times for uploading artifacts to S3
 required: false
-default: "20"
+default: "30"
 upload-retry-timeout:
 description: Timeout for uploading artifacts to S3
 required: false
-default: "30" # minutes
+default: "120" # minutes
 runs:
 using: composite
 steps:

View File

@@ -8,15 +8,15 @@ inputs:
default: 2 default: 2
description: "Number of Datanode replicas" description: "Number of Datanode replicas"
meta-replicas: meta-replicas:
default: 1 default: 2
description: "Number of Metasrv replicas" description: "Number of Metasrv replicas"
image-registry: image-registry:
default: "docker.io" default: "docker.io"
description: "Image registry" description: "Image registry"
image-repository: image-repository:
default: "greptime/greptimedb" default: "greptime/greptimedb"
description: "Image repository" description: "Image repository"
image-tag: image-tag:
default: "latest" default: "latest"
description: 'Image tag' description: 'Image tag'
etcd-endpoints: etcd-endpoints:
@@ -32,12 +32,12 @@ runs:
steps: steps:
- name: Install GreptimeDB operator - name: Install GreptimeDB operator
uses: nick-fields/retry@v3 uses: nick-fields/retry@v3
with: with:
timeout_minutes: 3 timeout_minutes: 3
max_attempts: 3 max_attempts: 3
shell: bash shell: bash
command: | command: |
helm repo add greptime https://greptimeteam.github.io/helm-charts/ helm repo add greptime https://greptimeteam.github.io/helm-charts/
helm repo update helm repo update
helm upgrade \ helm upgrade \
--install \ --install \
@@ -48,10 +48,10 @@ runs:
--wait-for-jobs --wait-for-jobs
- name: Install GreptimeDB cluster - name: Install GreptimeDB cluster
shell: bash shell: bash
run: | run: |
helm upgrade \ helm upgrade \
--install my-greptimedb \ --install my-greptimedb \
--set meta.etcdEndpoints=${{ inputs.etcd-endpoints }} \ --set meta.backendStorage.etcd.endpoints=${{ inputs.etcd-endpoints }} \
--set meta.enableRegionFailover=${{ inputs.enable-region-failover }} \ --set meta.enableRegionFailover=${{ inputs.enable-region-failover }} \
--set image.registry=${{ inputs.image-registry }} \ --set image.registry=${{ inputs.image-registry }} \
--set image.repository=${{ inputs.image-repository }} \ --set image.repository=${{ inputs.image-repository }} \
@@ -59,7 +59,7 @@ runs:
--set base.podTemplate.main.resources.requests.cpu=50m \ --set base.podTemplate.main.resources.requests.cpu=50m \
--set base.podTemplate.main.resources.requests.memory=256Mi \ --set base.podTemplate.main.resources.requests.memory=256Mi \
--set base.podTemplate.main.resources.limits.cpu=2000m \ --set base.podTemplate.main.resources.limits.cpu=2000m \
--set base.podTemplate.main.resources.limits.memory=2Gi \ --set base.podTemplate.main.resources.limits.memory=3Gi \
--set frontend.replicas=${{ inputs.frontend-replicas }} \ --set frontend.replicas=${{ inputs.frontend-replicas }} \
--set datanode.replicas=${{ inputs.datanode-replicas }} \ --set datanode.replicas=${{ inputs.datanode-replicas }} \
--set meta.replicas=${{ inputs.meta-replicas }} \ --set meta.replicas=${{ inputs.meta-replicas }} \
@@ -72,7 +72,7 @@ runs:
- name: Wait for GreptimeDB - name: Wait for GreptimeDB
shell: bash shell: bash
run: | run: |
while true; do while true; do
PHASE=$(kubectl -n my-greptimedb get gtc my-greptimedb -o jsonpath='{.status.clusterPhase}') PHASE=$(kubectl -n my-greptimedb get gtc my-greptimedb -o jsonpath='{.status.clusterPhase}')
if [ "$PHASE" == "Running" ]; then if [ "$PHASE" == "Running" ]; then
echo "Cluster is ready" echo "Cluster is ready"
@@ -86,10 +86,10 @@ runs:
- name: Print GreptimeDB info - name: Print GreptimeDB info
if: always() if: always()
shell: bash shell: bash
run: | run: |
kubectl get all --show-labels -n my-greptimedb kubectl get all --show-labels -n my-greptimedb
- name: Describe Nodes - name: Describe Nodes
if: always() if: always()
shell: bash shell: bash
run: | run: |
kubectl describe nodes kubectl describe nodes

View File

@@ -2,13 +2,14 @@ meta:
configData: |- configData: |-
[runtime] [runtime]
global_rt_size = 4 global_rt_size = 4
[wal] [wal]
provider = "kafka" provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"] broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
num_topics = 3 num_topics = 3
auto_prune_interval = "30s"
trigger_flush_threshold = 100
[datanode] [datanode]
[datanode.client] [datanode.client]
timeout = "120s" timeout = "120s"
@@ -21,7 +22,7 @@ datanode:
[wal] [wal]
provider = "kafka" provider = "kafka"
broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"] broker_endpoints = ["kafka.kafka-cluster.svc.cluster.local:9092"]
linger = "2ms" overwrite_entry_start_id = true
frontend: frontend:
configData: |- configData: |-
[runtime] [runtime]

View File

@@ -56,7 +56,7 @@ runs:
 - name: Start EC2 runner
 if: startsWith(inputs.runner, 'ec2')
-uses: machulav/ec2-github-runner@v2
+uses: machulav/ec2-github-runner@v2.3.8
 id: start-linux-arm64-ec2-runner
 with:
 mode: start

View File

@@ -33,7 +33,7 @@ runs:
 - name: Stop EC2 runner
 if: ${{ inputs.label && inputs.ec2-instance-id }}
-uses: machulav/ec2-github-runner@v2
+uses: machulav/ec2-github-runner@v2.3.8
 with:
 mode: stop
 label: ${{ inputs.label }}

15
.github/labeler.yaml vendored Normal file
View File

@@ -0,0 +1,15 @@
ci:
- changed-files:
- any-glob-to-any-file: .github/**
docker:
- changed-files:
- any-glob-to-any-file: docker/**
documentation:
- changed-files:
- any-glob-to-any-file: docs/**
dashboard:
- changed-files:
- any-glob-to-any-file: grafana/**

42
.github/scripts/check-version.sh vendored Executable file
View File

@@ -0,0 +1,42 @@
#!/bin/bash
# Get current version
CURRENT_VERSION=$1
if [ -z "$CURRENT_VERSION" ]; then
echo "Error: Failed to get current version"
exit 1
fi
# Get the latest version from GitHub Releases
API_RESPONSE=$(curl -s "https://api.github.com/repos/GreptimeTeam/greptimedb/releases/latest")
if [ -z "$API_RESPONSE" ] || [ "$(echo "$API_RESPONSE" | jq -r '.message')" = "Not Found" ]; then
echo "Error: Failed to fetch latest version from GitHub"
exit 1
fi
# Get the latest version
LATEST_VERSION=$(echo "$API_RESPONSE" | jq -r '.tag_name')
if [ -z "$LATEST_VERSION" ] || [ "$LATEST_VERSION" = "null" ]; then
echo "Error: No valid version found in GitHub releases"
exit 1
fi
# Cleaned up version number format (removed possible 'v' prefix and -nightly suffix)
CLEAN_CURRENT=$(echo "$CURRENT_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
CLEAN_LATEST=$(echo "$LATEST_VERSION" | sed 's/^v//' | sed 's/-nightly-.*//')
echo "Current version: $CLEAN_CURRENT"
echo "Latest release version: $CLEAN_LATEST"
# Use sort -V to compare versions
HIGHER_VERSION=$(printf "%s\n%s" "$CLEAN_CURRENT" "$CLEAN_LATEST" | sort -V | tail -n1)
if [ "$HIGHER_VERSION" = "$CLEAN_CURRENT" ]; then
echo "Current version ($CLEAN_CURRENT) is NEWER than or EQUAL to latest ($CLEAN_LATEST)"
echo "should-push-latest-tag=true" >> $GITHUB_OUTPUT
else
echo "Current version ($CLEAN_CURRENT) is OLDER than latest ($CLEAN_LATEST)"
echo "should-push-latest-tag=false" >> $GITHUB_OUTPUT
fi

View File

@@ -8,24 +8,25 @@ set -e
# - If it's a nightly build, the version is 'nightly-YYYYMMDD-$(git rev-parse --short HEAD)', like 'nightly-20230712-e5b243c'. # - If it's a nightly build, the version is 'nightly-YYYYMMDD-$(git rev-parse --short HEAD)', like 'nightly-20230712-e5b243c'.
# create_version ${GIHUB_EVENT_NAME} ${NEXT_RELEASE_VERSION} ${NIGHTLY_RELEASE_PREFIX} # create_version ${GIHUB_EVENT_NAME} ${NEXT_RELEASE_VERSION} ${NIGHTLY_RELEASE_PREFIX}
function create_version() { function create_version() {
# Read from envrionment variables. # Read from environment variables.
if [ -z "$GITHUB_EVENT_NAME" ]; then if [ -z "$GITHUB_EVENT_NAME" ]; then
echo "GITHUB_EVENT_NAME is empty" echo "GITHUB_EVENT_NAME is empty" >&2
exit 1 exit 1
fi fi
if [ -z "$NEXT_RELEASE_VERSION" ]; then if [ -z "$NEXT_RELEASE_VERSION" ]; then
echo "NEXT_RELEASE_VERSION is empty" echo "NEXT_RELEASE_VERSION is empty, use version from Cargo.toml" >&2
exit 1 # NOTE: Need a `v` prefix for the version string.
export NEXT_RELEASE_VERSION=v$(grep '^version = ' Cargo.toml | cut -d '"' -f 2 | head -n 1)
fi fi
if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then if [ -z "$NIGHTLY_RELEASE_PREFIX" ]; then
echo "NIGHTLY_RELEASE_PREFIX is empty" echo "NIGHTLY_RELEASE_PREFIX is empty" >&2
exit 1 exit 1
fi fi
# Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build. # Reuse $NEXT_RELEASE_VERSION to identify whether it's a nightly build.
# It will be like 'nigtly-20230808-7d0d8dc6'. # It will be like 'nightly-20230808-7d0d8dc6'.
if [ "$NEXT_RELEASE_VERSION" = nightly ]; then if [ "$NEXT_RELEASE_VERSION" = nightly ]; then
echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)" echo "$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")-$(git rev-parse --short HEAD)"
exit 0 exit 0
@@ -35,7 +36,7 @@ function create_version() {
# It will be like 'dev-2023080819-f0e7216c'. # It will be like 'dev-2023080819-f0e7216c'.
if [ "$NEXT_RELEASE_VERSION" = dev ]; then if [ "$NEXT_RELEASE_VERSION" = dev ]; then
if [ -z "$COMMIT_SHA" ]; then if [ -z "$COMMIT_SHA" ]; then
echo "COMMIT_SHA is empty in dev build" echo "COMMIT_SHA is empty in dev build" >&2
exit 1 exit 1
fi fi
echo "dev-$(date "+%Y%m%d-%s")-$(echo "$COMMIT_SHA" | cut -c1-8)" echo "dev-$(date "+%Y%m%d-%s")-$(echo "$COMMIT_SHA" | cut -c1-8)"
@@ -45,7 +46,7 @@ function create_version() {
# Note: Only output 'version=xxx' to stdout when everything is ok, so that it can be used in GitHub Actions Outputs. # Note: Only output 'version=xxx' to stdout when everything is ok, so that it can be used in GitHub Actions Outputs.
if [ "$GITHUB_EVENT_NAME" = push ]; then if [ "$GITHUB_EVENT_NAME" = push ]; then
if [ -z "$GITHUB_REF_NAME" ]; then if [ -z "$GITHUB_REF_NAME" ]; then
echo "GITHUB_REF_NAME is empty in push event" echo "GITHUB_REF_NAME is empty in push event" >&2
exit 1 exit 1
fi fi
echo "$GITHUB_REF_NAME" echo "$GITHUB_REF_NAME"
@@ -54,15 +55,15 @@ function create_version() {
elif [ "$GITHUB_EVENT_NAME" = schedule ]; then elif [ "$GITHUB_EVENT_NAME" = schedule ]; then
echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")" echo "$NEXT_RELEASE_VERSION-$NIGHTLY_RELEASE_PREFIX-$(date "+%Y%m%d")"
else else
echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" echo "Unsupported GITHUB_EVENT_NAME: $GITHUB_EVENT_NAME" >&2
exit 1 exit 1
fi fi
} }
# You can run as following examples: # You can run as following examples:
# GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly GITHUB_REF_NAME=v0.3.0 ./create-version.sh # GITHUB_EVENT_NAME=push NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly GITHUB_REF_NAME=v0.3.0 ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh # GITHUB_EVENT_NAME=workflow_dispatch NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh # GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=v0.4.0 NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
# GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh # GITHUB_EVENT_NAME=schedule NEXT_RELEASE_VERSION=nightly NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
# GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nigtly ./create-version.sh # GITHUB_EVENT_NAME=workflow_dispatch COMMIT_SHA=f0e7216c4bb6acce9b29a21ec2d683be2e3f984a NEXT_RELEASE_VERSION=dev NIGHTLY_RELEASE_PREFIX=nightly ./create-version.sh
create_version create_version

View File

@@ -10,7 +10,7 @@ GREPTIMEDB_IMAGE_TAG=${GREPTIMEDB_IMAGE_TAG:-latest}
 ETCD_CHART="oci://registry-1.docker.io/bitnamicharts/etcd"
 GREPTIME_CHART="https://greptimeteam.github.io/helm-charts/"
-# Ceate a cluster with 1 control-plane node and 5 workers.
+# Create a cluster with 1 control-plane node and 5 workers.
 function create_kind_cluster() {
 cat <<EOF | kind create cluster --name "${CLUSTER}" --image kindest/node:"$KUBERNETES_VERSION" --config=-
 kind: Cluster
@@ -68,7 +68,7 @@ function deploy_greptimedb_cluster() {
 helm install "$cluster_name" greptime/greptimedb-cluster \
 --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
---set meta.etcdEndpoints="etcd.$install_namespace:2379" \
+--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
 -n "$install_namespace"
 # Wait for greptimedb cluster to be ready.
@@ -103,7 +103,7 @@ function deploy_greptimedb_cluster_with_s3_storage() {
 helm install "$cluster_name" greptime/greptimedb-cluster -n "$install_namespace" \
 --set image.tag="$GREPTIMEDB_IMAGE_TAG" \
---set meta.etcdEndpoints="etcd.$install_namespace:2379" \
+--set meta.backendStorage.etcd.endpoints="etcd.$install_namespace:2379" \
 --set storage.s3.bucket="$AWS_CI_TEST_BUCKET" \
 --set storage.s3.region="$AWS_REGION" \
 --set storage.s3.root="$DATA_ROOT" \

37
.github/scripts/update-dev-builder-version.sh vendored Executable file
View File

@@ -0,0 +1,37 @@
#!/bin/bash
DEV_BUILDER_IMAGE_TAG=$1
update_dev_builder_version() {
if [ -z "$DEV_BUILDER_IMAGE_TAG" ]; then
echo "Error: Should specify the dev-builder image tag"
exit 1
fi
# Configure Git configs.
git config --global user.email greptimedb-ci@greptime.com
git config --global user.name greptimedb-ci
# Checkout a new branch.
BRANCH_NAME="ci/update-dev-builder-$(date +%Y%m%d%H%M%S)"
git checkout -b $BRANCH_NAME
# Update the dev-builder image tag in the Makefile.
sed -i "s/DEV_BUILDER_IMAGE_TAG ?=.*/DEV_BUILDER_IMAGE_TAG ?= ${DEV_BUILDER_IMAGE_TAG}/g" Makefile
# Commit the changes.
git add Makefile
git commit -m "ci: update dev-builder image tag"
git push origin $BRANCH_NAME
# Create a Pull Request.
gh pr create \
--title "ci: update dev-builder image tag" \
--body "This PR updates the dev-builder image tag" \
--base main \
--head $BRANCH_NAME \
--reviewer zyy17 \
--reviewer daviderli614
}
update_dev_builder_version

46
.github/scripts/update-helm-charts-version.sh vendored Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/bash
set -e
VERSION=${VERSION}
GITHUB_TOKEN=${GITHUB_TOKEN}
update_helm_charts_version() {
# Configure Git configs.
git config --global user.email update-helm-charts-version@greptime.com
git config --global user.name update-helm-charts-version
# Clone helm-charts repository.
git clone "https://x-access-token:${GITHUB_TOKEN}@github.com/GreptimeTeam/helm-charts.git"
cd helm-charts
# Set default remote for gh CLI
gh repo set-default GreptimeTeam/helm-charts
# Checkout a new branch.
BRANCH_NAME="chore/greptimedb-${VERSION}"
git checkout -b $BRANCH_NAME
# Update version.
make update-version CHART=greptimedb-cluster VERSION=${VERSION}
make update-version CHART=greptimedb-standalone VERSION=${VERSION}
# Update docs.
make docs
# Commit the changes.
git add .
git commit -s -m "chore: Update GreptimeDB version to ${VERSION}"
git push origin $BRANCH_NAME
# Create a Pull Request.
gh pr create \
--title "chore: Update GreptimeDB version to ${VERSION}" \
--body "This PR updates the GreptimeDB version." \
--base main \
--head $BRANCH_NAME \
--reviewer zyy17 \
--reviewer daviderli614
}
update_helm_charts_version

View File

@@ -0,0 +1,42 @@
#!/bin/bash
set -e
VERSION=${VERSION}
GITHUB_TOKEN=${GITHUB_TOKEN}
update_homebrew_greptime_version() {
# Configure Git configs.
git config --global user.email update-greptime-version@greptime.com
git config --global user.name update-greptime-version
# Clone helm-charts repository.
git clone "https://x-access-token:${GITHUB_TOKEN}@github.com/GreptimeTeam/homebrew-greptime.git"
cd homebrew-greptime
# Set default remote for gh CLI
gh repo set-default GreptimeTeam/homebrew-greptime
# Checkout a new branch.
BRANCH_NAME="chore/greptimedb-${VERSION}"
git checkout -b $BRANCH_NAME
# Update version.
make update-greptime-version VERSION=${VERSION}
# Commit the changes.
git add .
git commit -s -m "chore: Update GreptimeDB version to ${VERSION}"
git push origin $BRANCH_NAME
# Create a Pull Request.
gh pr create \
--title "chore: Update GreptimeDB version to ${VERSION}" \
--body "This PR updates the GreptimeDB version." \
--base main \
--head $BRANCH_NAME \
--reviewer zyy17 \
--reviewer daviderli614
}
update_homebrew_greptime_version

View File

@@ -41,7 +41,7 @@ function upload_artifacts() {
 # Updates the latest version information in AWS S3 if UPDATE_VERSION_INFO is true.
 function update_version_info() {
 if [ "$UPDATE_VERSION_INFO" == "true" ]; then
-# If it's the officail release(like v1.0.0, v1.0.1, v1.0.2, etc.), update latest-version.txt.
+# If it's the official release(like v1.0.0, v1.0.1, v1.0.2, etc.), update latest-version.txt.
 if [[ "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
 echo "Updating latest-version.txt"
 echo "$VERSION" > latest-version.txt

View File

@@ -14,7 +14,7 @@ name: Build API docs
 jobs:
 apidoc:
-runs-on: ubuntu-20.04
+runs-on: ubuntu-latest
 steps:
 - uses: actions/checkout@v4
 with:

View File

@@ -16,11 +16,11 @@ on:
description: The runner uses to build linux-amd64 artifacts description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64 default: ec2-c6i.4xlarge-amd64
options: options:
- ubuntu-20.04 - ubuntu-22.04
- ubuntu-20.04-8-cores - ubuntu-22.04-8-cores
- ubuntu-20.04-16-cores - ubuntu-22.04-16-cores
- ubuntu-20.04-32-cores - ubuntu-22.04-32-cores
- ubuntu-20.04-64-cores - ubuntu-22.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G - ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G - ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -55,6 +55,11 @@ on:
description: Build and push images to DockerHub and ACR description: Build and push images to DockerHub and ACR
required: false required: false
default: true default: true
upload_artifacts_to_s3:
type: boolean
description: Whether upload artifacts to s3
required: false
default: false
cargo_profile: cargo_profile:
type: choice type: choice
description: The cargo profile to use in building GreptimeDB. description: The cargo profile to use in building GreptimeDB.
@@ -83,7 +88,7 @@ jobs:
allocate-runners: allocate-runners:
name: Allocate runners name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }} linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }} linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -218,7 +223,7 @@ jobs:
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }} build-result: ${{ steps.set-build-result.outputs.build-result }}
steps: steps:
@@ -239,6 +244,13 @@ jobs:
push-latest-tag: false # Don't push the latest tag to registry. push-latest-tag: false # Don't push the latest tag to registry.
dev-mode: true # Only build the standard images. dev-mode: true # Only build the standard images.
- name: Echo Docker image tag to step summary
run: |
echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
echo "Image Tag: \`${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
echo "Full Image Name: \`docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
echo "Pull Command: \`docker pull docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
- name: Set build result - name: Set build result
id: set-build-result id: set-build-result
run: | run: |
@@ -251,7 +263,7 @@ jobs:
allocate-runners, allocate-runners,
release-images-to-dockerhub, release-images-to-dockerhub,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
continue-on-error: true continue-on-error: true
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -274,7 +286,7 @@ jobs:
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }} aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }} aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }} aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-to-s3: false upload-to-s3: ${{ inputs.upload_artifacts_to_s3 }}
dev-mode: true # Only build the standard images(exclude centos images). dev-mode: true # Only build the standard images(exclude centos images).
push-latest-tag: false # Don't push the latest tag to registry. push-latest-tag: false # Don't push the latest tag to registry.
update-version-info: false # Don't update the version info in S3. update-version-info: false # Don't update the version info in S3.
@@ -283,7 +295,7 @@ jobs:
name: Stop linux-amd64 runner name: Stop linux-amd64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
@@ -309,7 +321,7 @@ jobs:
name: Stop linux-arm64 runner name: Stop linux-arm64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
@@ -337,7 +349,7 @@ jobs:
needs: [ needs: [
release-images-to-dockerhub release-images-to-dockerhub
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
permissions: permissions:
issues: write issues: write

View File

@@ -22,8 +22,9 @@ concurrency:
jobs: jobs:
check-typos-and-docs: check-typos-and-docs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Check typos and docs name: Check typos and docs
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@@ -36,7 +37,8 @@ jobs:
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1) || (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
license-header-check: license-header-check:
runs-on: ubuntu-20.04 if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
name: Check License Header name: Check License Header
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -45,11 +47,12 @@ jobs:
- uses: korandoru/hawkeye@v5 - uses: korandoru/hawkeye@v5
check: check:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Check name: Check
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [ ubuntu-20.04 ] os: [ ubuntu-latest ]
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -71,8 +74,9 @@ jobs:
run: cargo check --locked --workspace --all-targets run: cargo check --locked --workspace --all-targets
toml: toml:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Toml Check name: Toml Check
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -85,11 +89,12 @@ jobs:
run: taplo format --check run: taplo format --check
build: build:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Build GreptimeDB binaries name: Build GreptimeDB binaries
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [ ubuntu-20.04 ] os: [ ubuntu-latest ]
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -127,6 +132,7 @@ jobs:
version: current version: current
fuzztest: fuzztest:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Fuzz Test name: Fuzz Test
needs: build needs: build
runs-on: ubuntu-latest runs-on: ubuntu-latest
@@ -183,11 +189,13 @@ jobs:
max-total-time: 120 max-total-time: 120
unstable-fuzztest: unstable-fuzztest:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Unstable Fuzz Test name: Unstable Fuzz Test
needs: build-greptime-ci needs: build-greptime-ci
runs-on: ubuntu-latest runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
strategy: strategy:
fail-fast: false
matrix: matrix:
target: [ "unstable_fuzz_create_table_standalone" ] target: [ "unstable_fuzz_create_table_standalone" ]
steps: steps:
@@ -215,12 +223,12 @@ jobs:
run: | run: |
sudo apt update && sudo apt install -y libfuzzer-14-dev sudo apt update && sudo apt install -y libfuzzer-14-dev
cargo install cargo-fuzz cargo-gc-bin --force cargo install cargo-fuzz cargo-gc-bin --force
- name: Download pre-built binariy - name: Download pre-built binary
uses: actions/download-artifact@v4 uses: actions/download-artifact@v4
with: with:
name: bin name: bin
path: . path: .
- name: Unzip bianry - name: Unzip binary
run: | run: |
tar -xvf ./bin.tar.gz tar -xvf ./bin.tar.gz
rm ./bin.tar.gz rm ./bin.tar.gz
@@ -242,13 +250,19 @@ jobs:
name: unstable-fuzz-logs name: unstable-fuzz-logs
path: /tmp/unstable-greptime/ path: /tmp/unstable-greptime/
retention-days: 3 retention-days: 3
- name: Describe pods
if: failure()
shell: bash
run: |
kubectl describe pod -n my-greptimedb
build-greptime-ci: build-greptime-ci:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Build GreptimeDB binary (profile-CI) name: Build GreptimeDB binary (profile-CI)
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [ ubuntu-20.04 ] os: [ ubuntu-latest ]
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -267,7 +281,7 @@ jobs:
- name: Install cargo-gc-bin - name: Install cargo-gc-bin
shell: bash shell: bash
run: cargo install cargo-gc-bin --force run: cargo install cargo-gc-bin --force
- name: Build greptime bianry - name: Build greptime binary
shell: bash shell: bash
# `cargo gc` will invoke `cargo build` with specified args # `cargo gc` will invoke `cargo build` with specified args
run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend
@@ -285,11 +299,13 @@ jobs:
version: current version: current
distributed-fuzztest: distributed-fuzztest:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Fuzz Test (Distributed, ${{ matrix.mode.name }}, ${{ matrix.target }}) name: Fuzz Test (Distributed, ${{ matrix.mode.name }}, ${{ matrix.target }})
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build-greptime-ci needs: build-greptime-ci
timeout-minutes: 60 timeout-minutes: 60
strategy: strategy:
fail-fast: false
matrix: matrix:
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ] target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table", "fuzz_alter_logical_table", "fuzz_insert", "fuzz_insert_logical_table" ]
mode: mode:
@@ -319,9 +335,9 @@ jobs:
name: Setup Minio name: Setup Minio
uses: ./.github/actions/setup-minio uses: ./.github/actions/setup-minio
- if: matrix.mode.kafka - if: matrix.mode.kafka
name: Setup Kafka cluser name: Setup Kafka cluster
uses: ./.github/actions/setup-kafka-cluster uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser - name: Setup Etcd cluster
uses: ./.github/actions/setup-etcd-cluster uses: ./.github/actions/setup-etcd-cluster
# Prepares for fuzz tests # Prepares for fuzz tests
- uses: arduino/setup-protoc@v3 - uses: arduino/setup-protoc@v3
@@ -394,6 +410,11 @@ jobs:
shell: bash shell: bash
run: | run: |
kubectl describe nodes kubectl describe nodes
- name: Describe pod
if: failure()
shell: bash
run: |
kubectl describe pod -n my-greptimedb
- name: Export kind logs - name: Export kind logs
if: failure() if: failure()
shell: bash shell: bash
@@ -416,11 +437,13 @@ jobs:
docker system prune -f docker system prune -f
distributed-fuzztest-with-chaos: distributed-fuzztest-with-chaos:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Fuzz Test with Chaos (Distributed, ${{ matrix.mode.name }}, ${{ matrix.target }}) name: Fuzz Test with Chaos (Distributed, ${{ matrix.mode.name }}, ${{ matrix.target }})
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build-greptime-ci needs: build-greptime-ci
timeout-minutes: 60 timeout-minutes: 60
strategy: strategy:
fail-fast: false
matrix: matrix:
target: ["fuzz_migrate_mito_regions", "fuzz_migrate_metric_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"] target: ["fuzz_migrate_mito_regions", "fuzz_migrate_metric_regions", "fuzz_failover_mito_regions", "fuzz_failover_metric_regions"]
mode: mode:
@@ -465,9 +488,9 @@ jobs:
name: Setup Minio name: Setup Minio
uses: ./.github/actions/setup-minio uses: ./.github/actions/setup-minio
- if: matrix.mode.kafka - if: matrix.mode.kafka
name: Setup Kafka cluser name: Setup Kafka cluster
uses: ./.github/actions/setup-kafka-cluster uses: ./.github/actions/setup-kafka-cluster
- name: Setup Etcd cluser - name: Setup Etcd cluster
uses: ./.github/actions/setup-etcd-cluster uses: ./.github/actions/setup-etcd-cluster
# Prepares for fuzz tests # Prepares for fuzz tests
- uses: arduino/setup-protoc@v3 - uses: arduino/setup-protoc@v3
@@ -541,6 +564,11 @@ jobs:
shell: bash shell: bash
run: | run: |
kubectl describe nodes kubectl describe nodes
- name: Describe pods
if: failure()
shell: bash
run: |
kubectl describe pod -n my-greptimedb
- name: Export kind logs - name: Export kind logs
if: failure() if: failure()
shell: bash shell: bash
@@ -563,12 +591,14 @@ jobs:
docker system prune -f docker system prune -f
sqlness: sqlness:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Sqlness Test (${{ matrix.mode.name }}) name: Sqlness Test (${{ matrix.mode.name }})
needs: build needs: build
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
fail-fast: false
matrix: matrix:
os: [ ubuntu-20.04 ] os: [ ubuntu-latest ]
mode: mode:
- name: "Basic" - name: "Basic"
opts: "" opts: ""
@@ -576,7 +606,7 @@ jobs:
- name: "Remote WAL" - name: "Remote WAL"
opts: "-w kafka -k 127.0.0.1:9092" opts: "-w kafka -k 127.0.0.1:9092"
kafka: true kafka: true
- name: "Pg Kvbackend" - name: "PostgreSQL KvBackend"
opts: "--setup-pg" opts: "--setup-pg"
kafka: false kafka: false
timeout-minutes: 60 timeout-minutes: 60
@@ -606,8 +636,9 @@ jobs:
retention-days: 3 retention-days: 3
fmt: fmt:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Rustfmt name: Rustfmt
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -623,8 +654,9 @@ jobs:
run: make fmt-check run: make fmt-check
clippy: clippy:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Clippy name: Clippy
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -648,6 +680,7 @@ jobs:
run: make clippy run: make clippy
conflict-check: conflict-check:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
name: Check for conflict name: Check for conflict
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
@@ -658,7 +691,7 @@ jobs:
uses: olivernybroe/action-conflict-finder@v4.0 uses: olivernybroe/action-conflict-finder@v4.0
test: test:
if: github.event_name != 'merge_group' if: ${{ github.repository == 'GreptimeTeam/greptimedb' && github.event_name != 'merge_group' }}
runs-on: ubuntu-22.04-arm runs-on: ubuntu-22.04-arm
timeout-minutes: 60 timeout-minutes: 60
needs: [conflict-check, clippy, fmt] needs: [conflict-check, clippy, fmt]
@@ -673,7 +706,7 @@ jobs:
- name: Install toolchain - name: Install toolchain
uses: actions-rust-lang/setup-rust-toolchain@v1 uses: actions-rust-lang/setup-rust-toolchain@v1
with: with:
cache: false cache: false
- name: Rust Cache - name: Rust Cache
uses: Swatinem/rust-cache@v2 uses: Swatinem/rust-cache@v2
with: with:
@@ -704,13 +737,14 @@ jobs:
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000 GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379 GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092 GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093 GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs" UNITTEST_LOG_DIR: "__unittest_logs"
coverage: coverage:
if: github.event_name == 'merge_group' if: ${{ github.repository == 'GreptimeTeam/greptimedb' && github.event_name == 'merge_group' }}
runs-on: ubuntu-20.04-8-cores runs-on: ubuntu-22.04-8-cores
timeout-minutes: 60 timeout-minutes: 60
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -755,6 +789,7 @@ jobs:
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000 GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379 GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092 GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093 GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs" UNITTEST_LOG_DIR: "__unittest_logs"
@@ -768,9 +803,10 @@ jobs:
verbose: true verbose: true
# compat: # compat:
# if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
# name: Compatibility Test # name: Compatibility Test
# needs: build # needs: build
# runs-on: ubuntu-20.04 # runs-on: ubuntu-22.04
# timeout-minutes: 60 # timeout-minutes: 60
# steps: # steps:
# - uses: actions/checkout@v4 # - uses: actions/checkout@v4

View File

@@ -9,7 +9,7 @@ concurrency:
jobs: jobs:
docbot: docbot:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
permissions: permissions:
pull-requests: write pull-requests: write
contents: read contents: read

View File

@@ -31,7 +31,7 @@ name: CI
jobs: jobs:
typos: typos:
name: Spell Check with Typos name: Spell Check with Typos
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@@ -39,7 +39,7 @@ jobs:
- uses: crate-ci/typos@master - uses: crate-ci/typos@master
license-header-check: license-header-check:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
name: Check License Header name: Check License Header
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -49,29 +49,29 @@ jobs:
check: check:
name: Check name: Check
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
fmt: fmt:
name: Rustfmt name: Rustfmt
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
clippy: clippy:
name: Clippy name: Clippy
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
coverage: coverage:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
test: test:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- run: 'echo "No action required"' - run: 'echo "No action required"'
@@ -80,7 +80,7 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [ ubuntu-20.04 ] os: [ ubuntu-latest ]
mode: mode:
- name: "Basic" - name: "Basic"
- name: "Remote WAL" - name: "Remote WAL"

View File

@@ -14,11 +14,11 @@ on:
description: The runner uses to build linux-amd64 artifacts description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64 default: ec2-c6i.4xlarge-amd64
options: options:
- ubuntu-20.04 - ubuntu-22.04
- ubuntu-20.04-8-cores - ubuntu-22.04-8-cores
- ubuntu-20.04-16-cores - ubuntu-22.04-16-cores
- ubuntu-20.04-32-cores - ubuntu-22.04-32-cores
- ubuntu-20.04-64-cores - ubuntu-22.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G - ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G - ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -70,7 +70,7 @@ jobs:
allocate-runners: allocate-runners:
name: Allocate runners name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }} linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }} linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -182,7 +182,7 @@ jobs:
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }} nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps: steps:
@@ -214,7 +214,7 @@ jobs:
allocate-runners, allocate-runners,
release-images-to-dockerhub, release-images-to-dockerhub,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
# When we push to ACR, it's easy to fail due to some unknown network issues. # When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this. # However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated. # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -249,7 +249,7 @@ jobs:
name: Stop linux-amd64 runner name: Stop linux-amd64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
@@ -275,7 +275,7 @@ jobs:
name: Stop linux-arm64 runner name: Stop linux-arm64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
@@ -303,7 +303,7 @@ jobs:
needs: [ needs: [
release-images-to-dockerhub release-images-to-dockerhub
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
permissions: permissions:
issues: write issues: write
env: env:

View File

@@ -13,7 +13,7 @@ jobs:
sqlness-test: sqlness-test:
name: Run sqlness test name: Run sqlness test
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -107,7 +107,6 @@ jobs:
CARGO_BUILD_RUSTFLAGS: "-C linker=lld-link" CARGO_BUILD_RUSTFLAGS: "-C linker=lld-link"
RUST_BACKTRACE: 1 RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0 CARGO_INCREMENTAL: 0
RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }} GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }} GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }} GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
@@ -118,22 +117,22 @@ jobs:
name: Run clean build on Linux name: Run clean build on Linux
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
timeout-minutes: 60 timeout-minutes: 45
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
persist-credentials: false persist-credentials: false
- uses: cachix/install-nix-action@v27 - uses: cachix/install-nix-action@v31
with: - run: nix develop --command cargo check --bin greptime
nix_path: nixpkgs=channel:nixos-24.11 env:
- run: nix develop --command cargo build CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
check-status: check-status:
name: Check status name: Check status
needs: [sqlness-test, sqlness-windows, test-on-windows] needs: [sqlness-test, sqlness-windows, test-on-windows]
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
check-result: ${{ steps.set-check-result.outputs.check-result }} check-result: ${{ steps.set-check-result.outputs.check-result }}
steps: steps:
@@ -146,7 +145,7 @@ jobs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} # Not requiring successful dependent jobs, always run. if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team name: Send notification to Greptime team
needs: [check-status] needs: [check-status]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
env: env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }} SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps: steps:

42
.github/workflows/pr-labeling.yaml vendored Normal file
View File

@@ -0,0 +1,42 @@
name: 'PR Labeling'
on:
pull_request_target:
types:
- opened
- synchronize
- reopened
permissions:
contents: read
pull-requests: write
issues: write
jobs:
labeler:
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v4
- uses: actions/labeler@v5
with:
configuration-path: ".github/labeler.yaml"
repo-token: "${{ secrets.GITHUB_TOKEN }}"
size-label:
runs-on: ubuntu-latest
steps:
- uses: pascalgn/size-label-action@v0.5.5
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
with:
sizes: >
{
"0": "XS",
"100": "S",
"300": "M",
"1000": "L",
"1500": "XL",
"2000": "XXL"
}

View File

@@ -24,12 +24,20 @@ on:
description: Release dev-builder-android image description: Release dev-builder-android image
required: false required: false
default: false default: false
update_dev_builder_image_tag:
type: boolean
description: Update the DEV_BUILDER_IMAGE_TAG in Makefile and create a PR
required: false
default: false
jobs: jobs:
release-dev-builder-images: release-dev-builder-images:
name: Release dev builder images name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job. # The jobs are triggered by the following events:
runs-on: ubuntu-20.04-16-cores # 1. Manually triggered workflow_dispatch event
# 2. Push event when the PR that modifies the `rust-toolchain.toml` or `docker/dev-builder/**` is merged to main
if: ${{ github.event_name == 'push' || inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }}
runs-on: ubuntu-latest
outputs: outputs:
version: ${{ steps.set-version.outputs.version }} version: ${{ steps.set-version.outputs.version }}
steps: steps:
@@ -57,13 +65,13 @@ jobs:
version: ${{ env.VERSION }} version: ${{ env.VERSION }}
dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }} dockerhub-image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }} dockerhub-image-registry-token: ${{ secrets.DOCKERHUB_TOKEN }}
build-dev-builder-ubuntu: ${{ inputs.release_dev_builder_ubuntu_image }} build-dev-builder-ubuntu: ${{ inputs.release_dev_builder_ubuntu_image || github.event_name == 'push' }}
build-dev-builder-centos: ${{ inputs.release_dev_builder_centos_image }} build-dev-builder-centos: ${{ inputs.release_dev_builder_centos_image || github.event_name == 'push' }}
build-dev-builder-android: ${{ inputs.release_dev_builder_android_image }} build-dev-builder-android: ${{ inputs.release_dev_builder_android_image || github.event_name == 'push' }}
release-dev-builder-images-ecr: release-dev-builder-images-ecr:
name: Release dev builder images to AWS ECR name: Release dev builder images to AWS ECR
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
release-dev-builder-images release-dev-builder-images
] ]
@@ -85,7 +93,7 @@ jobs:
- name: Push dev-builder-ubuntu image - name: Push dev-builder-ubuntu image
shell: bash shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }} if: ${{ inputs.release_dev_builder_ubuntu_image || github.event_name == 'push' }}
env: env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }} IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }} IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
@@ -106,7 +114,7 @@ jobs:
- name: Push dev-builder-centos image - name: Push dev-builder-centos image
shell: bash shell: bash
if: ${{ inputs.release_dev_builder_centos_image }} if: ${{ inputs.release_dev_builder_centos_image || github.event_name == 'push' }}
env: env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }} IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }} IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
@@ -127,7 +135,7 @@ jobs:
- name: Push dev-builder-android image - name: Push dev-builder-android image
shell: bash shell: bash
if: ${{ inputs.release_dev_builder_android_image }} if: ${{ inputs.release_dev_builder_android_image || github.event_name == 'push' }}
env: env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }} IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }} IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
@@ -148,7 +156,7 @@ jobs:
release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container. release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region name: Release dev builder images to CN region
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
release-dev-builder-images release-dev-builder-images
] ]
@@ -162,7 +170,7 @@ jobs:
- name: Push dev-builder-ubuntu image - name: Push dev-builder-ubuntu image
shell: bash shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }} if: ${{ inputs.release_dev_builder_ubuntu_image || github.event_name == 'push' }}
env: env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }} IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }} IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
@@ -176,7 +184,7 @@ jobs:
- name: Push dev-builder-centos image - name: Push dev-builder-centos image
shell: bash shell: bash
if: ${{ inputs.release_dev_builder_centos_image }} if: ${{ inputs.release_dev_builder_centos_image || github.event_name == 'push' }}
env: env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }} IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }} IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
@@ -190,7 +198,7 @@ jobs:
- name: Push dev-builder-android image - name: Push dev-builder-android image
shell: bash shell: bash
if: ${{ inputs.release_dev_builder_android_image }} if: ${{ inputs.release_dev_builder_android_image || github.event_name == 'push' }}
env: env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }} IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }} IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
@@ -201,3 +209,24 @@ jobs:
quay.io/skopeo/stable:latest \ quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \ copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION
update-dev-builder-image-tag:
name: Update dev-builder image tag
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
if: ${{ github.event_name == 'push' || inputs.update_dev_builder_image_tag }}
needs: [
release-dev-builder-images
]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Update dev-builder image tag
shell: bash
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
./.github/scripts/update-dev-builder-version.sh ${{ needs.release-dev-builder-images.outputs.version }}
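The new job calls `.github/scripts/update-dev-builder-version.sh`, which is not part of this diff. Purely as an illustration, a minimal sketch of what such a script might do, assuming it only rewrites `DEV_BUILDER_IMAGE_TAG` in the Makefile and opens a PR with `gh`:

    #!/usr/bin/env bash
    # Hypothetical sketch only; the real update-dev-builder-version.sh is not shown in this diff.
    set -euo pipefail

    VERSION="$1"
    BRANCH="chore/update-dev-builder-image-tag-${VERSION}"

    # Rewrite the image tag pinned in the Makefile.
    sed -i "s/^DEV_BUILDER_IMAGE_TAG ?=.*/DEV_BUILDER_IMAGE_TAG ?= ${VERSION}/" Makefile

    git checkout -b "${BRANCH}"
    git commit -am "chore: update dev-builder image tag to ${VERSION}"
    git push origin "${BRANCH}"

    # GITHUB_TOKEN is provided by the workflow step above.
    gh pr create --title "chore: update dev-builder image tag to ${VERSION}" \
      --body "Generated by the release-dev-builder-images workflow."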

View File

@@ -18,11 +18,11 @@ on:
description: The runner uses to build linux-amd64 artifacts description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64 default: ec2-c6i.4xlarge-amd64
options: options:
- ubuntu-20.04 - ubuntu-22.04
- ubuntu-20.04-8-cores - ubuntu-22.04-8-cores
- ubuntu-20.04-16-cores - ubuntu-22.04-16-cores
- ubuntu-20.04-32-cores - ubuntu-22.04-32-cores
- ubuntu-20.04-64-cores - ubuntu-22.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G - ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G - ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G - ec2-c6i.4xlarge-amd64 # 16C32G
@@ -88,16 +88,14 @@ env:
# Controls whether to run tests, include unit-test, integration-test and sqlness. # Controls whether to run tests, include unit-test, integration-test and sqlness.
DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }} DISABLE_RUN_TESTS: ${{ inputs.skip_test || vars.DEFAULT_SKIP_TEST }}
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313; # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nightly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.12.0
jobs: jobs:
allocate-runners: allocate-runners:
name: Allocate runners name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
outputs: outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }} linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }} linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -112,6 +110,8 @@ jobs:
# The 'version' use as the global tag name of the release workflow. # The 'version' use as the global tag name of the release workflow.
version: ${{ steps.create-version.outputs.version }} version: ${{ steps.create-version.outputs.version }}
should-push-latest-tag: ${{ steps.check-version.outputs.should-push-latest-tag }}
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -126,7 +126,7 @@ jobs:
# The create-version will create a global variable named 'version' in the global workflows. # The create-version will create a global variable named 'version' in the global workflows.
# - If it's a tag push release, the version is the tag name(${{ github.ref_name }}); # - If it's a tag push release, the version is the tag name(${{ github.ref_name }});
# - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like v0.2.0-nigthly-20230313; # - If it's a scheduled release, the version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-$buildTime', like v0.2.0-nightly-20230313;
# - If it's a manual release, the version is '${{ env.NEXT_RELEASE_VERSION }}-<short-git-sha>-YYYYMMDDSS', like v0.2.0-e5b243c-2023071245; # - If it's a manual release, the version is '${{ env.NEXT_RELEASE_VERSION }}-<short-git-sha>-YYYYMMDDSS', like v0.2.0-e5b243c-2023071245;
- name: Create version - name: Create version
id: create-version id: create-version
@@ -135,9 +135,13 @@ jobs:
env: env:
GITHUB_EVENT_NAME: ${{ github.event_name }} GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_REF_NAME: ${{ github.ref_name }} GITHUB_REF_NAME: ${{ github.ref_name }}
NEXT_RELEASE_VERSION: ${{ env.NEXT_RELEASE_VERSION }}
NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }} NIGHTLY_RELEASE_PREFIX: ${{ env.NIGHTLY_RELEASE_PREFIX }}
- name: Check version
id: check-version
run: |
./.github/scripts/check-version.sh "${{ steps.create-version.outputs.version }}"
- name: Allocate linux-amd64 runner - name: Allocate linux-amd64 runner
if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }} if: ${{ inputs.build_linux_amd64_artifacts || github.event_name == 'push' || github.event_name == 'schedule' }}
uses: ./.github/actions/start-runner uses: ./.github/actions/start-runner
@@ -299,7 +303,7 @@ jobs:
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
] ]
runs-on: ubuntu-2004-16-cores runs-on: ubuntu-latest
outputs: outputs:
build-image-result: ${{ steps.set-build-image-result.outputs.build-image-result }} build-image-result: ${{ steps.set-build-image-result.outputs.build-image-result }}
steps: steps:
@@ -317,7 +321,7 @@ jobs:
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }} image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }} image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }} version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: true push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
- name: Set build image result - name: Set build image result
id: set-build-image-result id: set-build-image-result
@@ -335,7 +339,7 @@ jobs:
build-windows-artifacts, build-windows-artifacts,
release-images-to-dockerhub, release-images-to-dockerhub,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest-16-cores
# When we push to ACR, it's easy to fail due to some unknown network issues. # When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this. # However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated. # The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -364,7 +368,7 @@ jobs:
dev-mode: false dev-mode: false
upload-to-s3: true upload-to-s3: true
update-version-info: true update-version-info: true
push-latest-tag: true push-latest-tag: ${{ needs.allocate-runners.outputs.should-push-latest-tag == 'true' && github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
publish-github-release: publish-github-release:
name: Create GitHub release and upload artifacts name: Create GitHub release and upload artifacts
@@ -377,7 +381,7 @@ jobs:
build-windows-artifacts, build-windows-artifacts,
release-images-to-dockerhub, release-images-to-dockerhub,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@@ -391,12 +395,12 @@ jobs:
### Stop runners ### ### Stop runners ###
# It's very necessary to split the job of releasing runners into 'stop-linux-amd64-runner' and 'stop-linux-arm64-runner'. # It's very necessary to split the job of releasing runners into 'stop-linux-amd64-runner' and 'stop-linux-arm64-runner'.
# Because we can terminate the specified EC2 instance immediately after the job is finished without uncessary waiting. # Because we can terminate the specified EC2 instance immediately after the job is finished without unnecessary waiting.
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released. stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner name: Stop linux-amd64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-amd64-artifacts, build-linux-amd64-artifacts,
@@ -422,7 +426,7 @@ jobs:
name: Stop linux-arm64 runner name: Stop linux-arm64 runner
# Only run this job when the runner is allocated. # Only run this job when the runner is allocated.
if: ${{ always() }} if: ${{ always() }}
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
needs: [ needs: [
allocate-runners, allocate-runners,
build-linux-arm64-artifacts, build-linux-arm64-artifacts,
@@ -444,11 +448,11 @@ jobs:
aws-region: ${{ vars.EC2_RUNNER_REGION }} aws-region: ${{ vars.EC2_RUNNER_REGION }}
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
bump-doc-version: bump-downstream-repo-versions:
name: Bump doc version name: Bump downstream repo versions
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }} if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [allocate-runners] needs: [allocate-runners, publish-github-release]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions: permissions:
issues: write # Allows the action to create issues for cyborg. issues: write # Allows the action to create issues for cyborg.
@@ -459,13 +463,58 @@ jobs:
fetch-depth: 0 fetch-depth: 0
persist-credentials: false persist-credentials: false
- uses: ./.github/actions/setup-cyborg - uses: ./.github/actions/setup-cyborg
- name: Bump doc version - name: Bump downstream repo versions
working-directory: cyborg working-directory: cyborg
run: pnpm tsx bin/bump-doc-version.ts run: pnpm tsx bin/bump-versions.ts
env: env:
TARGET_REPOS: website,docs,demo
VERSION: ${{ needs.allocate-runners.outputs.version }} VERSION: ${{ needs.allocate-runners.outputs.version }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
WEBSITE_REPO_TOKEN: ${{ secrets.WEBSITE_REPO_TOKEN }}
DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }} DOCS_REPO_TOKEN: ${{ secrets.DOCS_REPO_TOKEN }}
DEMO_REPO_TOKEN: ${{ secrets.DEMO_REPO_TOKEN }}
bump-helm-charts-version:
name: Bump helm charts version
if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
needs: [allocate-runners, publish-github-release]
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Bump helm charts version
env:
GITHUB_TOKEN: ${{ secrets.HELM_CHARTS_REPO_TOKEN }}
VERSION: ${{ needs.allocate-runners.outputs.version }}
run: |
./.github/scripts/update-helm-charts-version.sh
bump-homebrew-greptime-version:
name: Bump homebrew greptime version
if: ${{ github.ref_type == 'tag' && !contains(github.ref_name, 'nightly') && github.event_name != 'schedule' }}
needs: [allocate-runners, publish-github-release]
runs-on: ubuntu-latest
permissions:
contents: write
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Bump homebrew greptime version
env:
GITHUB_TOKEN: ${{ secrets.HOMEBREW_GREPTIME_REPO_TOKEN }}
VERSION: ${{ needs.allocate-runners.outputs.version }}
run: |
./.github/scripts/update-homebrew-greptme-version.sh
notification: notification:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'push' || github.event_name == 'schedule') && always() }} if: ${{ github.repository == 'GreptimeTeam/greptimedb' && (github.event_name == 'push' || github.event_name == 'schedule') && always() }}
@@ -475,7 +524,7 @@ jobs:
build-macos-artifacts, build-macos-artifacts,
build-windows-artifacts, build-windows-artifacts,
] ]
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions: permissions:
issues: write # Allows the action to create issues for cyborg. issues: write # Allows the action to create issues for cyborg.
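The reworked `push-latest-tag` expressions above hinge on the new `should-push-latest-tag` output produced by the `check-version` step, but `.github/scripts/check-version.sh` itself is not included in this diff. A rough sketch of how such a check could be implemented, assuming it simply compares the released tag against the newest stable (non-nightly) tag:

    #!/usr/bin/env bash
    # Hypothetical sketch; the real check-version.sh is not shown in this diff.
    set -euo pipefail

    VERSION="$1"   # e.g. v0.12.2, passed in from the create-version step

    # Newest stable (non-nightly) tag already present in the repository.
    latest=$(git tag --list 'v*' | grep -v nightly | sort -V | tail -n 1)

    # Push the "latest" image tag only when the released version is the newest stable one.
    if [ "$(printf '%s\n%s\n' "${latest}" "${VERSION}" | sort -V | tail -n 1)" = "${VERSION}" ]; then
      echo "should-push-latest-tag=true" >> "${GITHUB_OUTPUT}"
    else
      echo "should-push-latest-tag=false" >> "${GITHUB_OUTPUT}"
    fi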

View File

@@ -11,14 +11,17 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true cancel-in-progress: true
permissions:
issues: write
contents: write
pull-requests: write
jobs: jobs:
check: check:
runs-on: ubuntu-20.04 runs-on: ubuntu-latest
timeout-minutes: 10 timeout-minutes: 10
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg - uses: ./.github/actions/setup-cyborg
- name: Check Pull Request - name: Check Pull Request
working-directory: cyborg working-directory: cyborg

.gitignore (+7 lines)

@@ -54,3 +54,10 @@ tests-fuzz/corpus/
# Nix # Nix
.direnv .direnv
.envrc .envrc
## default data home
greptimedb_data
# github
!/.github

Cargo.lock (generated, 157 lines changed)

@@ -185,7 +185,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c"
[[package]] [[package]]
name = "api" name = "api"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"common-base", "common-base",
"common-decimal", "common-decimal",
@@ -710,7 +710,7 @@ dependencies = [
[[package]] [[package]]
name = "auth" name = "auth"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"async-trait", "async-trait",
@@ -1324,7 +1324,7 @@ dependencies = [
[[package]] [[package]]
name = "cache" name = "cache"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"catalog", "catalog",
"common-error", "common-error",
@@ -1348,7 +1348,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]] [[package]]
name = "catalog" name = "catalog"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arrow", "arrow",
@@ -1661,7 +1661,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]] [[package]]
name = "cli" name = "cli"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"auth", "auth",
@@ -1703,7 +1703,7 @@ dependencies = [
"session", "session",
"snafu 0.8.5", "snafu 0.8.5",
"store-api", "store-api",
"substrait 0.12.0", "substrait 0.12.2",
"table", "table",
"tempfile", "tempfile",
"tokio", "tokio",
@@ -1712,7 +1712,7 @@ dependencies = [
[[package]] [[package]]
name = "client" name = "client"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arc-swap", "arc-swap",
@@ -1739,7 +1739,7 @@ dependencies = [
"rand", "rand",
"serde_json", "serde_json",
"snafu 0.8.5", "snafu 0.8.5",
"substrait 0.12.0", "substrait 0.12.2",
"substrait 0.37.3", "substrait 0.37.3",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
@@ -1780,7 +1780,7 @@ dependencies = [
[[package]] [[package]]
name = "cmd" name = "cmd"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"auth", "auth",
@@ -1791,6 +1791,7 @@ dependencies = [
"clap 4.5.19", "clap 4.5.19",
"cli", "cli",
"client", "client",
"colored",
"common-base", "common-base",
"common-catalog", "common-catalog",
"common-config", "common-config",
@@ -1825,7 +1826,10 @@ dependencies = [
"mito2", "mito2",
"moka", "moka",
"nu-ansi-term", "nu-ansi-term",
"object-store",
"parquet",
"plugins", "plugins",
"pprof",
"prometheus", "prometheus",
"prost 0.13.3", "prost 0.13.3",
"query", "query",
@@ -1841,7 +1845,7 @@ dependencies = [
"similar-asserts", "similar-asserts",
"snafu 0.8.5", "snafu 0.8.5",
"store-api", "store-api",
"substrait 0.12.0", "substrait 0.12.2",
"table", "table",
"temp-env", "temp-env",
"tempfile", "tempfile",
@@ -1858,6 +1862,16 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
[[package]]
name = "colored"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
dependencies = [
"lazy_static",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "combine" name = "combine"
version = "4.6.7" version = "4.6.7"
@@ -1887,7 +1901,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335"
[[package]] [[package]]
name = "common-base" name = "common-base"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"anymap2", "anymap2",
"async-trait", "async-trait",
@@ -1909,11 +1923,11 @@ dependencies = [
[[package]] [[package]]
name = "common-catalog" name = "common-catalog"
version = "0.12.0" version = "0.12.2"
[[package]] [[package]]
name = "common-config" name = "common-config"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"common-base", "common-base",
"common-error", "common-error",
@@ -1938,7 +1952,7 @@ dependencies = [
[[package]] [[package]]
name = "common-datasource" name = "common-datasource"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-schema", "arrow-schema",
@@ -1974,7 +1988,7 @@ dependencies = [
[[package]] [[package]]
name = "common-decimal" name = "common-decimal"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"bigdecimal 0.4.5", "bigdecimal 0.4.5",
"common-error", "common-error",
@@ -1987,7 +2001,7 @@ dependencies = [
[[package]] [[package]]
name = "common-error" name = "common-error"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"http 1.1.0", "http 1.1.0",
"snafu 0.8.5", "snafu 0.8.5",
@@ -1997,7 +2011,7 @@ dependencies = [
[[package]] [[package]]
name = "common-frontend" name = "common-frontend"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"common-error", "common-error",
@@ -2007,7 +2021,7 @@ dependencies = [
[[package]] [[package]]
name = "common-function" name = "common-function"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"api", "api",
@@ -2055,7 +2069,7 @@ dependencies = [
[[package]] [[package]]
name = "common-greptimedb-telemetry" name = "common-greptimedb-telemetry"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"common-runtime", "common-runtime",
@@ -2072,7 +2086,7 @@ dependencies = [
[[package]] [[package]]
name = "common-grpc" name = "common-grpc"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arrow-flight", "arrow-flight",
@@ -2100,7 +2114,7 @@ dependencies = [
[[package]] [[package]]
name = "common-grpc-expr" name = "common-grpc-expr"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"common-base", "common-base",
@@ -2119,7 +2133,7 @@ dependencies = [
[[package]] [[package]]
name = "common-macro" name = "common-macro"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"arc-swap", "arc-swap",
"common-query", "common-query",
@@ -2133,7 +2147,7 @@ dependencies = [
[[package]] [[package]]
name = "common-mem-prof" name = "common-mem-prof"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"common-error", "common-error",
"common-macro", "common-macro",
@@ -2146,7 +2160,7 @@ dependencies = [
[[package]] [[package]]
name = "common-meta" name = "common-meta"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"anymap2", "anymap2",
"api", "api",
@@ -2206,7 +2220,7 @@ dependencies = [
[[package]] [[package]]
name = "common-options" name = "common-options"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"common-grpc", "common-grpc",
"humantime-serde", "humantime-serde",
@@ -2215,11 +2229,11 @@ dependencies = [
[[package]] [[package]]
name = "common-plugins" name = "common-plugins"
version = "0.12.0" version = "0.12.2"
[[package]] [[package]]
name = "common-pprof" name = "common-pprof"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"common-error", "common-error",
"common-macro", "common-macro",
@@ -2231,7 +2245,7 @@ dependencies = [
[[package]] [[package]]
name = "common-procedure" name = "common-procedure"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
@@ -2258,7 +2272,7 @@ dependencies = [
[[package]] [[package]]
name = "common-procedure-test" name = "common-procedure-test"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"common-procedure", "common-procedure",
@@ -2266,7 +2280,7 @@ dependencies = [
[[package]] [[package]]
name = "common-query" name = "common-query"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"async-trait", "async-trait",
@@ -2292,7 +2306,7 @@ dependencies = [
[[package]] [[package]]
name = "common-recordbatch" name = "common-recordbatch"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"arc-swap", "arc-swap",
"common-error", "common-error",
@@ -2311,7 +2325,7 @@ dependencies = [
[[package]] [[package]]
name = "common-runtime" name = "common-runtime"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"clap 4.5.19", "clap 4.5.19",
@@ -2341,7 +2355,7 @@ dependencies = [
[[package]] [[package]]
name = "common-telemetry" name = "common-telemetry"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"atty", "atty",
"backtrace", "backtrace",
@@ -2369,7 +2383,7 @@ dependencies = [
[[package]] [[package]]
name = "common-test-util" name = "common-test-util"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"client", "client",
"common-query", "common-query",
@@ -2381,7 +2395,7 @@ dependencies = [
[[package]] [[package]]
name = "common-time" name = "common-time"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"arrow", "arrow",
"chrono", "chrono",
@@ -2399,7 +2413,7 @@ dependencies = [
[[package]] [[package]]
name = "common-version" name = "common-version"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"build-data", "build-data",
"const_format", "const_format",
@@ -2409,7 +2423,7 @@ dependencies = [
[[package]] [[package]]
name = "common-wal" name = "common-wal"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"common-base", "common-base",
"common-error", "common-error",
@@ -3340,7 +3354,7 @@ dependencies = [
[[package]] [[package]]
name = "datanode" name = "datanode"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arrow-flight", "arrow-flight",
@@ -3392,7 +3406,7 @@ dependencies = [
"session", "session",
"snafu 0.8.5", "snafu 0.8.5",
"store-api", "store-api",
"substrait 0.12.0", "substrait 0.12.2",
"table", "table",
"tokio", "tokio",
"toml 0.8.19", "toml 0.8.19",
@@ -3401,7 +3415,7 @@ dependencies = [
[[package]] [[package]]
name = "datatypes" name = "datatypes"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"arrow", "arrow",
"arrow-array", "arrow-array",
@@ -4045,7 +4059,7 @@ dependencies = [
[[package]] [[package]]
name = "file-engine" name = "file-engine"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"async-trait", "async-trait",
@@ -4155,7 +4169,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8"
[[package]] [[package]]
name = "flow" name = "flow"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arrow", "arrow",
@@ -4165,7 +4179,6 @@ dependencies = [
"bytes", "bytes",
"cache", "cache",
"catalog", "catalog",
"chrono",
"client", "client",
"common-base", "common-base",
"common-catalog", "common-catalog",
@@ -4217,7 +4230,7 @@ dependencies = [
"snafu 0.8.5", "snafu 0.8.5",
"store-api", "store-api",
"strum 0.25.0", "strum 0.25.0",
"substrait 0.12.0", "substrait 0.12.2",
"table", "table",
"tokio", "tokio",
"tonic 0.12.3", "tonic 0.12.3",
@@ -4272,7 +4285,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa"
[[package]] [[package]]
name = "frontend" name = "frontend"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arc-swap", "arc-swap",
@@ -5540,7 +5553,7 @@ dependencies = [
[[package]] [[package]]
name = "index" name = "index"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"asynchronous-codec", "asynchronous-codec",
@@ -6332,7 +6345,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]] [[package]]
name = "log-query" name = "log-query"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"chrono", "chrono",
"common-error", "common-error",
@@ -6344,7 +6357,7 @@ dependencies = [
[[package]] [[package]]
name = "log-store" name = "log-store"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
@@ -6637,7 +6650,7 @@ dependencies = [
[[package]] [[package]]
name = "meta-client" name = "meta-client"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"async-trait", "async-trait",
@@ -6664,7 +6677,7 @@ dependencies = [
[[package]] [[package]]
name = "meta-srv" name = "meta-srv"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"async-trait", "async-trait",
@@ -6750,7 +6763,7 @@ dependencies = [
[[package]] [[package]]
name = "metric-engine" name = "metric-engine"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"aquamarine", "aquamarine",
@@ -6848,7 +6861,7 @@ dependencies = [
[[package]] [[package]]
name = "mito2" name = "mito2"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"aquamarine", "aquamarine",
@@ -7545,7 +7558,7 @@ dependencies = [
[[package]] [[package]]
name = "object-store" name = "object-store"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
@@ -7794,7 +7807,7 @@ dependencies = [
[[package]] [[package]]
name = "operator" name = "operator"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"api", "api",
@@ -7842,7 +7855,7 @@ dependencies = [
"sql", "sql",
"sqlparser 0.52.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=71dd86058d2af97b9925093d40c4e03360403170)", "sqlparser 0.52.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=71dd86058d2af97b9925093d40c4e03360403170)",
"store-api", "store-api",
"substrait 0.12.0", "substrait 0.12.2",
"table", "table",
"tokio", "tokio",
"tokio-util", "tokio-util",
@@ -8079,7 +8092,7 @@ dependencies = [
[[package]] [[package]]
name = "partition" name = "partition"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"async-trait", "async-trait",
@@ -8347,7 +8360,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]] [[package]]
name = "pipeline" name = "pipeline"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"api", "api",
@@ -8487,7 +8500,7 @@ dependencies = [
[[package]] [[package]]
name = "plugins" name = "plugins"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"auth", "auth",
"clap 4.5.19", "clap 4.5.19",
@@ -8749,7 +8762,7 @@ dependencies = [
[[package]] [[package]]
name = "promql" name = "promql"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"async-trait", "async-trait",
@@ -8994,7 +9007,7 @@ dependencies = [
[[package]] [[package]]
name = "puffin" name = "puffin"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-compression 0.4.13", "async-compression 0.4.13",
"async-trait", "async-trait",
@@ -9035,7 +9048,7 @@ dependencies = [
[[package]] [[package]]
name = "query" name = "query"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"api", "api",
@@ -9100,7 +9113,7 @@ dependencies = [
"sqlparser 0.52.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=71dd86058d2af97b9925093d40c4e03360403170)", "sqlparser 0.52.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=71dd86058d2af97b9925093d40c4e03360403170)",
"statrs", "statrs",
"store-api", "store-api",
"substrait 0.12.0", "substrait 0.12.2",
"table", "table",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
@@ -10445,7 +10458,7 @@ dependencies = [
[[package]] [[package]]
name = "servers" name = "servers"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"ahash 0.8.11", "ahash 0.8.11",
"api", "api",
@@ -10562,7 +10575,7 @@ dependencies = [
[[package]] [[package]]
name = "session" name = "session"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arc-swap", "arc-swap",
@@ -10871,7 +10884,7 @@ dependencies = [
[[package]] [[package]]
name = "sql" name = "sql"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"chrono", "chrono",
@@ -10925,7 +10938,7 @@ dependencies = [
[[package]] [[package]]
name = "sqlness-runner" name = "sqlness-runner"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"clap 4.5.19", "clap 4.5.19",
@@ -11242,7 +11255,7 @@ dependencies = [
[[package]] [[package]]
name = "store-api" name = "store-api"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"aquamarine", "aquamarine",
@@ -11372,7 +11385,7 @@ dependencies = [
[[package]] [[package]]
name = "substrait" name = "substrait"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"bytes", "bytes",
@@ -11553,7 +11566,7 @@ dependencies = [
[[package]] [[package]]
name = "table" name = "table"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"async-trait", "async-trait",
@@ -11804,7 +11817,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76"
[[package]] [[package]]
name = "tests-fuzz" name = "tests-fuzz"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"async-trait", "async-trait",
@@ -11848,7 +11861,7 @@ dependencies = [
[[package]] [[package]]
name = "tests-integration" name = "tests-integration"
version = "0.12.0" version = "0.12.2"
dependencies = [ dependencies = [
"api", "api",
"arrow-flight", "arrow-flight",
@@ -11914,7 +11927,7 @@ dependencies = [
"sql", "sql",
"sqlx", "sqlx",
"store-api", "store-api",
"substrait 0.12.0", "substrait 0.12.2",
"table", "table",
"tempfile", "tempfile",
"time", "time",

View File

@@ -67,7 +67,7 @@ members = [
resolver = "2" resolver = "2"
[workspace.package] [workspace.package]
version = "0.12.0" version = "0.12.2"
edition = "2021" edition = "2021"
license = "Apache-2.0" license = "Apache-2.0"

View File

@@ -231,6 +231,7 @@ overwrite_entry_start_id = false
# secret_access_key = "123456" # secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com" # endpoint = "https://s3.amazonaws.com"
# region = "us-west-2" # region = "us-west-2"
# enable_virtual_host_style = false
# Example of using Oss as the storage. # Example of using Oss as the storage.
# [storage] # [storage]

View File

@@ -318,6 +318,7 @@ retry_delay = "500ms"
# secret_access_key = "123456" # secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com" # endpoint = "https://s3.amazonaws.com"
# region = "us-west-2" # region = "us-west-2"
# enable_virtual_host_style = false
# Example of using Oss as the storage. # Example of using Oss as the storage.
# [storage] # [storage]
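The newly documented `enable_virtual_host_style` option only changes how the bucket is addressed in S3 requests. With hypothetical bucket and object names, the two URL styles look like this:

    # Path-style requests (enable_virtual_host_style = false, the default):
    curl -I "https://s3.amazonaws.com/my-bucket/data/file.parquet"

    # Virtual-host-style requests (enable_virtual_host_style = true):
    curl -I "https://my-bucket.s3.amazonaws.com/data/file.parquet"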

cyborg/bin/bump-versions.ts (new file, +156 lines)

@@ -0,0 +1,156 @@
/*
* Copyright 2023 Greptime Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import * as core from "@actions/core";
import {obtainClient} from "@/common";
interface RepoConfig {
tokenEnv: string;
repo: string;
workflowLogic: (version: string) => [string, string] | null;
}
const REPO_CONFIGS: Record<string, RepoConfig> = {
website: {
tokenEnv: "WEBSITE_REPO_TOKEN",
repo: "website",
workflowLogic: (version: string) => {
// Skip nightly versions for website
if (version.includes('nightly')) {
console.log('Nightly version detected for website, skipping workflow trigger.');
return null;
}
return ['bump-patch-version.yml', version];
}
},
demo: {
tokenEnv: "DEMO_REPO_TOKEN",
repo: "demo-scene",
workflowLogic: (version: string) => {
// Skip nightly versions for demo
if (version.includes('nightly')) {
console.log('Nightly version detected for demo, skipping workflow trigger.');
return null;
}
return ['bump-patch-version.yml', version];
}
},
docs: {
tokenEnv: "DOCS_REPO_TOKEN",
repo: "docs",
workflowLogic: (version: string) => {
// Check if it's a nightly version
if (version.includes('nightly')) {
return ['bump-nightly-version.yml', version];
}
const parts = version.split('.');
if (parts.length !== 3) {
throw new Error('Invalid version format');
}
// If patch version (last number) is 0, it's a major version
// Return only major.minor version
if (parts[2] === '0') {
return ['bump-version.yml', `${parts[0]}.${parts[1]}`];
}
// Otherwise it's a patch version, use full version
return ['bump-patch-version.yml', version];
}
}
};
async function triggerWorkflow(repoConfig: RepoConfig, workflowId: string, version: string) {
const client = obtainClient(repoConfig.tokenEnv);
try {
await client.rest.actions.createWorkflowDispatch({
owner: "GreptimeTeam",
repo: repoConfig.repo,
workflow_id: workflowId,
ref: "main",
inputs: {
version,
},
});
console.log(`Successfully triggered ${workflowId} workflow for ${repoConfig.repo} with version ${version}`);
} catch (error) {
core.setFailed(`Failed to trigger workflow for ${repoConfig.repo}: ${error.message}`);
throw error;
}
}
async function processRepo(repoName: string, version: string) {
const repoConfig = REPO_CONFIGS[repoName];
if (!repoConfig) {
throw new Error(`Unknown repository: ${repoName}`);
}
try {
const workflowResult = repoConfig.workflowLogic(version);
if (workflowResult === null) {
// Skip this repo (e.g., nightly version for website)
return;
}
const [workflowId, apiVersion] = workflowResult;
await triggerWorkflow(repoConfig, workflowId, apiVersion);
} catch (error) {
core.setFailed(`Error processing ${repoName} with version ${version}: ${error.message}`);
throw error;
}
}
async function main() {
const version = process.env.VERSION;
if (!version) {
core.setFailed("VERSION environment variable is required");
process.exit(1);
}
// Remove 'v' prefix if exists
const cleanVersion = version.startsWith('v') ? version.slice(1) : version;
// Get target repositories from environment variable
// Default to both if not specified
const targetRepos = process.env.TARGET_REPOS?.split(',').map(repo => repo.trim()) || ['website', 'docs'];
console.log(`Processing version ${cleanVersion} for repositories: ${targetRepos.join(', ')}`);
const errors: string[] = [];
// Process each repository
for (const repo of targetRepos) {
try {
await processRepo(repo, cleanVersion);
} catch (error) {
errors.push(`${repo}: ${error.message}`);
}
}
if (errors.length > 0) {
core.setFailed(`Failed to process some repositories: ${errors.join('; ')}`);
process.exit(1);
}
console.log('All repositories processed successfully');
}
// Execute main function
main().catch((error) => {
core.setFailed(`Unexpected error: ${error.message}`);
process.exit(1);
});
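The script is driven entirely by environment variables, matching the "Bump downstream repo versions" step in the release workflow above. A local invocation would look like the following (token values are placeholders):

    cd cyborg
    TARGET_REPOS=website,docs,demo \
    VERSION=v0.12.2 \
    GITHUB_TOKEN=<token> \
    WEBSITE_REPO_TOKEN=<token> \
    DOCS_REPO_TOKEN=<token> \
    DEMO_REPO_TOKEN=<token> \
    pnpm tsx bin/bump-versions.ts

Per the workflow logic in the script, a tag like v0.12.2 is routed to the docs repo's bump-patch-version.yml because its patch component is non-zero, while v0.13.0 would trigger bump-version.yml with the shortened version 0.13.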

View File

@@ -55,12 +55,25 @@ async function main() {
await client.rest.issues.addLabels({ await client.rest.issues.addLabels({
owner, repo, issue_number: number, labels: [labelDocsRequired], owner, repo, issue_number: number, labels: [labelDocsRequired],
}) })
// Get available assignees for the docs repo
const assigneesResponse = await docsClient.rest.issues.listAssignees({
owner: 'GreptimeTeam',
repo: 'docs',
})
const validAssignees = assigneesResponse.data.map(assignee => assignee.login)
core.info(`Available assignees: ${validAssignees.join(', ')}`)
// Check if the actor is a valid assignee, otherwise fallback to fengjiachun
const assignee = validAssignees.includes(actor) ? actor : 'fengjiachun'
core.info(`Assigning issue to: ${assignee}`)
await docsClient.rest.issues.create({ await docsClient.rest.issues.create({
owner: 'GreptimeTeam', owner: 'GreptimeTeam',
repo: 'docs', repo: 'docs',
title: `Update docs for ${title}`, title: `Update docs for ${title}`,
body: `A document change request is generated from ${html_url}`, body: `A document change request is generated from ${html_url}`,
assignee: actor, assignee: assignee,
}).then((res) => { }).then((res) => {
core.info(`Created issue ${res.data}`) core.info(`Created issue ${res.data}`)
}) })

View File

@@ -53,6 +53,54 @@ get_arch_type() {
esac esac
} }
# Verify SHA256 checksum
verify_sha256() {
file="$1"
expected_sha256="$2"
if command -v sha256sum >/dev/null 2>&1; then
actual_sha256=$(sha256sum "$file" | cut -d' ' -f1)
elif command -v shasum >/dev/null 2>&1; then
actual_sha256=$(shasum -a 256 "$file" | cut -d' ' -f1)
else
echo "Warning: No SHA256 verification tool found (sha256sum or shasum). Skipping checksum verification."
return 0
fi
if [ "$actual_sha256" = "$expected_sha256" ]; then
echo "SHA256 checksum verified successfully."
return 0
else
echo "Error: SHA256 checksum verification failed!"
echo "Expected: $expected_sha256"
echo "Actual: $actual_sha256"
return 1
fi
}
# Prompt for user confirmation (compatible with different shells)
prompt_confirmation() {
message="$1"
printf "%s (y/N): " "$message"
# Try to read user input, fallback if read fails
answer=""
if read answer </dev/tty 2>/dev/null; then
case "$answer" in
[Yy]|[Yy][Ee][Ss])
return 0
;;
*)
return 1
;;
esac
else
echo ""
echo "Cannot read user input. Defaulting to No."
return 1
fi
}
download_artifact() { download_artifact() {
if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then if [ -n "${OS_TYPE}" ] && [ -n "${ARCH_TYPE}" ]; then
# Use the latest stable released version. # Use the latest stable released version.
@@ -71,17 +119,104 @@ download_artifact() {
fi fi
echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}" echo "Downloading ${BIN}, OS: ${OS_TYPE}, Arch: ${ARCH_TYPE}, Version: ${VERSION}"
PACKAGE_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}.tar.gz" PKG_NAME="${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}"
PACKAGE_NAME="${PKG_NAME}.tar.gz"
SHA256_FILE="${PKG_NAME}.sha256sum"
if [ -n "${PACKAGE_NAME}" ]; then if [ -n "${PACKAGE_NAME}" ]; then
wget "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}" # Check if files already exist and prompt for override
if [ -f "${PACKAGE_NAME}" ]; then
echo "File ${PACKAGE_NAME} already exists."
if prompt_confirmation "Do you want to override it?"; then
echo "Overriding existing file..."
rm -f "${PACKAGE_NAME}"
else
echo "Skipping download. Using existing file."
fi
fi
if [ -f "${BIN}" ]; then
echo "Binary ${BIN} already exists."
if prompt_confirmation "Do you want to override it?"; then
echo "Will override existing binary..."
rm -f "${BIN}"
else
echo "Installation cancelled."
exit 0
fi
fi
# Download package if not exists
if [ ! -f "${PACKAGE_NAME}" ]; then
echo "Downloading ${PACKAGE_NAME}..."
# Use curl instead of wget for better compatibility
if command -v curl >/dev/null 2>&1; then
if ! curl -L -o "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
echo "Error: Failed to download ${PACKAGE_NAME}"
exit 1
fi
elif command -v wget >/dev/null 2>&1; then
if ! wget -O "${PACKAGE_NAME}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${PACKAGE_NAME}"; then
echo "Error: Failed to download ${PACKAGE_NAME}"
exit 1
fi
else
echo "Error: Neither curl nor wget is available for downloading."
exit 1
fi
fi
# Download and verify SHA256 checksum
echo "Downloading SHA256 checksum..."
sha256_download_success=0
if command -v curl >/dev/null 2>&1; then
if curl -L -s -o "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
sha256_download_success=1
fi
elif command -v wget >/dev/null 2>&1; then
if wget -q -O "${SHA256_FILE}" "https://github.com/${GITHUB_ORG}/${GITHUB_REPO}/releases/download/${VERSION}/${SHA256_FILE}" 2>/dev/null; then
sha256_download_success=1
fi
fi
if [ $sha256_download_success -eq 1 ] && [ -f "${SHA256_FILE}" ]; then
expected_sha256=$(cat "${SHA256_FILE}" | cut -d' ' -f1)
if [ -n "$expected_sha256" ]; then
if ! verify_sha256 "${PACKAGE_NAME}" "${expected_sha256}"; then
echo "SHA256 verification failed. Removing downloaded file."
rm -f "${PACKAGE_NAME}" "${SHA256_FILE}"
exit 1
fi
else
echo "Warning: Could not parse SHA256 checksum from file."
fi
rm -f "${SHA256_FILE}"
else
echo "Warning: Could not download SHA256 checksum file. Skipping verification."
fi
# Extract the binary and clean the rest. # Extract the binary and clean the rest.
tar xvf "${PACKAGE_NAME}" && \ echo "Extracting ${PACKAGE_NAME}..."
mv "${PACKAGE_NAME%.tar.gz}/${BIN}" "${PWD}" && \ if ! tar xf "${PACKAGE_NAME}"; then
rm -r "${PACKAGE_NAME}" && \ echo "Error: Failed to extract ${PACKAGE_NAME}"
rm -r "${PACKAGE_NAME%.tar.gz}" && \ exit 1
echo "Run './${BIN} --help' to get started" fi
# Find the binary in the extracted directory
extracted_dir="${PACKAGE_NAME%.tar.gz}"
if [ -f "${extracted_dir}/${BIN}" ]; then
mv "${extracted_dir}/${BIN}" "${PWD}/"
rm -f "${PACKAGE_NAME}"
rm -rf "${extracted_dir}"
chmod +x "${BIN}"
echo "Installation completed successfully!"
echo "Run './${BIN} --help' to get started"
else
echo "Error: Binary ${BIN} not found in extracted archive"
rm -f "${PACKAGE_NAME}"
rm -rf "${extracted_dir}"
exit 1
fi
fi fi
fi fi
} }
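The verification the script now performs can be reproduced by hand, which helps when debugging a failed install; package name and version below are examples, following the `${BIN}-${OS_TYPE}-${ARCH_TYPE}-${VERSION}` naming used above:

    PKG=greptime-linux-amd64-v0.12.2
    BASE=https://github.com/GreptimeTeam/greptimedb/releases/download/v0.12.2
    curl -L -o "${PKG}.tar.gz" "${BASE}/${PKG}.tar.gz"
    curl -L -o "${PKG}.sha256sum" "${BASE}/${PKG}.sha256sum"

    # Mirror verify_sha256: compare the published checksum with the locally computed one.
    expected=$(cut -d' ' -f1 "${PKG}.sha256sum")
    actual=$(sha256sum "${PKG}.tar.gz" | cut -d' ' -f1)
    [ "${expected}" = "${actual}" ] && echo "SHA256 OK" || echo "SHA256 mismatch"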

View File

@@ -16,6 +16,7 @@
mod client; mod client;
pub mod client_manager; pub mod client_manager;
#[cfg(feature = "testing")]
mod database; mod database;
pub mod error; pub mod error;
pub mod flow; pub mod flow;
@@ -33,6 +34,7 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt; use snafu::OptionExt;
pub use self::client::Client; pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database; pub use self::database::Database;
pub use self::error::{Error, Result}; pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu}; use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};
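With the `database` module now gated behind the `testing` feature, the `Database` client is compiled only on demand. A rough illustration of the effect when building the crate directly with cargo:

    # Default build: the Database helper is not compiled into the client crate.
    cargo build -p client

    # Opt back in where tests or tools still need it.
    cargo build -p client --features testing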

View File

@@ -9,6 +9,10 @@ default-run = "greptime"
name = "greptime" name = "greptime"
path = "src/bin/greptime.rs" path = "src/bin/greptime.rs"
[[bin]]
name = "objbench"
path = "src/bin/objbench.rs"
[features] [features]
default = ["servers/pprof", "servers/mem-prof"] default = ["servers/pprof", "servers/mem-prof"]
tokio-console = ["common-telemetry/tokio-console"] tokio-console = ["common-telemetry/tokio-console"]
@@ -20,6 +24,7 @@ workspace = true
async-trait.workspace = true async-trait.workspace = true
auth.workspace = true auth.workspace = true
base64.workspace = true base64.workspace = true
colored = "2.0"
cache.workspace = true cache.workspace = true
catalog.workspace = true catalog.workspace = true
chrono.workspace = true chrono.workspace = true
@@ -55,6 +60,9 @@ futures.workspace = true
human-panic = "2.0" human-panic = "2.0"
humantime.workspace = true humantime.workspace = true
lazy_static.workspace = true lazy_static.workspace = true
object-store.workspace = true
parquet = "53"
pprof = "0.14"
meta-client.workspace = true meta-client.workspace = true
meta-srv.workspace = true meta-srv.workspace = true
metric-engine.workspace = true metric-engine.workspace = true
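These manifest changes register the new benchmark binary and its extra dependencies. Assuming the flags defined in the `Command` struct of `objbench.rs` further down in this diff, building and running it could look like the following (config path and object-store paths are examples):

    # Build the new binary declared in src/cmd/Cargo.toml.
    cargo build --release --bin objbench

    # Rewrite one SST through the target access layer and print write metrics.
    ./target/release/objbench \
      --config ./storage.toml \
      --source "data/greptime/public/1024/1024_0000000000/<uuid>.parquet" \
      --target "data/greptime/public/1024/1024_0000000000/" \
      --pprof-file ./flamegraph.svg \
      --verbose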

View File

@@ -21,6 +21,8 @@ use cmd::{cli, datanode, flownode, frontend, metasrv, standalone, App};
use common_version::version; use common_version::version;
use servers::install_ring_crypto_provider; use servers::install_ring_crypto_provider;
pub mod objbench;
#[derive(Parser)] #[derive(Parser)]
#[command(name = "greptime", author, version, long_version = version(), about)] #[command(name = "greptime", author, version, long_version = version(), about)]
#[command(propagate_version = true)] #[command(propagate_version = true)]

src/cmd/src/bin/objbench.rs (new file, +602 lines)

@@ -0,0 +1,602 @@
// Copyright 2025 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::path::{Path, PathBuf};
use std::time::Instant;
use clap::Parser;
use cmd::error::{self, Result};
use colored::Colorize;
use datanode::config::ObjectStoreConfig;
use mito2::config::{FulltextIndexConfig, MitoConfig, Mode};
use mito2::read::Source;
use mito2::sst::file::{FileHandle, FileId, FileMeta};
use mito2::sst::file_purger::{FilePurger, FilePurgerRef, PurgeRequest};
use mito2::sst::parquet::{WriteOptions, PARQUET_METADATA_KEY};
use mito2::{build_access_layer, Metrics, OperationType, SstWriteRequest};
use object_store::ObjectStore;
use serde::{Deserialize, Serialize};
use store_api::metadata::{RegionMetadata, RegionMetadataRef};
#[tokio::main]
pub async fn main() {
// common_telemetry::init_default_ut_logging();
let cmd = Command::parse();
if let Err(e) = cmd.run().await {
eprintln!("{}: {}", "Error".red().bold(), e);
std::process::exit(1);
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(default)]
pub struct StorageConfigWrapper {
storage: StorageConfig,
}
/// Storage engine config
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(default)]
pub struct StorageConfig {
/// The working directory of database
pub data_home: String,
#[serde(flatten)]
pub store: ObjectStoreConfig,
}
#[derive(Debug, Parser)]
pub struct Command {
/// Path to the object-store config file (TOML). Must deserialize into datanode::config::ObjectStoreConfig.
#[clap(long, value_name = "FILE")]
pub config: PathBuf,
/// Source SST file path in object-store (e.g. "region_dir/<uuid>.parquet").
#[clap(long, value_name = "PATH")]
pub source: String,
/// Target SST file path in object-store; its parent directory is used as destination region dir.
#[clap(long, value_name = "PATH")]
pub target: String,
/// Verbose output
#[clap(short, long, default_value_t = false)]
pub verbose: bool,
/// Output file path for pprof flamegraph (enables profiling)
#[clap(long, value_name = "FILE")]
pub pprof_file: Option<PathBuf>,
}
impl Command {
pub async fn run(&self) -> Result<()> {
if self.verbose {
common_telemetry::init_default_ut_logging();
}
println!("{}", "Starting objbench...".cyan().bold());
// Build object store from config
let cfg_str = std::fs::read_to_string(&self.config).map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("failed to read config {}: {e}", self.config.display()),
}
.build()
})?;
let store_cfg: StorageConfigWrapper = toml::from_str(&cfg_str).map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("failed to parse config {}: {e}", self.config.display()),
}
.build()
})?;
let object_store = build_object_store(&store_cfg.storage).await?;
println!("{} Object store initialized", "".green());
// Prepare source identifiers
let (src_region_dir, src_file_id) = split_sst_path(&self.source)?;
println!("{} Source path parsed: {}", "".green(), self.source);
// Load parquet metadata to extract RegionMetadata and file stats
println!("{}", "Loading parquet metadata...".yellow());
let file_size = object_store
.stat(&self.source)
.await
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("stat failed: {e}"),
}
.build()
})?
.content_length();
let parquet_meta = load_parquet_metadata(object_store.clone(), &self.source, file_size)
.await
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("read parquet metadata failed: {e}"),
}
.build()
})?;
let region_meta = extract_region_metadata(&self.source, &parquet_meta)?;
let num_rows = parquet_meta.file_metadata().num_rows() as u64;
let num_row_groups = parquet_meta.num_row_groups() as u64;
println!(
"{} Metadata loaded - rows: {}, size: {} bytes",
"".green(),
num_rows,
file_size
);
// Build a FileHandle for the source file
let file_meta = FileMeta {
region_id: region_meta.region_id,
file_id: src_file_id,
time_range: Default::default(),
level: 0,
file_size,
available_indexes: Default::default(),
index_file_size: 0,
num_rows,
num_row_groups,
sequence: None,
};
let src_handle = FileHandle::new(file_meta, new_noop_file_purger());
// Build the reader for a single file via ParquetReaderBuilder
println!("{}", "Building reader...".yellow());
let (_src_access_layer, _cache_manager) =
build_access_layer_simple(src_region_dir.clone(), object_store.clone()).await?;
let reader_build_start = Instant::now();
let reader = mito2::sst::parquet::reader::ParquetReaderBuilder::new(
src_region_dir.clone(),
src_handle.clone(),
object_store.clone(),
)
.expected_metadata(Some(region_meta.clone()))
.build()
.await
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("build reader failed: {e}"),
}
.build()
})?;
let reader_build_elapsed = reader_build_start.elapsed();
let total_rows = reader.parquet_metadata().file_metadata().num_rows();
println!("{} Reader built in {:?}", "".green(), reader_build_elapsed);
// Prepare target access layer for writing
println!("{}", "Preparing target access layer...".yellow());
let (tgt_access_layer, tgt_cache_manager) =
build_access_layer_simple(self.target.clone(), object_store.clone()).await?;
// Build write request
let fulltext_index_config = FulltextIndexConfig {
create_on_compaction: Mode::Disable,
..Default::default()
};
let write_opts = WriteOptions::default();
let write_req = SstWriteRequest {
op_type: OperationType::Compact,
metadata: region_meta,
source: Source::Reader(Box::new(reader)),
cache_manager: tgt_cache_manager,
storage: None,
max_sequence: None,
index_options: Default::default(),
inverted_index_config: MitoConfig::default().inverted_index,
fulltext_index_config,
bloom_filter_index_config: MitoConfig::default().bloom_filter_index,
};
// Write SST
println!("{}", "Writing SST...".yellow());
let mut metrics = Metrics::default();
// Start profiling if pprof_file is specified
#[cfg(unix)]
let profiler_guard = if self.pprof_file.is_some() {
println!("{} Starting profiling...", "".yellow());
Some(
pprof::ProfilerGuardBuilder::default()
.frequency(99)
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
.build()
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("Failed to start profiler: {e}"),
}
.build()
})?,
)
} else {
None
};
#[cfg(not(unix))]
if self.pprof_file.is_some() {
eprintln!(
"{}: Profiling is not supported on this platform",
"Warning".yellow()
);
}
let write_start = Instant::now();
let infos = tgt_access_layer
.write_sst(write_req, &write_opts, &mut metrics)
.await
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("write_sst failed: {e}"),
}
.build()
})?;
let write_elapsed = write_start.elapsed();
// Stop profiling and generate flamegraph if enabled
#[cfg(unix)]
if let (Some(guard), Some(pprof_file)) = (profiler_guard, &self.pprof_file) {
println!("{} Generating flamegraph...", "🔥".yellow());
match guard.report().build() {
Ok(report) => {
let mut flamegraph_data = Vec::new();
if let Err(e) = report.flamegraph(&mut flamegraph_data) {
eprintln!(
"{}: Failed to generate flamegraph: {}",
"Warning".yellow(),
e
);
} else if let Err(e) = std::fs::write(pprof_file, flamegraph_data) {
eprintln!(
"{}: Failed to write flamegraph to {}: {}",
"Warning".yellow(),
pprof_file.display(),
e
);
} else {
println!(
"{} Flamegraph saved to {}",
"".green(),
pprof_file.display().to_string().cyan()
);
}
}
Err(e) => {
eprintln!(
"{}: Failed to generate pprof report: {}",
"Warning".yellow(),
e
);
}
}
}
assert_eq!(infos.len(), 1);
let dst_file_id = infos[0].file_id;
let dst_file_path = format!("{}{}", self.target, dst_file_id.as_parquet(),);
// Report results with ANSI colors
println!("\n{} {}", "Write complete!".green().bold(), "".green());
println!(" {}: {}", "Destination file".bold(), dst_file_path.cyan());
println!(" {}: {}", "Rows".bold(), total_rows.to_string().cyan());
println!(
" {}: {}",
"File size".bold(),
format!("{} bytes", file_size).cyan()
);
println!(
" {}: {:?}",
"Reader build time".bold(),
reader_build_elapsed
);
println!(" {}: {:?}", "Total time".bold(), write_elapsed);
// Print metrics in a formatted way
println!(
" {}: {:?}, sum: {:?}",
"Metrics".bold(),
metrics,
metrics.sum()
);
// Print infos
println!(" {}: {:?}", "Index".bold(), infos[0].index_metadata);
// Cleanup
println!("\n{}", "Cleaning up...".yellow());
object_store.delete(&dst_file_path).await.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("Failed to delete dest file {}: {}", dst_file_path, e),
}
.build()
})?;
println!("{} Temporary file deleted", "".green());
println!("\n{}", "Benchmark completed successfully!".green().bold());
Ok(())
}
}
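/// Split an SST path like "<dir>/<uuid>.parquet" into its parent directory and the parsed `FileId`.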
fn split_sst_path(path: &str) -> Result<(String, FileId)> {
let p = Path::new(path);
let file_name = p.file_name().and_then(|s| s.to_str()).ok_or_else(|| {
error::IllegalConfigSnafu {
msg: "invalid source path".to_string(),
}
.build()
})?;
let uuid_str = file_name.strip_suffix(".parquet").ok_or_else(|| {
error::IllegalConfigSnafu {
msg: "expect .parquet file".to_string(),
}
.build()
})?;
let file_id = FileId::parse_str(uuid_str).map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("invalid file id: {e}"),
}
.build()
})?;
let parent = p
.parent()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_string();
Ok((parent, file_id))
}
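/// Decode the `RegionMetadata` JSON stored under `PARQUET_METADATA_KEY` in the parquet key-value metadata.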
fn extract_region_metadata(
file_path: &str,
meta: &parquet::file::metadata::ParquetMetaData,
) -> Result<RegionMetadataRef> {
use parquet::format::KeyValue;
let kvs: Option<&Vec<KeyValue>> = meta.file_metadata().key_value_metadata();
let Some(kvs) = kvs else {
return Err(error::IllegalConfigSnafu {
msg: format!("{file_path}: missing parquet key_value metadata"),
}
.build());
};
let json = kvs
.iter()
.find(|kv| kv.key == PARQUET_METADATA_KEY)
.and_then(|kv| kv.value.as_ref())
.ok_or_else(|| {
error::IllegalConfigSnafu {
msg: format!("{file_path}: key {PARQUET_METADATA_KEY} not found or empty"),
}
.build()
})?;
let region: RegionMetadata = RegionMetadata::from_json(json).map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("invalid region metadata json: {e}"),
}
.build()
})?;
Ok(std::sync::Arc::new(region))
}
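/// Build an `ObjectStore` from the storage config; File, S3, Oss, Azblob and Gcs backends are supported.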
async fn build_object_store(sc: &StorageConfig) -> Result<ObjectStore> {
use datanode::config::ObjectStoreConfig::*;
let oss = &sc.store;
match oss {
File(_) => {
use object_store::services::Fs;
let builder = Fs::default().root(&sc.data_home);
Ok(ObjectStore::new(builder)
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("init fs backend failed: {e}"),
}
.build()
})?
.finish())
}
S3(s3) => {
use common_base::secrets::ExposeSecret;
use object_store::services::S3;
use object_store::util;
let root = util::normalize_dir(&s3.root);
let mut builder = S3::default()
.root(&root)
.bucket(&s3.bucket)
.access_key_id(s3.access_key_id.expose_secret())
.secret_access_key(s3.secret_access_key.expose_secret());
if let Some(ep) = &s3.endpoint {
builder = builder.endpoint(ep);
}
if let Some(region) = &s3.region {
builder = builder.region(region);
}
if s3.enable_virtual_host_style {
builder = builder.enable_virtual_host_style();
}
Ok(ObjectStore::new(builder)
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("init s3 backend failed: {e}"),
}
.build()
})?
.finish())
}
Oss(oss) => {
use common_base::secrets::ExposeSecret;
use object_store::services::Oss;
use object_store::util;
let root = util::normalize_dir(&oss.root);
let builder = Oss::default()
.root(&root)
.bucket(&oss.bucket)
.endpoint(&oss.endpoint)
.access_key_id(oss.access_key_id.expose_secret())
.access_key_secret(oss.access_key_secret.expose_secret());
Ok(ObjectStore::new(builder)
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("init oss backend failed: {e}"),
}
.build()
})?
.finish())
}
Azblob(az) => {
use common_base::secrets::ExposeSecret;
use object_store::services::Azblob;
use object_store::util;
let root = util::normalize_dir(&az.root);
let mut builder = Azblob::default()
.root(&root)
.container(&az.container)
.endpoint(&az.endpoint)
.account_name(az.account_name.expose_secret())
.account_key(az.account_key.expose_secret());
if let Some(token) = &az.sas_token {
builder = builder.sas_token(token);
}
Ok(ObjectStore::new(builder)
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("init azblob backend failed: {e}"),
}
.build()
})?
.finish())
}
Gcs(gcs) => {
use common_base::secrets::ExposeSecret;
use object_store::services::Gcs;
use object_store::util;
let root = util::normalize_dir(&gcs.root);
let builder = Gcs::default()
.root(&root)
.bucket(&gcs.bucket)
.scope(&gcs.scope)
.credential_path(gcs.credential_path.expose_secret())
.credential(gcs.credential.expose_secret())
.endpoint(&gcs.endpoint);
Ok(ObjectStore::new(builder)
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("init gcs backend failed: {e}"),
}
.build()
})?
.finish())
}
}
}
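/// Build an access layer for the given region dir with the default mito config and a fresh cache manager.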
async fn build_access_layer_simple(
region_dir: String,
object_store: ObjectStore,
) -> Result<(
std::sync::Arc<mito2::AccessLayer>,
std::sync::Arc<mito2::CacheManager>,
)> {
// Minimal index aux path setup
let mut mito_cfg = MitoConfig::default();
// Use a temporary directory as aux path
let data_home = std::env::temp_dir().join("greptime_objbench");
let _ = std::fs::create_dir_all(&data_home);
let _ = mito_cfg.index.sanitize(
data_home.to_str().unwrap_or("/tmp"),
&mito_cfg.inverted_index,
);
let access_layer = build_access_layer(&region_dir, object_store, &mito_cfg)
.await
.map_err(|e| {
error::IllegalConfigSnafu {
msg: format!("build_access_layer failed: {e}"),
}
.build()
})?;
Ok((
access_layer,
std::sync::Arc::new(mito2::CacheManager::default()),
))
}
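/// File purger that ignores purge requests; dropping the source `FileHandle` must not delete the benchmarked file.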
fn new_noop_file_purger() -> FilePurgerRef {
#[derive(Debug)]
struct Noop;
impl FilePurger for Noop {
fn send_request(&self, _request: PurgeRequest) {}
}
std::sync::Arc::new(Noop)
}
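/// Load parquet metadata by prefetching the file tail; falls back to a second ranged read when the metadata does not fit in the prefetched buffer.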
async fn load_parquet_metadata(
object_store: ObjectStore,
path: &str,
file_size: u64,
) -> std::result::Result<
parquet::file::metadata::ParquetMetaData,
Box<dyn std::error::Error + Send + Sync>,
> {
use parquet::file::metadata::ParquetMetaDataReader;
use parquet::file::FOOTER_SIZE;
let actual_size = if file_size == 0 {
object_store.stat(path).await?.content_length()
} else {
file_size
};
if actual_size < FOOTER_SIZE as u64 {
return Err("file too small".into());
}
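// Prefetch the tail of the file; 64 KiB usually covers the footer plus the metadata.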
let prefetch: u64 = 64 * 1024;
let start = actual_size.saturating_sub(prefetch);
let buffer = object_store
.read_with(path)
.range(start..actual_size)
.await?
.to_vec();
let buffer_len = buffer.len();
let mut footer = [0; 8];
footer.copy_from_slice(&buffer[buffer_len - FOOTER_SIZE..]);
let metadata_len = ParquetMetaDataReader::decode_footer(&footer)? as u64;
if actual_size - (FOOTER_SIZE as u64) < metadata_len {
return Err("invalid footer/metadata length".into());
}
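// Fast path: the whole metadata section already sits in the prefetched buffer.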
if (metadata_len as usize) <= buffer_len - FOOTER_SIZE {
let metadata_start = buffer_len - metadata_len as usize - FOOTER_SIZE;
let meta = ParquetMetaDataReader::decode_metadata(
&buffer[metadata_start..buffer_len - FOOTER_SIZE],
)?;
Ok(meta)
} else {
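// Slow path: the metadata is larger than the prefetch window, so read exactly the metadata range.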
let metadata_start = actual_size - metadata_len - FOOTER_SIZE as u64;
let data = object_store
.read_with(path)
.range(metadata_start..(actual_size - FOOTER_SIZE as u64))
.await?
.to_vec();
let meta = ParquetMetaDataReader::decode_metadata(&data)?;
Ok(meta)
}
}
#[cfg(test)]
mod tests {
use super::StorageConfigWrapper;
#[test]
fn test_decode() {
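// Note: depends on a local config file and is only runnable on the author's machine.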
let cfg = std::fs::read_to_string("/home/lei/datanode-bulk.toml").unwrap();
let storage: StorageConfigWrapper = toml::from_str(&cfg).unwrap();
println!("{:?}", storage);
}
}

View File

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
 use common_telemetry::info;
 use common_telemetry::logging::TracingOptions;
 use common_version::{short_version, version};
-use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
+use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
 use meta_client::{MetaClientOptions, MetaClientType};
 use servers::Mode;
 use snafu::{OptionExt, ResultExt};
@@ -317,8 +317,6 @@ impl StartCommand {
 Arc::new(executor),
 );
-let frontend_client = FrontendClient::from_meta_client(meta_client.clone());
 let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
 let flownode_builder = FlownodeBuilder::new(
 opts,
@@ -326,7 +324,6 @@ impl StartCommand {
 table_metadata_manager,
 catalog_manager.clone(),
 flow_metadata_manager,
-Arc::new(frontend_client),
 )
 .with_heartbeat_task(heartbeat_task);

View File

@@ -54,10 +54,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
 use datanode::datanode::{Datanode, DatanodeBuilder};
 use datanode::region_server::RegionServer;
 use file_engine::config::EngineConfig as FileEngineConfig;
-use flow::{
-FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
-FrontendInvoker,
-};
+use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
 use frontend::frontend::FrontendOptions;
 use frontend::instance::builder::FrontendBuilder;
 use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
@@ -536,16 +533,12 @@ impl StartCommand {
 flow: opts.flow.clone(),
 ..Default::default()
 };
-let fe_server_addr = fe_opts.grpc.bind_addr.clone();
-let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
 let flow_builder = FlownodeBuilder::new(
 flownode_options,
 plugins.clone(),
 table_metadata_manager.clone(),
 catalog_manager.clone(),
 flow_metadata_manager.clone(),
-Arc::new(frontend_client),
 );
 let flownode = Arc::new(
 flow_builder

View File

@@ -445,16 +445,10 @@ impl Pool {
 async fn recycle_channel_in_loop(pool: Arc<Pool>, interval_secs: u64) {
 let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
-// use weak ref here to prevent pool being leaked
-let pool_weak = Arc::downgrade(&pool);
 loop {
 let _ = interval.tick().await;
-if let Some(pool) = pool_weak.upgrade() {
-pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
-} else {
-// no one is using this pool, so we can also let go
-break;
-}
+pool.retain_channel(|_, c| c.access.swap(0, Ordering::Relaxed) != 0)
 }
 }

View File

@@ -343,7 +343,6 @@ pub enum FlowType {
 impl FlowType {
 pub const RECORDING_RULE: &str = "recording_rule";
 pub const STREAMING: &str = "streaming";
-pub const FLOW_TYPE_KEY: &str = "flow_type";
 }
 impl Default for FlowType {
@@ -399,8 +398,7 @@ impl From<&CreateFlowData> for CreateRequest {
 };
 let flow_type = value.flow_type.unwrap_or_default().to_string();
-req.flow_options
-.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
+req.flow_options.insert("flow_type".to_string(), flow_type);
 req
 }
 }
@@ -432,7 +430,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
 .collect::<Vec<_>>();
 let flow_type = value.flow_type.unwrap_or_default().to_string();
-options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
+options.insert("flow_type".to_string(), flow_type);
 let flow_info = FlowInfoValue {
 source_table_ids: value.source_table_ids.clone(),

View File

@@ -171,6 +171,10 @@ pub struct S3Config {
 pub secret_access_key: SecretString,
 pub endpoint: Option<String>,
 pub region: Option<String>,
+/// Enable virtual host style so that opendal will send API requests in virtual host style instead of path style.
+/// By default, opendal will send API to https://s3.us-east-1.amazonaws.com/bucket_name
+/// Enabled, opendal will send API to https://bucket_name.s3.us-east-1.amazonaws.com
+pub enable_virtual_host_style: bool,
 #[serde(flatten)]
 pub cache: ObjectStorageCacheConfig,
 pub http_client: HttpClientConfig,
@@ -185,6 +189,7 @@ impl PartialEq for S3Config {
 && self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
 && self.endpoint == other.endpoint
 && self.region == other.region
+&& self.enable_virtual_host_style == other.enable_virtual_host_style
 && self.cache == other.cache
 && self.http_client == other.http_client
 }
@@ -289,6 +294,7 @@ impl Default for S3Config {
 root: String::default(),
 access_key_id: SecretString::from(String::default()),
 secret_access_key: SecretString::from(String::default()),
+enable_virtual_host_style: false,
 endpoint: Option::default(),
 region: Option::default(),
 cache: ObjectStorageCacheConfig::default(),

View File

@@ -41,10 +41,13 @@ pub(crate) async fn new_s3_object_store(s3_config: &S3Config) -> Result<ObjectSt
 if s3_config.endpoint.is_some() {
 builder = builder.endpoint(s3_config.endpoint.as_ref().unwrap());
-};
+}
 if s3_config.region.is_some() {
 builder = builder.region(s3_config.region.as_ref().unwrap());
-};
+}
+if s3_config.enable_virtual_host_style {
+builder = builder.enable_virtual_host_style();
+}
 Ok(ObjectStore::new(builder)
 .context(error::InitBackendSnafu)?
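For reference, a minimal sketch of how the new flag could be set in the datanode storage config (the surrounding keys mirror the S3Config fields shown above; the exact section layout is an assumption, not part of this diff):

[storage]
type = "S3"
bucket = "my-bucket"
root = "data"
access_key_id = "..."
secret_access_key = "..."
endpoint = "https://s3.us-east-1.amazonaws.com"
region = "us-east-1"
# Assumed key name, matching the new S3Config field: send requests as
# https://my-bucket.s3.us-east-1.amazonaws.com instead of path style.
enable_virtual_host_style = true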

View File

@@ -16,7 +16,6 @@ async-trait.workspace = true
 bytes.workspace = true
 cache.workspace = true
 catalog.workspace = true
-chrono.workspace = true
 client.workspace = true
 common-base.workspace = true
 common-config.workspace = true

View File

@@ -49,13 +49,12 @@ pub(crate) use crate::adapter::node_context::FlownodeContext;
 use crate::adapter::refill::RefillTask;
 use crate::adapter::table_source::ManagedTableSource;
 use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
-pub(crate) use crate::adapter::worker::{create_worker, WorkerHandle};
+pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
 use crate::compute::ErrCollector;
 use crate::df_optimizer::sql_to_flow_plan;
 use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
 use crate::expr::Batch;
 use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
-use crate::recording_rules::RecordingRuleEngine;
 use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
 mod flownode_impl;
@@ -64,7 +63,7 @@ pub(crate) mod refill;
 mod stat;
 #[cfg(test)]
 mod tests;
-pub(crate) mod util;
+mod util;
 mod worker;
 pub(crate) mod node_context;
@@ -172,8 +171,6 @@ pub struct FlowWorkerManager {
 flush_lock: RwLock<()>,
 /// receive a oneshot sender to send state size report
 state_report_handler: RwLock<Option<StateReportHandler>>,
-/// engine for recording rule
-rule_engine: RecordingRuleEngine,
 }
 /// Building FlownodeManager
@@ -188,7 +185,6 @@ impl FlowWorkerManager {
 node_id: Option<u32>,
 query_engine: Arc<dyn QueryEngine>,
 table_meta: TableMetadataManagerRef,
-rule_engine: RecordingRuleEngine,
 ) -> Self {
 let srv_map = ManagedTableSource::new(
 table_meta.table_info_manager().clone(),
@@ -211,7 +207,6 @@ impl FlowWorkerManager {
 node_id,
 flush_lock: RwLock::new(()),
 state_report_handler: RwLock::new(None),
-rule_engine,
 }
 }
@@ -220,6 +215,25 @@ impl FlowWorkerManager {
 self
 }
+/// Create a flownode manager with one worker
+pub fn new_with_workers<'s>(
+node_id: Option<u32>,
+query_engine: Arc<dyn QueryEngine>,
+table_meta: TableMetadataManagerRef,
+num_workers: usize,
+) -> (Self, Vec<Worker<'s>>) {
+let mut zelf = Self::new(node_id, query_engine, table_meta);
+let workers: Vec<_> = (0..num_workers)
+.map(|_| {
+let (handle, worker) = create_worker();
+zelf.add_worker_handle(handle);
+worker
+})
+.collect();
+(zelf, workers)
+}
 /// add a worker handler to manager, meaning this corresponding worker is under it's manage
 pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
 self.worker_handles.push(handle);
@@ -737,11 +751,7 @@ pub struct CreateFlowArgs {
 /// Create&Remove flow
 impl FlowWorkerManager {
 /// remove a flow by it's id
-#[allow(unreachable_code)]
 pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
-// TODO(discord9): reroute some back to streaming engine later
-return self.rule_engine.remove_flow(flow_id).await;
 for handle in self.worker_handles.iter() {
 if handle.contains_flow(flow_id).await? {
 handle.remove_flow(flow_id).await?;
@@ -757,10 +767,8 @@ impl FlowWorkerManager {
 /// steps to create task:
 /// 1. parse query into typed plan(and optional parse expire_after expr)
 /// 2. render source/sink with output table id and used input table id
-#[allow(clippy::too_many_arguments, unreachable_code)]
+#[allow(clippy::too_many_arguments)]
 pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
-// TODO(discord9): reroute some back to streaming engine later
-return self.rule_engine.create_flow(args).await;
 let CreateFlowArgs {
 flow_id,
 sink_table_name,

View File

@@ -133,7 +133,7 @@ impl Flownode for FlowWorkerManager {
 .map_err(to_meta_err(snafu::location!()))?;
 debug!(
-"Done to flush flow_id={:?} with {} input rows flushed, {} rows sended and {} output rows flushed",
+"Done to flush flow_id={:?} with {} input rows flushed, {} rows sent and {} output rows flushed",
 flow_id, flushed_input_rows, rows_send, row
 );
 Ok(FlowResponse {
@@ -153,13 +153,7 @@ impl Flownode for FlowWorkerManager {
 }
 }
-#[allow(unreachable_code, unused)]
 async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
-return self
-.rule_engine
-.handle_inserts(request)
-.await
-.map_err(to_meta_err(snafu::location!()));
 // using try_read to ensure two things:
 // 1. flush wouldn't happen until inserts before it is inserted
 // 2. inserts happening concurrently with flush wouldn't be block by flush
@@ -212,15 +206,15 @@ impl Flownode for FlowWorkerManager {
 .collect_vec();
 let table_col_names = table_schema.relation_desc.names;
 let table_col_names = table_col_names
 .iter().enumerate()
 .map(|(idx,name)| match name {
 Some(name) => Ok(name.clone()),
 None => InternalSnafu {
 reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
 }
 .fail().map_err(BoxedError::new).context(ExternalSnafu),
 })
 .collect::<Result<Vec<_>>>()?;
 let name_to_col = HashMap::<_, _>::from_iter(
 insert_schema
 .iter()

View File

@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-//! Some utility functions
 use std::sync::Arc;
 use api::helper::ColumnDataTypeWrapper;

View File

@@ -16,7 +16,6 @@
 use std::any::Any;
-use arrow_schema::ArrowError;
 use common_error::ext::BoxedError;
 use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
 use common_macro::stack_trace_debug;
@@ -54,13 +53,6 @@ pub enum Error {
 location: Location,
 },
-#[snafu(display("Time error"))]
-Time {
-source: common_time::error::Error,
-#[snafu(implicit)]
-location: Location,
-},
 #[snafu(display("External error"))]
 External {
 source: BoxedError,
@@ -164,15 +156,6 @@ pub enum Error {
 location: Location,
 },
-#[snafu(display("Arrow error: {raw:?} in context: {context}"))]
-Arrow {
-#[snafu(source)]
-raw: ArrowError,
-context: String,
-#[snafu(implicit)]
-location: Location,
-},
 #[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
 Datafusion {
 #[snafu(source)]
@@ -247,7 +230,6 @@ impl ErrorExt for Error {
 match self {
 Self::Eval { .. }
 | Self::JoinTask { .. }
-| Self::Arrow { .. }
 | Self::Datafusion { .. }
 | Self::InsertIntoFlow { .. } => StatusCode::Internal,
 Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,
@@ -256,9 +238,7 @@ impl ErrorExt for Error {
 | Self::FlowNotFound { .. }
 | Self::ListFlows { .. } => StatusCode::TableNotFound,
 Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
-Self::InvalidQuery { .. } | Self::CreateFlow { .. } | Self::Time { .. } => {
-StatusCode::EngineExecuteQuery
-}
+Self::InvalidQuery { .. } | Self::CreateFlow { .. } => StatusCode::EngineExecuteQuery,
 Self::Unexpected { .. } => StatusCode::Unexpected,
 Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
 StatusCode::Unsupported

View File

@@ -238,7 +238,6 @@ mod test {
 for (sql, current, expected) in &testcases {
 let plan = sql_to_substrait(engine.clone(), sql).await;
 let mut ctx = create_test_ctx();
 let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan)
 .await

View File

@@ -130,6 +130,13 @@ impl HeartbeatTask {
 pub fn shutdown(&self) {
 info!("Close heartbeat task for flownode");
+if self
+.running
+.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
+.is_err()
+{
+warn!("Call close heartbeat task multiple times");
+}
 }
 fn new_heartbeat_request(
@@ -207,6 +214,7 @@ impl HeartbeatTask {
 if let Some(message) = message {
 Self::new_heartbeat_request(&heartbeat_request, Some(message), &latest_report)
 } else {
+warn!("Sender has been dropped, exiting the heartbeat loop");
 // Receives None that means Sender was dropped, we need to break the current loop
 break
 }
@@ -248,7 +256,11 @@ impl HeartbeatTask {
 error!(e; "Error while handling heartbeat response");
 }
 }
-Ok(None) => break,
+Ok(None) => {
+warn!("Heartbeat response stream closed");
+capture_self.start_with_retry(retry_interval).await;
+break;
+}
 Err(e) => {
 error!(e; "Occur error while reading heartbeat response");
 capture_self.start_with_retry(retry_interval).await;

View File

@@ -33,7 +33,6 @@ mod expr;
 pub mod heartbeat;
 mod metrics;
 mod plan;
-mod recording_rules;
 mod repr;
 mod server;
 mod transform;
@@ -44,5 +43,4 @@ mod test_utils;
 pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
 pub use error::{Error, Result};
-pub use recording_rules::FrontendClient;
 pub use server::{FlownodeBuilder, FlownodeInstance, FlownodeServer, FrontendInvoker};

View File

@@ -28,32 +28,6 @@ lazy_static! {
 &["table_id"]
 )
 .unwrap();
-pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!(
-"greptime_flow_rule_engine_query_time",
-"flow rule engine query time",
-&["flow_id"],
-vec![
-0.0,
-1.,
-3.,
-5.,
-10.,
-20.,
-30.,
-60.,
-2. * 60.,
-5. * 60.,
-10. * 60.
-]
-)
-.unwrap();
-pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
-"greptime_flow_rule_engine_slow_query",
-"flow rule engine slow query",
-&["flow_id", "sql", "peer"],
-vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
-)
-.unwrap();
 pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
 register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
 pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(

View File

@@ -1,940 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Run flow as recording rule which is time-window-aware normal query triggered every tick set by user
mod engine;
mod frontend_client;
use std::collections::BTreeSet;
use std::sync::Arc;
use api::helper::pb_value_to_value_ref;
use catalog::CatalogManagerRef;
use common_error::ext::BoxedError;
use common_recordbatch::DfRecordBatch;
use common_telemetry::warn;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datafusion::error::Result as DfResult;
use datafusion::logical_expr::Expr;
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
use datafusion::prelude::SessionContext;
use datafusion::sql::unparser::Unparser;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
use datafusion_common::{DFSchema, TableReference};
use datafusion_expr::{ColumnarValue, LogicalPlan};
use datafusion_physical_expr::PhysicalExprRef;
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::scalars::ScalarVector;
use datatypes::schema::TIME_INDEX_KEY;
use datatypes::value::Value;
use datatypes::vectors::{
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
TimestampSecondVector, Vector,
};
pub use engine::RecordingRuleEngine;
pub use frontend_client::FrontendClient;
use itertools::Itertools;
use query::parser::QueryLanguageParser;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use crate::adapter::util::from_proto_to_data_type;
use crate::df_optimizer::apply_df_optimizer;
use crate::error::{ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, UnexpectedSnafu};
use crate::expr::error::DataTypeSnafu;
use crate::Error;
#[derive(Debug, Clone)]
pub struct TimeWindowExpr {
phy_expr: PhysicalExprRef,
column_name: String,
logical_expr: Expr,
df_schema: DFSchema,
}
impl TimeWindowExpr {
pub fn from_expr(expr: &Expr, column_name: &str, df_schema: &DFSchema) -> Result<Self, Error> {
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
Ok(Self {
phy_expr,
column_name: column_name.to_string(),
logical_expr: expr.clone(),
df_schema: df_schema.clone(),
})
}
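/// Evaluate the time window expression at `current` and return the (lower, upper) bounds of the window containing it.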
pub fn eval(
&self,
current: Timestamp,
) -> Result<(Option<Timestamp>, Option<Timestamp>), Error> {
let lower_bound =
find_expr_time_window_lower_bound(&self.logical_expr, &self.df_schema, current)?;
let upper_bound =
find_expr_time_window_upper_bound(&self.logical_expr, &self.df_schema, current)?;
Ok((lower_bound, upper_bound))
}
/// Find timestamps from rows using time window expr
pub async fn handle_rows(
&self,
rows_list: Vec<api::v1::Rows>,
) -> Result<BTreeSet<Timestamp>, Error> {
let mut time_windows = BTreeSet::new();
for rows in rows_list {
// pick the time index column and use it to eval on `self.expr`
let ts_col_index = rows
.schema
.iter()
.map(|col| col.column_name.clone())
.position(|name| name == self.column_name);
let Some(ts_col_index) = ts_col_index else {
warn!("can't found time index column in schema: {:?}", rows.schema);
continue;
};
let col_schema = &rows.schema[ts_col_index];
let cdt = from_proto_to_data_type(col_schema)?;
let column_values = rows
.rows
.iter()
.map(|row| &row.values[ts_col_index])
.collect_vec();
let mut vector = cdt.create_mutable_vector(column_values.len());
for value in column_values {
let value = pb_value_to_value_ref(value, &None);
vector.try_push_value_ref(value).context(DataTypeSnafu {
msg: "Failed to convert rows to columns",
})?;
}
let vector = vector.to_vector();
let df_schema = create_df_schema_for_ts_column(&self.column_name, cdt)?;
let rb =
DfRecordBatch::try_new(df_schema.inner().clone(), vec![vector.to_arrow_array()])
.with_context(|_e| ArrowSnafu {
context: format!(
"Failed to create record batch from {df_schema:?} and {vector:?}"
),
})?;
let eval_res = self
.phy_expr
.evaluate(&rb)
.with_context(|_| DatafusionSnafu {
context: format!(
"Failed to evaluate physical expression {:?} on {rb:?}",
self.phy_expr
),
})?;
let res = columnar_to_ts_vector(&eval_res)?;
for ts in res.into_iter().flatten() {
time_windows.insert(ts);
}
}
Ok(time_windows)
}
}
fn create_df_schema_for_ts_column(name: &str, cdt: ConcreteDataType) -> Result<DFSchema, Error> {
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
name,
cdt.as_arrow_type(),
false,
)]));
let df_schema = DFSchema::from_field_specific_qualified_schema(
vec![Some(TableReference::bare("TimeIndexOnlyTable"))],
&arrow_schema,
)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
})?;
Ok(df_schema)
}
/// Convert `ColumnarValue` to `Vec<Option<Timestamp>>`
fn columnar_to_ts_vector(columnar: &ColumnarValue) -> Result<Vec<Option<Timestamp>>, Error> {
let val = match columnar {
datafusion_expr::ColumnarValue::Array(array) => {
let ty = array.data_type();
let ty = ConcreteDataType::from_arrow_type(ty);
let time_unit = if let ConcreteDataType::Timestamp(ty) = ty {
ty.unit()
} else {
return UnexpectedSnafu {
reason: format!("Non-timestamp type: {ty:?}"),
}
.fail();
};
match time_unit {
TimeUnit::Second => TimestampSecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec(),
TimeUnit::Millisecond => {
TimestampMillisecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::try_from_arrow_array(array.clone())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to create vector from arrow array {array:?}"),
})?
.iter_data()
.map(|d| d.map(|d| d.0))
.collect_vec()
}
}
}
datafusion_expr::ColumnarValue::Scalar(scalar) => {
let value = Value::try_from(scalar.clone()).with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert scalar {scalar:?} to value"),
})?;
let ts = value.as_timestamp().context(UnexpectedSnafu {
reason: format!("Expect Timestamp, found {:?}", value),
})?;
vec![Some(ts)]
}
};
Ok(val)
}
/// Convert sql to datafusion logical plan
pub async fn sql_to_df_plan(
query_ctx: QueryContextRef,
engine: QueryEngineRef,
sql: &str,
optimize: bool,
) -> Result<LogicalPlan, Error> {
let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = engine
.planner()
.plan(&stmt, query_ctx)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let plan = if optimize {
apply_df_optimizer(plan).await?
} else {
plan
};
Ok(plan)
}
/// Return (the column name of time index column, the time window expr, the expected time unit of time index column, the expr's schema for evaluating the time window)
async fn find_time_window_expr(
plan: &LogicalPlan,
catalog_man: CatalogManagerRef,
query_ctx: QueryContextRef,
) -> Result<(String, Option<datafusion_expr::Expr>, TimeUnit, DFSchema), Error> {
// TODO(discord9): find the expr that does the time window
let mut table_name = None;
// first find the table source in the logical plan
plan.apply(|plan| {
let LogicalPlan::TableScan(table_scan) = plan else {
return Ok(TreeNodeRecursion::Continue);
};
table_name = Some(table_scan.table_name.clone());
Ok(TreeNodeRecursion::Stop)
})
.with_context(|_| DatafusionSnafu {
context: format!("Can't find table source in plan {plan:?}"),
})?;
let Some(table_name) = table_name else {
UnexpectedSnafu {
reason: format!("Can't find table source in plan {plan:?}"),
}
.fail()?
};
let current_schema = query_ctx.current_schema();
let catalog_name = table_name.catalog().unwrap_or(query_ctx.current_catalog());
let schema_name = table_name.schema().unwrap_or(&current_schema);
let table_name = table_name.table();
let Some(table_ref) = catalog_man
.table(catalog_name, schema_name, table_name, Some(&query_ctx))
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
else {
UnexpectedSnafu {
reason: format!(
"Can't find table {table_name:?} in catalog {catalog_name:?}/{schema_name:?}"
),
}
.fail()?
};
let schema = &table_ref.table_info().meta.schema;
let ts_index = schema.timestamp_column().context(UnexpectedSnafu {
reason: format!("Can't find timestamp column in table {table_name:?}"),
})?;
let ts_col_name = ts_index.name.clone();
let expected_time_unit = ts_index.data_type.as_timestamp().with_context(|| UnexpectedSnafu {
reason: format!(
"Expected timestamp column {ts_col_name:?} in table {table_name:?} to be timestamp, but got {ts_index:?}"
),
})?.unit();
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
ts_col_name.clone(),
ts_index.data_type.as_arrow_type(),
false,
)]));
let df_schema = DFSchema::from_field_specific_qualified_schema(
vec![Some(TableReference::bare(table_name))],
&arrow_schema,
)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
})?;
// find the time window expr which refers to the time index column
let mut aggr_expr = None;
let mut time_window_expr: Option<Expr> = None;
let find_inner_aggr_expr = |plan: &LogicalPlan| {
if let LogicalPlan::Aggregate(aggregate) = plan {
aggr_expr = Some(aggregate.clone());
};
Ok(TreeNodeRecursion::Continue)
};
plan.apply(find_inner_aggr_expr)
.with_context(|_| DatafusionSnafu {
context: format!("Can't find aggr expr in plan {plan:?}"),
})?;
if let Some(aggregate) = aggr_expr {
for group_expr in &aggregate.group_expr {
let refs = group_expr.column_refs();
if refs.len() != 1 {
continue;
}
let ref_col = refs.iter().next().unwrap();
let index = aggregate.input.schema().maybe_index_of_column(ref_col);
let Some(index) = index else {
continue;
};
let field = aggregate.input.schema().field(index);
let is_time_index = field.metadata().get(TIME_INDEX_KEY) == Some(&"true".to_string());
if is_time_index {
let rewrite_column = group_expr.clone();
let rewritten = rewrite_column
.rewrite(&mut RewriteColumn {
table_name: table_name.to_string(),
})
.with_context(|_| DatafusionSnafu {
context: format!("Rewrite expr failed, expr={:?}", group_expr),
})?
.data;
struct RewriteColumn {
table_name: String,
}
impl TreeNodeRewriter for RewriteColumn {
type Node = Expr;
fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
let Expr::Column(mut column) = node else {
return Ok(Transformed::no(node));
};
column.relation = Some(TableReference::bare(self.table_name.clone()));
Ok(Transformed::yes(Expr::Column(column)))
}
}
time_window_expr = Some(rewritten);
break;
}
}
Ok((ts_col_name, time_window_expr, expected_time_unit, df_schema))
} else {
// can't find a time window expr, return None
Ok((ts_col_name, None, expected_time_unit, df_schema))
}
}
/// Find nearest lower bound for time `current` in given `plan` for the time window expr.
/// i.e. for time window expr being `date_bin(INTERVAL '5 minutes', ts) as time_window` and `current="2021-07-01 00:01:01.000"`,
/// return `Some("2021-07-01 00:00:00.000")`
/// if `plan` doesn't contain a `TIME INDEX` column, return `None`
///
/// Time window expr is an expr that:
/// 1. refers only to a time index column
/// 2. is monotonically increasing
/// 3. shows up in the GROUP BY clause
///
/// note this plan should only contain one TableScan
pub async fn find_plan_time_window_bound(
plan: &LogicalPlan,
current: Timestamp,
query_ctx: QueryContextRef,
engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
// TODO(discord9): find the expr that does the time window
let catalog_man = engine.engine_state().catalog_manager();
let (ts_col_name, time_window_expr, expected_time_unit, df_schema) =
find_time_window_expr(plan, catalog_man.clone(), query_ctx).await?;
// cast current to ts_index's type
let new_current = current
.convert_to(expected_time_unit)
.with_context(|| UnexpectedSnafu {
reason: format!("Failed to cast current timestamp {current:?} to {expected_time_unit}"),
})?;
// if no time_window_expr is found, return None
if let Some(time_window_expr) = time_window_expr {
let lower_bound =
find_expr_time_window_lower_bound(&time_window_expr, &df_schema, new_current)?;
let upper_bound =
find_expr_time_window_upper_bound(&time_window_expr, &df_schema, new_current)?;
Ok((ts_col_name, lower_bound, upper_bound))
} else {
Ok((ts_col_name, None, None))
}
}
/// Find the lower bound of time window in given `expr` and `current` timestamp.
///
/// i.e. for `current="2021-07-01 00:01:01.000"` and `expr=date_bin(INTERVAL '5 minutes', ts) as time_window` and `ts_col=ts`,
/// return `Some("2021-07-01 00:00:00.000")` since it's the lower bound
/// of current time window given the current timestamp
///
/// If this returns None, the time window has no lower bound
fn find_expr_time_window_lower_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
let input_time_unit = cur_time_window.unit();
Ok(cur_time_window.convert_to(input_time_unit))
}
/// Find the upper bound for time window expression
fn find_expr_time_window_upper_bound(
expr: &Expr,
df_schema: &DFSchema,
current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
use std::cmp::Ordering;
let phy_planner = DefaultPhysicalPlanner::default();
let phy_expr: PhysicalExprRef = phy_planner
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
.with_context(|_e| DatafusionSnafu {
context: format!(
"Failed to create physical expression from {expr:?} using {df_schema:?}"
),
})?;
let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
// search for the upper bound of the current time window
let mut offset: i64 = 1;
let mut lower_bound = Some(current);
let upper_bound;
// first, exponentially probe to find a range for the binary search
loop {
let Some(next_val) = current.value().checked_add(offset) else {
// no upper bound if overflow
return Ok(None);
};
let next_time_probe = common_time::Timestamp::new(next_val, current.unit());
let next_time_window = eval_ts_to_ts(&phy_expr, df_schema, next_time_probe)?;
match next_time_window.cmp(&cur_time_window) {
Ordering::Less => {UnexpectedSnafu {
reason: format!(
"Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
),
}
.fail()?
}
Ordering::Equal => {
lower_bound = Some(next_time_probe);
}
Ordering::Greater => {
upper_bound = Some(next_time_probe);
break
}
}
let Some(new_offset) = offset.checked_mul(2) else {
// no upper bound if overflow
return Ok(None);
};
offset = new_offset;
}
// binary search for the exact upper bound
ensure!(lower_bound.map(|v|v.unit())==upper_bound.map(|v|v.unit()), UnexpectedSnafu{
reason: format!(" unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"),
});
let output_unit = upper_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.unit();
let mut low = lower_bound
.context(UnexpectedSnafu {
reason: "should have lower bound",
})?
.value();
let mut high = upper_bound
.context(UnexpectedSnafu {
reason: "should have upper bound",
})?
.value();
while low < high {
let mid = (low + high) / 2;
let mid_probe = common_time::Timestamp::new(mid, output_unit);
let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;
match mid_time_window.cmp(&cur_time_window) {
Ordering::Less => UnexpectedSnafu {
reason: format!("Binary search failed for time window expression {expr:?}"),
}
.fail()?,
Ordering::Equal => low = mid + 1,
Ordering::Greater => high = mid,
}
}
let final_upper_bound_for_time_window = common_time::Timestamp::new(high, output_unit);
Ok(Some(final_upper_bound_for_time_window))
}
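/// Evaluate the physical time-window expression against a single timestamp and return the resulting window timestamp.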
fn eval_ts_to_ts(
phy: &PhysicalExprRef,
df_schema: &DFSchema,
input_value: Timestamp,
) -> Result<Timestamp, Error> {
let schema_ty = df_schema.field(0).data_type();
let schema_cdt = ConcreteDataType::from_arrow_type(schema_ty);
let schema_unit = if let ConcreteDataType::Timestamp(ts) = schema_cdt {
ts.unit()
} else {
return UnexpectedSnafu {
reason: format!("Expect Timestamp, found {:?}", schema_cdt),
}
.fail();
};
let input_value = input_value
.convert_to(schema_unit)
.with_context(|| UnexpectedSnafu {
reason: format!("Failed to convert timestamp {input_value:?} to {schema_unit}"),
})?;
let ts_vector = match schema_unit {
TimeUnit::Second => {
TimestampSecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Millisecond => {
TimestampMillisecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Microsecond => {
TimestampMicrosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
TimeUnit::Nanosecond => {
TimestampNanosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
}
};
let rb = DfRecordBatch::try_new(df_schema.inner().clone(), vec![ts_vector.clone()])
.with_context(|_| ArrowSnafu {
context: format!("Failed to create record batch from {df_schema:?} and {ts_vector:?}"),
})?;
let eval_res = phy.evaluate(&rb).with_context(|_| DatafusionSnafu {
context: format!("Failed to evaluate physical expression {phy:?} on {rb:?}"),
})?;
if let Some(Some(ts)) = columnar_to_ts_vector(&eval_res)?.first() {
Ok(*ts)
} else {
UnexpectedSnafu {
reason: format!(
"Expected timestamp in expression {phy:?} but got {:?}",
eval_res
),
}
.fail()?
}
}
// TODO(discord9): a method to find out the precise time window
/// Find out the `Filter` Node corresponding to outermost `WHERE` and add a new filter expr to it
#[derive(Debug)]
pub struct AddFilterRewriter {
extra_filter: Expr,
is_rewritten: bool,
}
impl AddFilterRewriter {
fn new(filter: Expr) -> Self {
Self {
extra_filter: filter,
is_rewritten: false,
}
}
}
impl TreeNodeRewriter for AddFilterRewriter {
type Node = LogicalPlan;
fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
if self.is_rewritten {
return Ok(Transformed::no(node));
}
match node {
LogicalPlan::Filter(mut filter) if !filter.having => {
filter.predicate = filter.predicate.and(self.extra_filter.clone());
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
LogicalPlan::TableScan(_) => {
// add a new filter
let filter =
datafusion_expr::Filter::try_new(self.extra_filter.clone(), Arc::new(node))?;
self.is_rewritten = true;
Ok(Transformed::yes(LogicalPlan::Filter(filter)))
}
_ => Ok(Transformed::no(node)),
}
}
}
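/// Unparse a logical plan back to SQL, quoting identifiers that contain uppercase characters.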
fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
/// A dialect that forces all identifiers to be quoted
struct ForceQuoteIdentifiers;
impl datafusion::sql::unparser::dialect::Dialect for ForceQuoteIdentifiers {
fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
if identifier.to_lowercase() != identifier {
Some('"')
} else {
None
}
}
}
let unparser = Unparser::new(&ForceQuoteIdentifiers);
// first make all column qualified
let sql = unparser
.plan_to_sql(plan)
.with_context(|_e| DatafusionSnafu {
context: format!("Failed to unparse logical plan {plan:?}"),
})?;
Ok(sql.to_string())
}
#[cfg(test)]
mod test {
use datafusion_common::tree_node::TreeNode;
use pretty_assertions::assert_eq;
use session::context::QueryContext;
use super::{sql_to_df_plan, *};
use crate::recording_rules::{df_plan_to_sql, AddFilterRewriter};
use crate::test_utils::create_test_query_engine;
#[tokio::test]
async fn test_sql_plan_convert() {
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
let old = r#"SELECT "NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#;
let new = sql_to_df_plan(ctx.clone(), query_engine.clone(), old, false)
.await
.unwrap();
let new_sql = df_plan_to_sql(&new).unwrap();
assert_eq!(
r#"SELECT "UPPERCASE_NUMBERS_WITH_TS"."NUMBER" FROM "UPPERCASE_NUMBERS_WITH_TS""#,
new_sql
);
}
#[tokio::test]
async fn test_add_filter() {
let testcases = vec![
(
"SELECT number FROM numbers_with_ts GROUP BY number","SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (number > 4) GROUP BY numbers_with_ts.number"
),
(
"SELECT number FROM numbers_with_ts WHERE number < 2 OR number >10",
"SELECT numbers_with_ts.number FROM numbers_with_ts WHERE ((numbers_with_ts.number < 2) OR (numbers_with_ts.number > 10)) AND (number > 4)"
),
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window",
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE (number > 4) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
)
];
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
for (before, after) in testcases {
let sql = before;
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let mut add_filter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
let plan = plan.rewrite(&mut add_filter).unwrap().data;
let new_sql = df_plan_to_sql(&plan).unwrap();
assert_eq!(after, new_sql);
}
}
#[tokio::test]
async fn test_plan_time_window_lower_bound() {
use datafusion_expr::{col, lit};
let query_engine = create_test_query_engine();
let ctx = QueryContext::arc();
let testcases = [
// same alias is not same column
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts GROUP BY ts;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394109000, TimeUnit::Millisecond)),
Some(Timestamp::new(1740394109001, TimeUnit::Millisecond)),
),
r#"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:29' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:48:29.001' AS TIMESTAMP))) GROUP BY numbers_with_ts.ts"#
),
// complex time window index
(
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(1740394109, TimeUnit::Second),
(
"ts".to_string(),
Some(Timestamp::new(1740394080, TimeUnit::Second)),
Some(Timestamp::new(1740394140, TimeUnit::Second)),
),
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
),
// no time index
(
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",
Timestamp::new(23, TimeUnit::Millisecond),
("ts".to_string(), None, None),
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;"
),
// time index
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// on spot
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(0, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// different time unit
(
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23_000_000, TimeUnit::Nanosecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other fields
(
"SELECT sum(number) as sum_up, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number) AS sum_up, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
),
// time index with other pks
(
"SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number"
),
// subquery
(
"SELECT number, time_window FROM (SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number);",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT numbers_with_ts.number, time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number)"
),
// cte
(
"with cte as (select number, date_bin('5 minutes', ts) as time_window from numbers_with_ts GROUP BY time_window, number) select number, time_window from cte;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT cte.number, cte.time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number) AS cte"
),
// complex subquery without alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(numbers_with_ts.number), numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window, bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) GROUP BY numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts), bucket_name"
),
// complex subquery alias
(
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) as cte GROUP BY number, time_window, bucket_name;",
Timestamp::new(23, TimeUnit::Millisecond),
(
"ts".to_string(),
Some(Timestamp::new(0, TimeUnit::Millisecond)),
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
),
"SELECT sum(cte.number), cte.number, date_bin('5 minutes', cte.ts) AS time_window, cte.bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) AS cte GROUP BY cte.number, date_bin('5 minutes', cte.ts), cte.bucket_name"
),
];
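// Each case above is (sql, current timestamp, expected (column, lower, upper) bound, expected rewritten SQL).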
for (sql, current, expected, expected_unparsed) in testcases {
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, true)
.await
.unwrap();
let real =
find_plan_time_window_bound(&plan, current, ctx.clone(), query_engine.clone())
.await
.unwrap();
assert_eq!(expected, real);
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
.await
.unwrap();
let (col_name, lower, upper) = real;
let new_sql = if lower.is_some() {
let to_df_literal = |value| {
let value = Value::from(value);
value.try_to_scalar_value(&value.data_type()).unwrap()
};
let lower = to_df_literal(lower.unwrap());
let upper = to_df_literal(upper.unwrap());
let expr = col(&col_name)
.gt_eq(lit(lower))
.and(col(&col_name).lt_eq(lit(upper)));
let mut add_filter = AddFilterRewriter::new(expr);
let plan = plan.rewrite(&mut add_filter).unwrap().data;
df_plan_to_sql(&plan).unwrap()
} else {
sql.to_string()
};
assert_eq!(expected_unparsed, new_sql);
}
}
}

View File

@@ -1,815 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use api::v1::flow::FlowResponse;
use common_error::ext::BoxedError;
use common_meta::ddl::create_flow::FlowType;
use common_meta::key::flow::FlowMetadataManagerRef;
use common_meta::key::table_info::TableInfoManager;
use common_meta::key::TableMetadataManagerRef;
use common_telemetry::tracing::warn;
use common_telemetry::{debug, info};
use common_time::Timestamp;
use datafusion::sql::unparser::expr_to_sql;
use datafusion_common::tree_node::TreeNode;
use datatypes::value::Value;
use query::QueryEngineRef;
use session::context::QueryContextRef;
use snafu::{ensure, OptionExt, ResultExt};
use store_api::storage::RegionId;
use table::metadata::TableId;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::sync::{oneshot, RwLock};
use tokio::time::Instant;
use super::frontend_client::FrontendClient;
use super::{df_plan_to_sql, AddFilterRewriter, TimeWindowExpr};
use crate::adapter::{CreateFlowArgs, FlowId, TableName};
use crate::error::{
DatafusionSnafu, DatatypesSnafu, ExternalSnafu, FlowAlreadyExistSnafu, InternalSnafu,
TimeSnafu, UnexpectedSnafu,
};
use crate::metrics::{METRIC_FLOW_RULE_ENGINE_QUERY_TIME, METRIC_FLOW_RULE_ENGINE_SLOW_QUERY};
use crate::recording_rules::{find_time_window_expr, sql_to_df_plan};
use crate::Error;
/// TODO(discord9): make those constants configurable
/// The default rule engine query timeout is 10 minutes
pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60);
/// Will output a warn log for any query that runs for more than 1 minute, and also every 1 minute while that query is still running
pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);
/// TODO(discord9): determine how to configure refresh rate
pub struct RecordingRuleEngine {
tasks: RwLock<BTreeMap<FlowId, RecordingRuleTask>>,
shutdown_txs: RwLock<BTreeMap<FlowId, oneshot::Sender<()>>>,
frontend_client: Arc<FrontendClient>,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
engine: QueryEngineRef,
}
impl RecordingRuleEngine {
pub fn new(
frontend_client: Arc<FrontendClient>,
engine: QueryEngineRef,
flow_metadata_manager: FlowMetadataManagerRef,
table_meta: TableMetadataManagerRef,
) -> Self {
Self {
tasks: Default::default(),
shutdown_txs: Default::default(),
frontend_client,
flow_metadata_manager,
table_meta,
engine,
}
}
pub async fn handle_inserts(
&self,
request: api::v1::region::InsertRequests,
) -> Result<FlowResponse, Error> {
let table_info_mgr = self.table_meta.table_info_manager();
let mut group_by_table_name: HashMap<TableName, Vec<api::v1::Rows>> = HashMap::new();
for r in request.requests {
let tid = RegionId::from(r.region_id).table_id();
let name = get_table_name(table_info_mgr, &tid).await?;
let entry = group_by_table_name.entry(name).or_default();
if let Some(rows) = r.rows {
entry.push(rows);
}
}
for (_flow_id, task) in self.tasks.read().await.iter() {
let src_table_names = &task.source_table_names;
for src_table_name in src_table_names {
if let Some(entry) = group_by_table_name.get(src_table_name) {
let Some(expr) = &task.time_window_expr else {
continue;
};
let involved_time_windows = expr.handle_rows(entry.clone()).await?;
let mut state = task.state.write().await;
state
.dirty_time_windows
.add_lower_bounds(involved_time_windows.into_iter());
}
}
}
Ok(Default::default())
}
}
async fn get_table_name(zelf: &TableInfoManager, table_id: &TableId) -> Result<TableName, Error> {
zelf.get(*table_id)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| UnexpectedSnafu {
reason: format!("Table id = {:?}, couldn't found table name", table_id),
})
.map(|name| name.table_name())
.map(|name| [name.catalog_name, name.schema_name, name.table_name])
}
const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);
impl RecordingRuleEngine {
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
let CreateFlowArgs {
flow_id,
sink_table_name,
source_table_ids,
create_if_not_exists,
or_replace,
expire_after,
comment: _,
sql,
flow_options,
query_ctx,
} = args;
// or replace logic
{
let is_exist = self.tasks.read().await.contains_key(&flow_id);
match (create_if_not_exists, or_replace, is_exist) {
// if replace, ignore that old flow exists
(_, true, true) => {
info!("Replacing flow with id={}", flow_id);
}
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
// already exists, and not replace, return None
(true, false, true) => {
info!("Flow with id={} already exists, do nothing", flow_id);
return Ok(None);
}
// continue as normal
(_, _, false) => (),
}
}
let flow_type = flow_options.get(FlowType::FLOW_TYPE_KEY);
ensure!(
flow_type == Some(&FlowType::RecordingRule.to_string()) || flow_type.is_none(),
UnexpectedSnafu {
reason: format!("Flow type is not RecordingRule nor None, got {flow_type:?}")
}
);
let Some(query_ctx) = query_ctx else {
UnexpectedSnafu {
reason: "Query context is None".to_string(),
}
.fail()?
};
let query_ctx = Arc::new(query_ctx);
let mut source_table_names = Vec::new();
for src_id in source_table_ids {
let table_name = self
.table_meta
.table_info_manager()
.get(src_id)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?
.with_context(|| UnexpectedSnafu {
reason: format!("Table id = {:?}, couldn't found table name", src_id),
})
.map(|name| name.table_name())
.map(|name| [name.catalog_name, name.schema_name, name.table_name])?;
source_table_names.push(table_name);
}
let (tx, rx) = oneshot::channel();
let plan = sql_to_df_plan(query_ctx.clone(), self.engine.clone(), &sql, true).await?;
let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
&plan,
self.engine.engine_state().catalog_manager().clone(),
query_ctx.clone(),
)
.await?;
let phy_expr = time_window_expr
.map(|expr| TimeWindowExpr::from_expr(&expr, &column_name, &df_schema))
.transpose()?;
info!("Flow id={}, found time window expr={:?}", flow_id, phy_expr);
let task = RecordingRuleTask::new(
flow_id,
&sql,
phy_expr,
expire_after,
sink_table_name,
source_table_names,
query_ctx,
rx,
);
let task_inner = task.clone();
let engine = self.engine.clone();
let frontend = self.frontend_client.clone();
// TODO(discord9): also save the handle & use a time wheel or similar for better scheduling
let _handle = common_runtime::spawn_global(async move {
match task_inner.start_executing(engine, frontend).await {
Ok(()) => info!("Flow {} shutdown", task_inner.flow_id),
Err(err) => common_telemetry::error!(
"Flow {} encounter unrecoverable error: {err:?}",
task_inner.flow_id
),
}
});
// TODO(discord9): deal with replace logic
let replaced_old_task_opt = self.tasks.write().await.insert(flow_id, task);
drop(replaced_old_task_opt);
self.shutdown_txs.write().await.insert(flow_id, tx);
Ok(Some(flow_id))
}
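// For example (illustrative), a flow query like
// `SELECT date_bin('5 minutes', ts) AS time_window ... GROUP BY time_window`
// yields a time window expr bound to column `ts` with a 5-minute window size.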
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
if self.tasks.write().await.remove(&flow_id).is_none() {
warn!("Flow {flow_id} not found in tasks")
}
let Some(tx) = self.shutdown_txs.write().await.remove(&flow_id) else {
UnexpectedSnafu {
reason: format!("Can't found shutdown tx for flow {flow_id}"),
}
.fail()?
};
if tx.send(()).is_err() {
warn!("Fail to shutdown flow {flow_id} due to receiver already dropped, maybe flow {flow_id} is already dropped?")
}
Ok(())
}
}
#[derive(Debug, Clone)]
pub struct RecordingRuleTask {
pub flow_id: FlowId,
query: String,
pub time_window_expr: Option<TimeWindowExpr>,
/// Expire data older than this number of seconds
pub expire_after: Option<i64>,
sink_table_name: [String; 3],
source_table_names: HashSet<[String; 3]>,
state: Arc<RwLock<RecordingRuleState>>,
}
impl RecordingRuleTask {
#[allow(clippy::too_many_arguments)]
pub fn new(
flow_id: FlowId,
query: &str,
time_window_expr: Option<TimeWindowExpr>,
expire_after: Option<i64>,
sink_table_name: [String; 3],
source_table_names: Vec<[String; 3]>,
query_ctx: QueryContextRef,
shutdown_rx: oneshot::Receiver<()>,
) -> Self {
Self {
flow_id,
query: query.to_string(),
time_window_expr,
expire_after,
sink_table_name,
source_table_names: source_table_names.into_iter().collect(),
state: Arc::new(RwLock::new(RecordingRuleState::new(query_ctx, shutdown_rx))),
}
}
}
impl RecordingRuleTask {
/// This should be called in a new tokio task
pub async fn start_executing(
&self,
engine: QueryEngineRef,
frontend_client: Arc<FrontendClient>,
) -> Result<(), Error> {
// only the first query doesn't need an upper bound
let mut is_first = true;
loop {
// FIXME(discord9): test whether also requiring an upper bound works
let new_query = self.gen_query_with_time_window(engine.clone()).await?;
let insert_into = if let Some(new_query) = new_query {
format!(
"INSERT INTO {}.{}.{} {}",
self.sink_table_name[0],
self.sink_table_name[1],
self.sink_table_name[2],
new_query
)
} else {
tokio::time::sleep(MIN_REFRESH_DURATION).await;
continue;
};
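// Illustrative shape of the generated statement (table and filter values are hypothetical):
//   INSERT INTO greptime.public.sink_table
//   SELECT ... FROM source_table
//   WHERE ((ts >= '...') AND (ts < '...')) GROUP BY ...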
if is_first {
is_first = false;
}
let instant = Instant::now();
let flow_id = self.flow_id;
let db_client = frontend_client.get_database_client().await?;
let peer_addr = db_client.peer.addr;
debug!(
"Executing flow {flow_id}(expire_after={:?} secs) on {:?} with query {}",
self.expire_after, peer_addr, &insert_into
);
let timer = METRIC_FLOW_RULE_ENGINE_QUERY_TIME
.with_label_values(&[flow_id.to_string().as_str()])
.start_timer();
let res = db_client.database.sql(&insert_into).await;
drop(timer);
let elapsed = instant.elapsed();
if let Ok(res1) = &res {
debug!(
"Flow {flow_id} executed, result: {res1:?}, elapsed: {:?}",
elapsed
);
} else if let Err(res) = &res {
warn!(
"Failed to execute Flow {flow_id} on frontend {}, result: {res:?}, elapsed: {:?} with query: {}",
peer_addr, elapsed, &insert_into
);
}
// record slow query
if elapsed >= SLOW_QUERY_THRESHOLD {
warn!(
"Flow {flow_id} on frontend {} executed for {:?} before complete, query: {}",
peer_addr, elapsed, &insert_into
);
METRIC_FLOW_RULE_ENGINE_SLOW_QUERY
.with_label_values(&[flow_id.to_string().as_str(), &insert_into, &peer_addr])
.observe(elapsed.as_secs_f64());
}
self.state
.write()
.await
.after_query_exec(elapsed, res.is_ok());
// drop the result to free client-related resources
drop(res);
let sleep_until = {
let mut state = self.state.write().await;
match state.shutdown_rx.try_recv() {
Ok(()) => break Ok(()),
Err(TryRecvError::Closed) => {
warn!("Unexpected shutdown flow {flow_id}, shutdown anyway");
break Ok(());
}
Err(TryRecvError::Empty) => (),
}
state.get_next_start_query_time(None)
};
tokio::time::sleep_until(sleep_until).await;
}
}
/// Will merge dirty time windows and use at most the first `MAX_FILTER_NUM` of them in the query
async fn gen_query_with_time_window(
&self,
engine: QueryEngineRef,
) -> Result<Option<String>, Error> {
let query_ctx = self.state.read().await.query_ctx.clone();
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
let low_bound = self
.expire_after
.map(|e| since_the_epoch.as_secs() - e as u64)
.unwrap_or(u64::MIN);
let low_bound = Timestamp::new_second(low_bound as i64);
// TODO(discord9): use time window expr to get the precise expire lower bound
let expire_time_window_bound = self
.time_window_expr
.as_ref()
.map(|expr| expr.eval(low_bound))
.transpose()?;
let new_sql = {
let expr = {
match expire_time_window_bound {
Some((Some(l), Some(u))) => {
let window_size = u.sub(&l).with_context(|| UnexpectedSnafu {
reason: format!("Can't get window size from {u:?} - {l:?}"),
})?;
let col_name = self
.time_window_expr
.as_ref()
.map(|expr| expr.column_name.clone())
.with_context(|| UnexpectedSnafu {
reason: format!(
"Flow id={:?}, Failed to get column name from time window expr",
self.flow_id
),
})?;
self.state
.write()
.await
.dirty_time_windows
.gen_filter_exprs(&col_name, Some(l), window_size, self)?
}
_ => {
debug!(
"Flow id = {:?}, can't get window size: precise_lower_bound={expire_time_window_bound:?}, using the same query", self.flow_id
);
// since no time window lower/upper bound is found, just return the original query
return Ok(Some(self.query.clone()));
}
}
};
debug!(
"Flow id={:?}, Generated filter expr: {:?}",
self.flow_id,
expr.as_ref()
.map(|expr| expr_to_sql(expr).with_context(|_| DatafusionSnafu {
context: format!("Failed to generate filter expr from {expr:?}"),
}))
.transpose()?
.map(|s| s.to_string())
);
let Some(expr) = expr else {
// no new data, hence no need to update
debug!("Flow id={:?}, no new data, not update", self.flow_id);
return Ok(None);
};
let mut add_filter = AddFilterRewriter::new(expr);
// build an unoptimized plan for clearer unparsing
let plan =
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, false).await?;
let plan = plan
.clone()
.rewrite(&mut add_filter)
.with_context(|_| DatafusionSnafu {
context: format!("Failed to rewrite plan {plan:?}"),
})?
.data;
df_plan_to_sql(&plan)?
};
Ok(Some(new_sql))
}
}
#[derive(Debug)]
pub struct RecordingRuleState {
query_ctx: QueryContextRef,
/// Completion time of the last query
last_update_time: Instant,
/// Duration of the last query
last_query_duration: Duration,
/// Dirty time windows that need to be updated,
/// stored as a non-overlapping mapping of `start -> end`
dirty_time_windows: DirtyTimeWindows,
exec_state: ExecState,
shutdown_rx: oneshot::Receiver<()>,
}
#[derive(Debug, Clone, Default)]
pub struct DirtyTimeWindows {
windows: BTreeMap<Timestamp, Option<Timestamp>>,
}
fn to_df_literal(value: Timestamp) -> Result<datafusion_common::ScalarValue, Error> {
let value = Value::from(value);
let value = value
.try_to_scalar_value(&value.data_type())
.with_context(|_| DatatypesSnafu {
extra: format!("Failed to convert to scalar value: {}", value),
})?;
Ok(value)
}
impl DirtyTimeWindows {
/// Time window merge distance
const MERGE_DIST: i32 = 3;
/// Maximum number of filters allowed in a single query
const MAX_FILTER_NUM: usize = 20;
/// Add lower bounds to the dirty time windows. Upper bounds are ignored.
///
/// # Arguments
///
/// * `lower_bounds` - An iterator of lower bounds to be added.
pub fn add_lower_bounds(&mut self, lower_bounds: impl Iterator<Item = Timestamp>) {
for lower_bound in lower_bounds {
let entry = self.windows.entry(lower_bound);
entry.or_insert(None);
}
}
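// Illustrative shape of the filter produced by `gen_filter_exprs` below
// (timestamps are hypothetical):
//   (ts >= '2025-01-01T00:00:00' AND ts < '2025-01-01T00:05:00')
//     OR (ts >= '2025-01-01T00:20:00' AND ts < '2025-01-01T00:25:00')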
/// Generate filter expressions from the dirty time windows, consuming at most the first `MAX_FILTER_NUM` of them
pub fn gen_filter_exprs(
&mut self,
col_name: &str,
expire_lower_bound: Option<Timestamp>,
window_size: chrono::Duration,
task_ctx: &RecordingRuleTask,
) -> Result<Option<datafusion_expr::Expr>, Error> {
debug!(
"expire_lower_bound: {:?}, window_size: {:?}",
expire_lower_bound.map(|t| t.to_iso8601_string()),
window_size
);
self.merge_dirty_time_windows(window_size, expire_lower_bound)?;
if self.windows.len() > Self::MAX_FILTER_NUM {
let first_time_window = self.windows.first_key_value();
let last_time_window = self.windows.last_key_value();
warn!(
"Flow id = {:?}, too many time windows: {}, only the first {} are taken for this query, the group by expression might be wrong. Time window expr={:?}, expire_after={:?}, first_time_window={:?}, last_time_window={:?}, the original query: {:?}",
task_ctx.flow_id,
self.windows.len(),
Self::MAX_FILTER_NUM,
task_ctx.time_window_expr,
task_ctx.expire_after,
first_time_window,
last_time_window,
task_ctx.query
);
}
// get the first `MAX_FILTER_NUM` time windows
let nth = self
.windows
.iter()
.nth(Self::MAX_FILTER_NUM)
.map(|(key, _)| *key);
let first_nth = {
if let Some(nth) = nth {
let mut after = self.windows.split_off(&nth);
std::mem::swap(&mut self.windows, &mut after);
after
} else {
std::mem::take(&mut self.windows)
}
};
let mut expr_lst = vec![];
for (start, end) in first_nth.into_iter() {
debug!(
"Time window start: {:?}, end: {:?}",
start.to_iso8601_string(),
end.map(|t| t.to_iso8601_string())
);
use datafusion_expr::{col, lit};
let lower = to_df_literal(start)?;
let upper = end.map(to_df_literal).transpose()?;
let expr = if let Some(upper) = upper {
col(col_name)
.gt_eq(lit(lower))
.and(col(col_name).lt(lit(upper)))
} else {
col(col_name).gt_eq(lit(lower))
};
expr_lst.push(expr);
}
let expr = expr_lst.into_iter().reduce(|a, b| a.or(b));
Ok(expr)
}
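// Sketch of the merge behaviour with a 5-minute window and MERGE_DIST = 3:
// for lower bounds {00:00, 00:20} the gap between the end of the first window
// (00:05) and the next lower bound (00:20) is exactly 3 * window, so they
// collapse into [00:00, 00:25); lower bounds {00:00, 00:25} stay separate
// (see the unit tests below).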
/// Merge time windows that overlap or are too close to each other
pub fn merge_dirty_time_windows(
&mut self,
window_size: chrono::Duration,
expire_lower_bound: Option<Timestamp>,
) -> Result<(), Error> {
let mut new_windows = BTreeMap::new();
let mut prev_tw = None;
for (lower_bound, upper_bound) in std::mem::take(&mut self.windows) {
// filter out expired time window
if let Some(expire_lower_bound) = expire_lower_bound {
if lower_bound <= expire_lower_bound {
continue;
}
}
let Some(prev_tw) = &mut prev_tw else {
prev_tw = Some((lower_bound, upper_bound));
continue;
};
let std_window_size = window_size.to_std().map_err(|e| {
InternalSnafu {
reason: e.to_string(),
}
.build()
})?;
// if cur.lower - prev.upper <= window_size * MERGE_DIST, merge
let prev_upper = prev_tw
.1
.unwrap_or(prev_tw.0.add_duration(std_window_size).context(TimeSnafu)?);
prev_tw.1 = Some(prev_upper);
let cur_upper = upper_bound.unwrap_or(
lower_bound
.add_duration(std_window_size)
.context(TimeSnafu)?,
);
if lower_bound
.sub(&prev_upper)
.map(|dist| dist <= window_size * Self::MERGE_DIST)
.unwrap_or(false)
{
prev_tw.1 = Some(cur_upper);
} else {
new_windows.insert(prev_tw.0, prev_tw.1);
*prev_tw = (lower_bound, Some(cur_upper));
}
}
if let Some(prev_tw) = prev_tw {
new_windows.insert(prev_tw.0, prev_tw.1);
}
self.windows = new_windows;
Ok(())
}
}
impl RecordingRuleState {
pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
Self {
query_ctx,
last_update_time: Instant::now(),
last_query_duration: Duration::from_secs(0),
dirty_time_windows: Default::default(),
exec_state: ExecState::Idle,
shutdown_rx,
}
}
/// Called after the last query is done.
/// `is_succ` indicates whether the last query was successful
pub fn after_query_exec(&mut self, elapsed: Duration, _is_succ: bool) {
self.exec_state = ExecState::Idle;
self.last_query_duration = elapsed;
self.last_update_time = Instant::now();
}
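// For example, with `max_timeout = None`: if the last query took 90s, the next
// query starts about 90s after it finished; if it took 2s, `MIN_REFRESH_DURATION`
// (5s) applies instead.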
/// Wait for at most the smaller of `max_timeout` and `last_query_duration`, and at least `MIN_REFRESH_DURATION`, before starting the next query
pub fn get_next_start_query_time(&self, max_timeout: Option<Duration>) -> Instant {
let next_duration = max_timeout
.unwrap_or(self.last_query_duration)
.min(self.last_query_duration);
let next_duration = next_duration.max(MIN_REFRESH_DURATION);
self.last_update_time + next_duration
}
}
#[derive(Debug, Clone)]
enum ExecState {
Idle,
Executing,
}
#[cfg(test)]
mod test {
use pretty_assertions::assert_eq;
use super::*;
#[test]
fn test_merge_dirty_time_windows() {
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
.unwrap();
// just enough to merge
assert_eq!(
dirty.windows,
BTreeMap::from([(
Timestamp::new_second(0),
Some(Timestamp::new_second(
(2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
)])
);
// separate time window
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
.unwrap();
// too far apart to merge, so the two windows stay separate
assert_eq!(
BTreeMap::from([
(
Timestamp::new_second(0),
Some(Timestamp::new_second(5 * 60))
),
(
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
Some(Timestamp::new_second(
(3 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
)
]),
dirty.windows
);
// overlapping
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60), None)
.unwrap();
// close enough to merge into a single window
assert_eq!(
BTreeMap::from([(
Timestamp::new_second(0),
Some(Timestamp::new_second(
(1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
))
),]),
dirty.windows
);
// expired
let mut dirty = DirtyTimeWindows::default();
dirty.add_lower_bounds(
vec![
Timestamp::new_second(0),
Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
]
.into_iter(),
);
dirty
.merge_dirty_time_windows(
chrono::Duration::seconds(5 * 60),
Some(Timestamp::new_second(
(DirtyTimeWindows::MERGE_DIST as i64) * 6 * 60,
)),
)
.unwrap();
// both windows fall before the expire lower bound and are dropped
assert_eq!(BTreeMap::from([]), dirty.windows);
}
}

View File

@@ -1,163 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Frontend client used to run a flow as a recording rule: a time-window-aware regular query triggered on every tick configured by the user
use std::sync::Arc;
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use common_error::ext::BoxedError;
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
use common_meta::peer::Peer;
use common_meta::rpc::store::RangeRequest;
use meta_client::client::MetaClient;
use snafu::ResultExt;
use crate::error::{ExternalSnafu, UnexpectedSnafu};
use crate::recording_rules::engine::DEFAULT_RULE_ENGINE_QUERY_TIMEOUT;
use crate::Error;
fn default_channel_mgr() -> ChannelManager {
let cfg = ChannelConfig::new().timeout(DEFAULT_RULE_ENGINE_QUERY_TIMEOUT);
ChannelManager::with_config(cfg)
}
fn client_from_urls(addrs: Vec<String>) -> Client {
Client::with_manager_and_urls(default_channel_mgr(), addrs)
}
/// A simple frontend client able to execute sql using grpc protocol
#[derive(Debug)]
pub enum FrontendClient {
Distributed {
meta_client: Arc<MetaClient>,
channel_mgr: ChannelManager,
},
Standalone {
/// For the sake of simplicity, still use grpc even in standalone mode.
/// Note that the client here should be lazy, so it can wait until the frontend is booted before making a connection.
/// TODO(discord9): not use grpc under standalone mode
database_client: DatabaseWithPeer,
},
}
#[derive(Debug, Clone)]
pub struct DatabaseWithPeer {
pub database: Database,
pub peer: Peer,
}
impl DatabaseWithPeer {
fn new(database: Database, peer: Peer) -> Self {
Self { database, peer }
}
}
impl FrontendClient {
pub fn from_meta_client(meta_client: Arc<MetaClient>) -> Self {
Self::Distributed {
meta_client,
channel_mgr: default_channel_mgr(),
}
}
pub fn from_static_grpc_addr(addr: String) -> Self {
let peer = Peer {
id: 0,
addr: addr.clone(),
};
let mgr = default_channel_mgr();
let client = Client::with_manager_and_urls(mgr.clone(), vec![addr]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Self::Standalone {
database_client: DatabaseWithPeer::new(database, peer),
}
}
}
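// Illustrative usage (hypothetical address):
//   let client = FrontendClient::from_static_grpc_addr("127.0.0.1:4001".to_string());
//   let db = client.get_database_client().await?;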
impl FrontendClient {
async fn scan_for_frontend(&self) -> Result<Vec<(NodeInfoKey, NodeInfo)>, Error> {
let Self::Distributed { meta_client, .. } = self else {
return Ok(vec![]);
};
let cluster_client = meta_client
.cluster_client()
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let cluster_id = meta_client.id().0;
let prefix = NodeInfoKey::key_prefix_with_role(cluster_id, Role::Frontend);
let req = RangeRequest::new().with_prefix(prefix);
let resp = cluster_client
.range(req)
.await
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let mut res = Vec::with_capacity(resp.kvs.len());
for kv in resp.kvs {
let key = NodeInfoKey::try_from(kv.key)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
let val = NodeInfo::try_from(kv.value)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
res.push((key, val));
}
Ok(res)
}
/// Get the database with max `last_activity_ts`
async fn get_last_active_frontend(&self) -> Result<DatabaseWithPeer, Error> {
match &self {
Self::Standalone { database_client } => Ok(database_client.clone()),
Self::Distributed {
meta_client: _,
channel_mgr,
} => {
let frontends = self.scan_for_frontend().await?;
let mut last_activity_ts = i64::MIN;
let mut peer = None;
for (_key, val) in frontends.iter() {
if val.last_activity_ts > last_activity_ts {
last_activity_ts = val.last_activity_ts;
peer = Some(val.peer.clone());
}
}
let Some(peer) = peer else {
UnexpectedSnafu {
reason: format!("No frontend available: {:?}", frontends),
}
.fail()?
};
let client =
Client::with_manager_and_urls(channel_mgr.clone(), vec![peer.addr.clone()]);
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
Ok(DatabaseWithPeer::new(database, peer))
}
}
}
/// Get a database client, and possibly update it before returning.
pub async fn get_database_client(&self) -> Result<DatabaseWithPeer, Error> {
match self {
Self::Standalone { database_client } => Ok(database_client.clone()),
Self::Distributed { meta_client: _, .. } => self.get_last_active_frontend().await,
}
}
}

View File

@@ -57,7 +57,6 @@ use crate::error::{
}; };
use crate::heartbeat::HeartbeatTask; use crate::heartbeat::HeartbeatTask;
use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS}; use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
use crate::recording_rules::{FrontendClient, RecordingRuleEngine};
use crate::transform::register_function_to_query_engine; use crate::transform::register_function_to_query_engine;
use crate::utils::{SizeReportSender, StateReportHandler}; use crate::utils::{SizeReportSender, StateReportHandler};
use crate::{Error, FlowWorkerManager, FlownodeOptions}; use crate::{Error, FlowWorkerManager, FlownodeOptions};
@@ -246,7 +245,6 @@ impl FlownodeInstance {
self.server.shutdown().await.context(ShutdownServerSnafu)?; self.server.shutdown().await.context(ShutdownServerSnafu)?;
if let Some(task) = &self.heartbeat_task { if let Some(task) = &self.heartbeat_task {
info!("Close heartbeat task for flownode");
task.shutdown(); task.shutdown();
} }
@@ -273,8 +271,6 @@ pub struct FlownodeBuilder {
heartbeat_task: Option<HeartbeatTask>, heartbeat_task: Option<HeartbeatTask>,
/// receive a oneshot sender to send state size report /// receive a oneshot sender to send state size report
state_report_handler: Option<StateReportHandler>, state_report_handler: Option<StateReportHandler>,
/// Client to send sql to frontend
frontend_client: Arc<FrontendClient>,
} }
impl FlownodeBuilder { impl FlownodeBuilder {
@@ -285,7 +281,6 @@ impl FlownodeBuilder {
table_meta: TableMetadataManagerRef, table_meta: TableMetadataManagerRef,
catalog_manager: CatalogManagerRef, catalog_manager: CatalogManagerRef,
flow_metadata_manager: FlowMetadataManagerRef, flow_metadata_manager: FlowMetadataManagerRef,
frontend_client: Arc<FrontendClient>,
) -> Self { ) -> Self {
Self { Self {
opts, opts,
@@ -295,7 +290,6 @@ impl FlownodeBuilder {
flow_metadata_manager, flow_metadata_manager,
heartbeat_task: None, heartbeat_task: None,
state_report_handler: None, state_report_handler: None,
frontend_client,
} }
} }
@@ -453,14 +447,7 @@ impl FlownodeBuilder {
let node_id = self.opts.node_id.map(|id| id as u32); let node_id = self.opts.node_id.map(|id| id as u32);
let rule_engine = RecordingRuleEngine::new( let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta);
self.frontend_client.clone(),
query_engine.clone(),
self.flow_metadata_manager.clone(),
table_meta.clone(),
);
let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta, rule_engine);
for worker_id in 0..num_workers { for worker_id in 0..num_workers {
let (tx, rx) = oneshot::channel(); let (tx, rx) = oneshot::channel();

View File

@@ -86,8 +86,7 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {
let schema = vec![ let schema = vec![
datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false), datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false),
datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false) datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false),
.with_time_index(true),
]; ];
let mut columns = vec![]; let mut columns = vec![];
let numbers = (1..=10).collect_vec(); let numbers = (1..=10).collect_vec();
@@ -115,37 +114,6 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {
}; };
catalog_list.register_table_sync(req_with_ts).unwrap(); catalog_list.register_table_sync(req_with_ts).unwrap();
let schema = vec![
datatypes::schema::ColumnSchema::new("NUMBER", CDT::uint32_datatype(), false),
datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false)
.with_time_index(true),
];
let mut columns = vec![];
let numbers = (1..=10).collect_vec();
let column: VectorRef = Arc::new(<u32 as Scalar>::VectorType::from_vec(numbers));
columns.push(column);
let ts = (1..=10).collect_vec();
let mut builder = TimestampMillisecondVectorBuilder::with_capacity(10);
ts.into_iter()
.map(|v| builder.push(Some(TimestampMillisecond::new(v))))
.count();
let column: VectorRef = builder.to_vector_cloned();
columns.push(column);
let schema = Arc::new(Schema::new(schema));
let recordbatch = common_recordbatch::RecordBatch::new(schema, columns).unwrap();
let table = MemTable::table("UPPERCASE_NUMBERS_WITH_TS", recordbatch);
let req_with_ts = RegisterTableRequest {
catalog: DEFAULT_CATALOG_NAME.to_string(),
schema: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "UPPERCASE_NUMBERS_WITH_TS".to_string(),
table_id: 1025,
table,
};
catalog_list.register_table_sync(req_with_ts).unwrap();
let factory = query::QueryEngineFactory::new(catalog_list, None, None, None, None, false); let factory = query::QueryEngineFactory::new(catalog_list, None, None, None, None, false);
let engine = factory.query_engine(); let engine = factory.query_engine();

View File

@@ -23,7 +23,7 @@ use common_meta::heartbeat::handler::{
}; };
use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage}; use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage};
use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message; use common_meta::heartbeat::utils::outgoing_message_to_mailbox_message;
use common_telemetry::{debug, error, info}; use common_telemetry::{debug, error, info, warn};
use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient}; use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient};
use servers::addrs; use servers::addrs;
use servers::heartbeat_options::HeartbeatOptions; use servers::heartbeat_options::HeartbeatOptions;
@@ -42,8 +42,8 @@ use crate::metrics::{HEARTBEAT_RECV_COUNT, HEARTBEAT_SENT_COUNT};
pub struct HeartbeatTask { pub struct HeartbeatTask {
peer_addr: String, peer_addr: String,
meta_client: Arc<MetaClient>, meta_client: Arc<MetaClient>,
report_interval: u64, report_interval: Duration,
retry_interval: u64, retry_interval: Duration,
resp_handler_executor: HeartbeatResponseHandlerExecutorRef, resp_handler_executor: HeartbeatResponseHandlerExecutorRef,
start_time_ms: u64, start_time_ms: u64,
} }
@@ -58,8 +58,8 @@ impl HeartbeatTask {
HeartbeatTask { HeartbeatTask {
peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)), peer_addr: addrs::resolve_addr(&opts.grpc.bind_addr, Some(&opts.grpc.server_addr)),
meta_client, meta_client,
report_interval: heartbeat_opts.interval.as_millis() as u64, report_interval: heartbeat_opts.interval,
retry_interval: heartbeat_opts.retry_interval.as_millis() as u64, retry_interval: heartbeat_opts.retry_interval,
resp_handler_executor, resp_handler_executor,
start_time_ms: common_time::util::current_time_millis() as u64, start_time_ms: common_time::util::current_time_millis() as u64,
} }
@@ -103,13 +103,15 @@ impl HeartbeatTask {
HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc(); HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc();
} }
} }
Ok(None) => break, Ok(None) => {
warn!("Heartbeat response stream closed");
capture_self.start_with_retry(retry_interval).await;
break;
}
Err(e) => { Err(e) => {
HEARTBEAT_RECV_COUNT.with_label_values(&["error"]).inc(); HEARTBEAT_RECV_COUNT.with_label_values(&["error"]).inc();
error!(e; "Occur error while reading heartbeat response"); error!(e; "Occur error while reading heartbeat response");
capture_self capture_self.start_with_retry(retry_interval).await;
.start_with_retry(Duration::from_millis(retry_interval))
.await;
break; break;
} }
@@ -177,12 +179,13 @@ impl HeartbeatTask {
if let Some(message) = message { if let Some(message) = message {
Self::new_heartbeat_request(&heartbeat_request, Some(message)) Self::new_heartbeat_request(&heartbeat_request, Some(message))
} else { } else {
warn!("Sender has been dropped, exiting the heartbeat loop");
// Receives None that means Sender was dropped, we need to break the current loop // Receives None that means Sender was dropped, we need to break the current loop
break break
} }
} }
_ = &mut sleep => { _ = &mut sleep => {
sleep.as_mut().reset(Instant::now() + Duration::from_millis(report_interval)); sleep.as_mut().reset(Instant::now() + report_interval);
Self::new_heartbeat_request(&heartbeat_request, None) Self::new_heartbeat_request(&heartbeat_request, None)
} }
}; };

View File

@@ -42,7 +42,16 @@ impl BloomFilterApplier {
) -> Result<Vec<Range<usize>>> { ) -> Result<Vec<Range<usize>>> {
let rows_per_segment = self.meta.rows_per_segment as usize; let rows_per_segment = self.meta.rows_per_segment as usize;
let start_seg = search_range.start / rows_per_segment; let start_seg = search_range.start / rows_per_segment;
let end_seg = search_range.end.div_ceil(rows_per_segment); let mut end_seg = search_range.end.div_ceil(rows_per_segment);
if end_seg == self.meta.segment_loc_indices.len() + 1 {
// In a previous version, there was a bug where if the last segment was all null,
// this segment would not be written into the index. This caused the slice
// `self.meta.segment_loc_indices[start_seg..end_seg]` to go out of bounds due to
// the missing segment. Since the `search` function does not search for nulls,
// we can simply ignore the last segment in this buggy scenario.
end_seg -= 1;
}
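// e.g. with rows_per_segment = 2 and search_range = 0..5, end_seg is 3; if the
// buggy index only recorded 2 segment locations, end_seg is clamped back to 2.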
let locs = &self.meta.segment_loc_indices[start_seg..end_seg]; let locs = &self.meta.segment_loc_indices[start_seg..end_seg];

View File

@@ -64,6 +64,9 @@ pub struct BloomFilterCreator {
/// Storage for finalized Bloom filters. /// Storage for finalized Bloom filters.
finalized_bloom_filters: FinalizedBloomFilterStorage, finalized_bloom_filters: FinalizedBloomFilterStorage,
/// Row count that finalized so far.
finalized_row_count: usize,
/// Global memory usage of the bloom filter creator. /// Global memory usage of the bloom filter creator.
global_memory_usage: Arc<AtomicUsize>, global_memory_usage: Arc<AtomicUsize>,
} }
@@ -96,6 +99,7 @@ impl BloomFilterCreator {
global_memory_usage, global_memory_usage,
global_memory_usage_threshold, global_memory_usage_threshold,
), ),
finalized_row_count: 0,
} }
} }
@@ -136,6 +140,7 @@ impl BloomFilterCreator {
if self.accumulated_row_count % self.rows_per_segment == 0 { if self.accumulated_row_count % self.rows_per_segment == 0 {
self.finalize_segment().await?; self.finalize_segment().await?;
self.finalized_row_count = self.accumulated_row_count;
} }
} }
@@ -161,6 +166,7 @@ impl BloomFilterCreator {
if self.accumulated_row_count % self.rows_per_segment == 0 { if self.accumulated_row_count % self.rows_per_segment == 0 {
self.finalize_segment().await?; self.finalize_segment().await?;
self.finalized_row_count = self.accumulated_row_count;
} }
Ok(()) Ok(())
@@ -168,7 +174,7 @@ impl BloomFilterCreator {
/// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer. /// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer.
pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> { pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> {
if !self.cur_seg_distinct_elems.is_empty() { if self.accumulated_row_count > self.finalized_row_count {
self.finalize_segment().await?; self.finalize_segment().await?;
} }
@@ -406,4 +412,35 @@ mod tests {
assert!(bf.contains(&b"f")); assert!(bf.contains(&b"f"));
} }
} }
#[tokio::test]
async fn test_final_seg_all_null() {
let mut writer = Cursor::new(Vec::new());
let mut creator = BloomFilterCreator::new(
2,
Arc::new(MockExternalTempFileProvider::new()),
Arc::new(AtomicUsize::new(0)),
None,
);
creator
.push_n_row_elems(4, vec![b"a".to_vec(), b"b".to_vec()])
.await
.unwrap();
creator.push_row_elems(Vec::new()).await.unwrap();
creator.finish(&mut writer).await.unwrap();
let bytes = writer.into_inner();
let total_size = bytes.len();
let meta_size_offset = total_size - 4;
let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap());
let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4];
let meta = BloomFilterMeta::decode(meta_bytes).unwrap();
assert_eq!(meta.rows_per_segment, 2);
assert_eq!(meta.segment_count, 3);
assert_eq!(meta.row_count, 5);
}
} }

View File

@@ -112,7 +112,6 @@ impl MetaClientBuilder {
.enable_store() .enable_store()
.enable_heartbeat() .enable_heartbeat()
.enable_procedure() .enable_procedure()
.enable_access_cluster_info()
} }
pub fn enable_heartbeat(self) -> Self { pub fn enable_heartbeat(self) -> Self {

View File

@@ -7,7 +7,6 @@ license.workspace = true
[features] [features]
mock = [] mock = []
pg_kvbackend = ["dep:tokio-postgres", "common-meta/pg_kvbackend"] pg_kvbackend = ["dep:tokio-postgres", "common-meta/pg_kvbackend"]
mysql_kvbackend = [] # placeholder features so CI can compile
[lints] [lints]
workspace = true workspace = true

View File

@@ -27,10 +27,9 @@ use snafu::OptionExt;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio::sync::mpsc::Sender; use tokio::sync::mpsc::Sender;
use tokio_stream::wrappers::ReceiverStream; use tokio_stream::wrappers::ReceiverStream;
use tonic::{Request, Response, Streaming}; use tonic::{Request, Response, Status, Streaming};
use crate::error; use crate::error::{self, Result};
use crate::error::Result;
use crate::handler::{HeartbeatHandlerGroup, Pusher, PusherId}; use crate::handler::{HeartbeatHandlerGroup, Pusher, PusherId};
use crate::metasrv::{Context, Metasrv}; use crate::metasrv::{Context, Metasrv};
use crate::metrics::METRIC_META_HEARTBEAT_RECV; use crate::metrics::METRIC_META_HEARTBEAT_RECV;
@@ -109,6 +108,12 @@ impl heartbeat_server::Heartbeat for Metasrv {
if is_not_leader { if is_not_leader {
warn!("Quit because it is no longer the leader"); warn!("Quit because it is no longer the leader");
let _ = tx
.send(Err(Status::aborted(format!(
"The requested metasrv node is not leader, node addr: {}",
ctx.server_addr
))))
.await;
break; break;
} }
} }

View File

@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration;
use object_store::services::Fs; use object_store::services::Fs;
use object_store::util::{join_dir, with_instrument_layers}; use object_store::util::{join_dir, with_instrument_layers};
@@ -42,6 +43,29 @@ pub type AccessLayerRef = Arc<AccessLayer>;
/// SST write results. /// SST write results.
pub type SstInfoArray = SmallVec<[SstInfo; 2]>; pub type SstInfoArray = SmallVec<[SstInfo; 2]>;
#[derive(Debug, Default)]
pub struct Metrics {
pub read: Duration,
pub write: Duration,
pub convert: Duration,
pub index_update: Duration,
pub index_finish: Duration,
pub close: Duration,
pub num_series: usize,
// SST Opendal metrics.
pub opendal_create_cost: Duration,
pub opendal_num_writes: usize,
pub opendal_write_cost: Duration,
pub opendal_complete_cost: Duration,
}
impl Metrics {
pub fn sum(&self) -> Duration {
self.read + self.write + self.convert + self.index_update + self.index_finish + self.close
}
}
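// Note (illustrative): `sum()` only adds up the per-phase durations above; the
// `opendal_*` counters and durations are object-store level costs reported separately.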
/// A layer to access SST files under the same directory. /// A layer to access SST files under the same directory.
pub struct AccessLayer { pub struct AccessLayer {
region_dir: String, region_dir: String,
@@ -121,10 +145,11 @@ impl AccessLayer {
/// Writes a SST with specific `file_id` and `metadata` to the layer. /// Writes a SST with specific `file_id` and `metadata` to the layer.
/// ///
/// Returns the info of the SST. If no data written, returns None. /// Returns the info of the SST. If no data written, returns None.
pub(crate) async fn write_sst( pub async fn write_sst(
&self, &self,
request: SstWriteRequest, request: SstWriteRequest,
write_opts: &WriteOptions, write_opts: &WriteOptions,
metrics: &mut Metrics,
) -> Result<SstInfoArray> { ) -> Result<SstInfoArray> {
let region_id = request.metadata.region_id; let region_id = request.metadata.region_id;
let cache_manager = request.cache_manager.clone(); let cache_manager = request.cache_manager.clone();
@@ -167,9 +192,16 @@ impl AccessLayer {
path_provider, path_provider,
) )
.await; .await;
writer let sst_info = writer
.write_all(request.source, request.max_sequence, write_opts) .write_all(request.source, request.max_sequence, write_opts, metrics)
.await? .await?;
let opendal_metrics = writer.opendal_metrics_val();
metrics.opendal_create_cost += opendal_metrics.create_cost;
metrics.opendal_num_writes += opendal_metrics.num_writes;
metrics.opendal_write_cost += opendal_metrics.write_cost;
metrics.opendal_complete_cost += opendal_metrics.complete_cost;
sst_info
}; };
// Put parquet metadata to cache manager. // Put parquet metadata to cache manager.
@@ -189,28 +221,53 @@ impl AccessLayer {
} }
} }
/// Helper to build an [AccessLayerRef] with internal index managers.
///
/// This is a convenience constructor intended for tooling that needs to
/// interact with SSTs without wiring all indexing internals manually.
pub async fn build_access_layer(
region_dir: &str,
object_store: ObjectStore,
config: &crate::config::MitoConfig,
) -> Result<AccessLayerRef> {
let puffin_manager_factory = PuffinManagerFactory::new(
&config.index.aux_path,
config.index.staging_size.as_bytes(),
Some(config.index.write_buffer_size.as_bytes() as _),
config.index.staging_ttl,
)
.await?;
let intermediate_manager = IntermediateManager::init_fs(&config.index.aux_path).await?;
Ok(Arc::new(AccessLayer::new(
region_dir,
object_store,
puffin_manager_factory,
intermediate_manager,
)))
}
/// `OperationType` represents the origin of the `SstWriteRequest`. /// `OperationType` represents the origin of the `SstWriteRequest`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) enum OperationType { pub enum OperationType {
Flush, Flush,
Compact, Compact,
} }
/// Contents to build a SST. /// Contents to build a SST.
pub(crate) struct SstWriteRequest { pub struct SstWriteRequest {
pub(crate) op_type: OperationType, pub op_type: OperationType,
pub(crate) metadata: RegionMetadataRef, pub metadata: RegionMetadataRef,
pub(crate) source: Source, pub source: Source,
pub(crate) cache_manager: CacheManagerRef, pub cache_manager: CacheManagerRef,
#[allow(dead_code)] #[allow(dead_code)]
pub(crate) storage: Option<String>, pub storage: Option<String>,
pub(crate) max_sequence: Option<SequenceNumber>, pub max_sequence: Option<SequenceNumber>,
/// Configs for index /// Configs for index
pub(crate) index_options: IndexOptions, pub index_options: IndexOptions,
pub(crate) inverted_index_config: InvertedIndexConfig, pub inverted_index_config: InvertedIndexConfig,
pub(crate) fulltext_index_config: FulltextIndexConfig, pub fulltext_index_config: FulltextIndexConfig,
pub(crate) bloom_filter_index_config: BloomFilterConfig, pub bloom_filter_index_config: BloomFilterConfig,
} }
pub(crate) async fn new_fs_cache_store(root: &str) -> Result<ObjectStore> { pub(crate) async fn new_fs_cache_store(root: &str) -> Result<ObjectStore> {

View File

@@ -40,6 +40,7 @@ use crate::sst::index::IndexerBuilderImpl;
use crate::sst::parquet::writer::ParquetWriter; use crate::sst::parquet::writer::ParquetWriter;
use crate::sst::parquet::WriteOptions; use crate::sst::parquet::WriteOptions;
use crate::sst::{DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY}; use crate::sst::{DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY};
use crate::Metrics;
/// A cache for uploading files to remote object stores. /// A cache for uploading files to remote object stores.
/// ///
@@ -140,7 +141,12 @@ impl WriteCache {
.await; .await;
let sst_info = writer let sst_info = writer
.write_all(write_request.source, write_request.max_sequence, write_opts) .write_all(
write_request.source,
write_request.max_sequence,
write_opts,
&mut Metrics::default(),
)
.await?; .await?;
timer.stop_and_record(); timer.stop_and_record();

View File

@@ -27,7 +27,7 @@ use snafu::{OptionExt, ResultExt};
use store_api::metadata::RegionMetadataRef; use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId; use store_api::storage::RegionId;
use crate::access_layer::{AccessLayer, AccessLayerRef, OperationType, SstWriteRequest}; use crate::access_layer::{AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest};
use crate::cache::{CacheManager, CacheManagerRef}; use crate::cache::{CacheManager, CacheManagerRef};
use crate::compaction::picker::{new_picker, PickerOutput}; use crate::compaction::picker::{new_picker, PickerOutput};
use crate::compaction::{find_ttl, CompactionSstReaderBuilder}; use crate::compaction::{find_ttl, CompactionSstReaderBuilder};
@@ -340,6 +340,7 @@ impl Compactor for DefaultCompactor {
bloom_filter_index_config, bloom_filter_index_config,
}, },
&write_opts, &write_opts,
&mut Metrics::default(),
) )
.await? .await?
.into_iter() .into_iter()

View File

@@ -25,7 +25,7 @@ use store_api::storage::RegionId;
use strum::IntoStaticStr; use strum::IntoStaticStr;
use tokio::sync::{mpsc, watch}; use tokio::sync::{mpsc, watch};
use crate::access_layer::{AccessLayerRef, OperationType, SstWriteRequest}; use crate::access_layer::{AccessLayerRef, Metrics, OperationType, SstWriteRequest};
use crate::cache::CacheManagerRef; use crate::cache::CacheManagerRef;
use crate::config::MitoConfig; use crate::config::MitoConfig;
use crate::error::{ use crate::error::{
@@ -366,7 +366,7 @@ impl RegionFlushTask {
let ssts_written = self let ssts_written = self
.access_layer .access_layer
.write_sst(write_request, &write_opts) .write_sst(write_request, &write_opts, &mut Metrics::default())
.await?; .await?;
if ssts_written.is_empty() { if ssts_written.is_empty() {
// No data written. // No data written.

View File

@@ -44,6 +44,12 @@ mod time_provider;
pub mod wal; pub mod wal;
mod worker; mod worker;
// Public re-exports for tooling convenience
pub use access_layer::{
build_access_layer, AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest,
};
pub use cache::{CacheManager, CacheManagerRef};
#[cfg_attr(doc, aquamarine::aquamarine)] #[cfg_attr(doc, aquamarine::aquamarine)]
/// # Mito developer document /// # Mito developer document
/// ///

View File

@@ -109,6 +109,7 @@ mod tests {
new_batch_with_binary, new_source, sst_file_handle, sst_region_metadata, new_batch_with_binary, new_source, sst_file_handle, sst_region_metadata,
}; };
use crate::test_util::{check_reader_result, TestEnv}; use crate::test_util::{check_reader_result, TestEnv};
use crate::Metrics;
const FILE_DIR: &str = "/"; const FILE_DIR: &str = "/";
@@ -165,7 +166,7 @@ mod tests {
.await; .await;
let info = writer let info = writer
.write_all(source, None, &write_opts) .write_all(source, None, &write_opts, &mut Metrics::default())
.await .await
.unwrap() .unwrap()
.remove(0); .remove(0);
@@ -222,7 +223,7 @@ mod tests {
.await; .await;
writer writer
.write_all(source, None, &write_opts) .write_all(source, None, &write_opts, &mut Metrics::default())
.await .await
.unwrap() .unwrap()
.remove(0); .remove(0);
@@ -293,7 +294,7 @@ mod tests {
.await; .await;
let sst_info = writer let sst_info = writer
.write_all(source, None, &write_opts) .write_all(source, None, &write_opts, &mut Metrics::default())
.await .await
.unwrap() .unwrap()
.remove(0); .remove(0);
@@ -334,7 +335,7 @@ mod tests {
) )
.await; .await;
writer writer
.write_all(source, None, &write_opts) .write_all(source, None, &write_opts, &mut Metrics::default())
.await .await
.unwrap() .unwrap()
.remove(0); .remove(0);
@@ -389,7 +390,7 @@ mod tests {
) )
.await; .await;
writer writer
.write_all(source, None, &write_opts) .write_all(source, None, &write_opts, &mut Metrics::default())
.await .await
.unwrap() .unwrap()
.remove(0); .remove(0);
@@ -427,7 +428,7 @@ mod tests {
.await; .await;
writer writer
.write_all(source, None, &write_opts) .write_all(source, None, &write_opts, &mut Metrics::default())
.await .await
.unwrap() .unwrap()
.remove(0); .remove(0);

View File

@@ -1117,7 +1117,6 @@ impl ParquetReader {
self.context.read_format().metadata() self.context.read_format().metadata()
} }
#[cfg(test)]
pub fn parquet_metadata(&self) -> Arc<ParquetMetaData> { pub fn parquet_metadata(&self) -> Arc<ParquetMetaData> {
self.context.reader_builder().parquet_meta.clone() self.context.reader_builder().parquet_meta.clone()
} }

View File

@@ -17,14 +17,19 @@
use std::future::Future; use std::future::Future;
use std::pin::Pin; use std::pin::Pin;
use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc; use std::sync::{Arc, Mutex};
use std::task::{Context, Poll}; use std::task::{Context, Poll};
use std::time::{Duration, Instant};
use bytes::Bytes;
use common_time::Timestamp; use common_time::Timestamp;
use datatypes::arrow::datatypes::SchemaRef; use datatypes::arrow::datatypes::SchemaRef;
use object_store::{FuturesAsyncWriter, ObjectStore}; use futures::future::BoxFuture;
use object_store::{FuturesAsyncWriter, ObjectStore, Writer};
use parquet::arrow::async_writer::AsyncFileWriter;
use parquet::arrow::AsyncArrowWriter; use parquet::arrow::AsyncArrowWriter;
use parquet::basic::{Compression, Encoding, ZstdLevel}; use parquet::basic::{Compression, Encoding, ZstdLevel};
use parquet::errors::ParquetError;
use parquet::file::metadata::KeyValue; use parquet::file::metadata::KeyValue;
use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};
use parquet::schema::types::ColumnPath; use parquet::schema::types::ColumnPath;
@@ -45,12 +50,13 @@ use crate::sst::parquet::format::WriteFormat;
 use crate::sst::parquet::helper::parse_parquet_metadata;
 use crate::sst::parquet::{SstInfo, WriteOptions, PARQUET_METADATA_KEY};
 use crate::sst::{DEFAULT_WRITE_BUFFER_SIZE, DEFAULT_WRITE_CONCURRENCY};
+use crate::Metrics;
 
 /// Parquet SST writer.
 pub struct ParquetWriter<F: WriterFactory, I: IndexerBuilder, P: FilePathProvider> {
     /// Path provider that creates SST and index file paths according to file id.
     path_provider: P,
-    writer: Option<AsyncArrowWriter<SizeAwareWriter<F::Writer>>>,
+    writer: Option<AsyncArrowWriter<OpenDalWriter>>,
     /// Current active file id.
     current_file: FileId,
     writer_factory: F,
@@ -61,11 +67,18 @@ pub struct ParquetWriter<F: WriterFactory, I: IndexerBuilder, P: FilePathProvide
     /// Current active indexer.
     current_indexer: Option<Indexer>,
     bytes_written: Arc<AtomicUsize>,
+    opendal_metrics: Arc<Mutex<OpenDalMetrics>>,
 }
 
 pub trait WriterFactory {
     type Writer: AsyncWrite + Send + Unpin;
 
     fn create(&mut self, file_path: &str) -> impl Future<Output = Result<Self::Writer>>;
+
+    fn create_opendal(
+        &mut self,
+        file_path: &str,
+        size: Arc<AtomicUsize>,
+    ) -> impl Future<Output = Result<OpenDalWriter>>;
 }
 
 pub struct ObjectStoreWriterFactory {
@@ -84,6 +97,22 @@ impl WriterFactory for ObjectStoreWriterFactory {
             .map(|v| v.into_futures_async_write().compat_write())
             .context(OpenDalSnafu)
     }
+
+    async fn create_opendal(
+        &mut self,
+        file_path: &str,
+        size: Arc<AtomicUsize>,
+    ) -> Result<OpenDalWriter> {
+        let writer = self
+            .object_store
+            .writer_with(file_path)
+            .chunk(DEFAULT_WRITE_BUFFER_SIZE.as_bytes() as usize)
+            .concurrent(DEFAULT_WRITE_CONCURRENCY)
+            .await
+            .context(OpenDalSnafu)?;
+        Ok(OpenDalWriter::new(writer, size))
+    }
 }
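
The factory goes through OpenDAL's builder-style writer API: `chunk` controls how much data is buffered per uploaded part and `concurrent` bounds the number of in-flight parts. A rough standalone sketch of the same call chain, not part of the patch, assuming an existing `object_store: ObjectStore` and using placeholder values in place of the crate constants:

let mut writer = object_store
    .writer_with("data/bench.parquet") // hypothetical path
    .chunk(8 * 1024 * 1024)            // stand-in for DEFAULT_WRITE_BUFFER_SIZE
    .concurrent(8)                     // stand-in for DEFAULT_WRITE_CONCURRENCY
    .await?;
writer.write(Bytes::from_static(b"parquet bytes")).await?;
writer.close().await?;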
 impl<I, P> ParquetWriter<ObjectStoreWriterFactory, I, P>
@@ -105,6 +134,10 @@ where
         )
         .await
     }
+
+    pub fn opendal_metrics_val(&self) -> OpenDalMetrics {
+        self.opendal_metrics.lock().unwrap().clone()
+    }
 }
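
`opendal_metrics_val` clones the current counters out from under the mutex, so the benchmark can take a snapshot after a write without holding the lock. A small sketch of reporting them, using only the fields this diff defines on `OpenDalMetrics`:

let od = writer.opendal_metrics_val();
println!(
    "opendal: create={:?} writes={} write_cost={:?} complete={:?}",
    od.create_cost, od.num_writes, od.write_cost, od.complete_cost
);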
 impl<F, I, P> ParquetWriter<F, I, P>
@@ -132,6 +165,7 @@ where
             indexer_builder,
             current_indexer: Some(indexer),
             bytes_written: Arc::new(AtomicUsize::new(0)),
+            opendal_metrics: Arc::new(Mutex::new(OpenDalMetrics::default())),
         }
     }
@@ -156,20 +190,33 @@ where
         mut source: Source,
         override_sequence: Option<SequenceNumber>, // override the `sequence` field from `Source`
         opts: &WriteOptions,
+        metrics: &mut Metrics,
     ) -> Result<SstInfoArray> {
         let write_format =
             WriteFormat::new(self.metadata.clone()).with_override_sequence(override_sequence);
         let mut stats = SourceStats::default();
+        let mut last_key = None;
 
         while let Some(res) = self
-            .write_next_batch(&mut source, &write_format, opts)
+            .write_next_batch(&mut source, &write_format, opts, metrics)
             .await
             .transpose()
         {
             match res {
                 Ok(mut batch) => {
+                    if let Some(last) = &last_key {
+                        if last != batch.primary_key() {
+                            metrics.num_series += 1;
+                            last_key = Some(batch.primary_key().to_vec());
+                        }
+                    } else {
+                        metrics.num_series += 1;
+                        last_key = Some(batch.primary_key().to_vec());
+                    }
                     stats.update(&batch);
+                    let index_start = Instant::now();
                     self.get_or_create_indexer().await.update(&mut batch).await;
+                    metrics.index_update += index_start.elapsed();
                 }
                 Err(e) => {
                     self.get_or_create_indexer().await.abort().await;
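
The series counter relies on batches arriving grouped and ordered by primary key, so counting key changes between consecutive batches is enough (the first batch always starts a series). A tiny self-contained illustration of the same counting idea over plain byte keys:

fn count_series(keys: &[&[u8]]) -> usize {
    let mut num_series = 0;
    let mut last: Option<Vec<u8>> = None;
    for key in keys {
        // A new series starts whenever the key differs from the previous one.
        if last.as_deref() != Some(*key) {
            num_series += 1;
            last = Some(key.to_vec());
        }
    }
    num_series
}

fn main() {
    let keys: &[&[u8]] = &[b"a", b"a", b"b", b"c", b"c"];
    assert_eq!(count_series(keys), 3);
}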
@@ -178,7 +225,9 @@ where
             }
         }
 
+        let index_finish_start = Instant::now();
         let index_output = self.get_or_create_indexer().await.finish().await;
+        metrics.index_finish += index_finish_start.elapsed();
 
         if stats.num_rows == 0 {
             return Ok(smallvec![]);
@@ -189,9 +238,10 @@ where
             return Ok(smallvec![]);
         };
 
+        let close_start = Instant::now();
         arrow_writer.flush().await.context(WriteParquetSnafu)?;
         let file_meta = arrow_writer.close().await.context(WriteParquetSnafu)?;
+        metrics.close += close_start.elapsed();
         let file_size = self.bytes_written.load(Ordering::Relaxed) as u64;
 
         // Safety: num rows > 0 so we must have min/max.
@@ -238,17 +288,25 @@ where
         source: &mut Source,
         write_format: &WriteFormat,
         opts: &WriteOptions,
+        metrics: &mut Metrics,
     ) -> Result<Option<Batch>> {
+        let read_start = Instant::now();
         let Some(batch) = source.next_batch().await? else {
             return Ok(None);
         };
+        metrics.read += read_start.elapsed();
 
+        let convert_start = Instant::now();
         let arrow_batch = write_format.convert_batch(&batch)?;
+        metrics.convert += convert_start.elapsed();
 
+        let write_start = Instant::now();
         self.maybe_init_writer(write_format.arrow_schema(), opts)
             .await?
             .write(&arrow_batch)
             .await
             .context(WriteParquetSnafu)?;
+        metrics.write += write_start.elapsed();
         Ok(Some(batch))
     }
@@ -256,7 +314,7 @@ where
         &mut self,
         schema: &SchemaRef,
         opts: &WriteOptions,
-    ) -> Result<&mut AsyncArrowWriter<SizeAwareWriter<F::Writer>>> {
+    ) -> Result<&mut AsyncArrowWriter<OpenDalWriter>> {
         if let Some(ref mut w) = self.writer {
             Ok(w)
         } else {
@@ -274,10 +332,17 @@ where
             let writer_props = props_builder.build();
 
             let sst_file_path = self.path_provider.build_sst_file_path(self.current_file);
-            let writer = SizeAwareWriter::new(
-                self.writer_factory.create(&sst_file_path).await?,
-                self.bytes_written.clone(),
-            );
+            // let writer = SizeAwareWriter::new(
+            //     self.writer_factory.create(&sst_file_path).await?,
+            //     self.bytes_written.clone(),
+            // );
+            let create_start = Instant::now();
+            let mut writer = self
+                .writer_factory
+                .create_opendal(&sst_file_path, self.bytes_written.clone())
+                .await?;
+            self.opendal_metrics.lock().unwrap().create_cost += create_start.elapsed();
+            writer = writer.with_metrics(self.opendal_metrics.clone());
             let arrow_writer =
                 AsyncArrowWriter::try_new(writer, schema.clone(), Some(writer_props))
                     .context(WriteParquetSnafu)?;
@@ -317,6 +382,78 @@ impl SourceStats {
     }
 }
 
+#[derive(Default, Debug, Clone)]
+pub(crate) struct OpenDalMetrics {
+    pub(crate) create_cost: Duration,
+    pub(crate) num_writes: usize,
+    pub(crate) write_cost: Duration,
+    pub(crate) complete_cost: Duration,
+}
+
+/// Parquet [AsyncFileWriter] backed by an OpenDAL [Writer]. Works around
+/// [AsyncArrowWriter] not exposing the total bytes written after close and
+/// records per-operation OpenDAL timings.
+pub struct OpenDalWriter {
+    inner: Writer,
+    size: Arc<AtomicUsize>,
+    metrics: Option<Arc<Mutex<OpenDalMetrics>>>,
+}
+
+impl OpenDalWriter {
+    fn new(inner: Writer, size: Arc<AtomicUsize>) -> Self {
+        Self {
+            inner,
+            size: size.clone(),
+            metrics: None,
+        }
+    }
+
+    fn with_metrics(mut self, metrics: Arc<Mutex<OpenDalMetrics>>) -> Self {
+        self.metrics = Some(metrics);
+        self
+    }
+}
+
+impl AsyncFileWriter for OpenDalWriter {
+    fn write(&mut self, bs: Bytes) -> BoxFuture<'_, Result<(), ParquetError>> {
+        let write_start = Instant::now();
+        let size = self.size.clone();
+        let metrics = self.metrics.clone();
+        Box::pin(async move {
+            let bytes_written = bs.len();
+            self.inner
+                .write(bs)
+                .await
+                .map_err(|err| ParquetError::External(Box::new(err)))?;
+            size.fetch_add(bytes_written, Ordering::Relaxed);
+            if let Some(metrics) = metrics {
+                let mut m = metrics.lock().unwrap();
+                m.num_writes += 1;
+                m.write_cost += write_start.elapsed();
+            }
+            Ok(())
+        })
+    }
+
+    fn complete(&mut self) -> BoxFuture<'_, Result<(), ParquetError>> {
+        let complete_start = Instant::now();
+        let metrics = self.metrics.clone();
+        Box::pin(async move {
+            self.inner
+                .close()
+                .await
+                .map(|_| ())
+                .map_err(|err| ParquetError::External(Box::new(err)))?;
+            if let Some(metrics) = metrics {
+                let mut m = metrics.lock().unwrap();
+                m.complete_cost += complete_start.elapsed();
+            }
+            Ok(())
+        })
+    }
+}
+
 /// Workaround for [AsyncArrowWriter] does not provide a method to
 /// get total bytes written after close.
 struct SizeAwareWriter<W> {
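
Each `write`/`complete` call takes the metrics mutex once after the I/O finishes, so locking overhead stays negligible next to object-store latency. The counters also combine naturally with the byte counter; a hypothetical helper (not part of the patch) that derives throughput from them:

fn report(m: &OpenDalMetrics, bytes_written: usize) {
    // Total time spent in OpenDAL I/O for this writer.
    let io = m.write_cost + m.complete_cost;
    let mib = bytes_written as f64 / (1024.0 * 1024.0);
    println!(
        "{} writes, {:.1} MiB, io={:?}, ~{:.1} MiB/s",
        m.num_writes,
        mib,
        io,
        mib / io.as_secs_f64().max(f64::EPSILON),
    );
}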


@@ -68,7 +68,6 @@ pub struct Inserter {
     catalog_manager: CatalogManagerRef,
     partition_manager: PartitionRuleManagerRef,
     node_manager: NodeManagerRef,
-    #[allow(unused)]
     table_flownode_set_cache: TableFlownodeSetCacheRef,
 }
@@ -339,8 +338,6 @@ impl Inserter {
             instant_requests,
         } = requests;
 
-        // TODO(discord9): mirror some
         // Mirror requests for source table to flownode asynchronously
         let flow_mirror_task = FlowMirrorTask::new(
             &self.table_flownode_set_cache,
@@ -820,14 +817,12 @@ struct CreateAlterTableResult {
     table_infos: HashMap<TableId, Arc<TableInfo>>,
 }
 
-#[allow(unused)]
 struct FlowMirrorTask {
     requests: HashMap<Peer, RegionInsertRequests>,
     num_rows: usize,
 }
 
 impl FlowMirrorTask {
-    #[allow(unused)]
     async fn new(
         cache: &TableFlownodeSetCacheRef,
         requests: impl Iterator<Item = &RegionInsertRequest>,
@@ -901,7 +896,6 @@ impl FlowMirrorTask {
         })
     }
 
-    #[allow(unused)]
     fn detach(self, node_manager: NodeManagerRef) -> Result<()> {
         crate::metrics::DIST_MIRROR_PENDING_ROW_COUNT.add(self.num_rows as i64);
         for (peer, inserts) in self.requests {


@@ -40,7 +40,7 @@ use common_procedure::options::ProcedureConfig;
 use common_procedure::ProcedureManagerRef;
 use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};
 use datanode::datanode::DatanodeBuilder;
-use flow::{FlownodeBuilder, FrontendClient};
+use flow::FlownodeBuilder;
 use frontend::instance::builder::FrontendBuilder;
 use frontend::instance::{FrontendInstance, Instance, StandaloneDatanodeManager};
 use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
@@ -164,15 +164,12 @@ impl GreptimeDbStandaloneBuilder {
             Some(procedure_manager.clone()),
         );
 
-        let fe_server_addr = opts.frontend_options().grpc.bind_addr.clone();
-        let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
         let flow_builder = FlownodeBuilder::new(
             Default::default(),
             plugins.clone(),
             table_metadata_manager.clone(),
             catalog_manager.clone(),
             flow_metadata_manager.clone(),
-            Arc::new(frontend_client),
         );
         let flownode = Arc::new(flow_builder.build().await.unwrap());


@@ -1070,6 +1070,7 @@ fn drop_lines_with_inconsistent_results(input: String) -> String {
"root =", "root =",
"endpoint =", "endpoint =",
"region =", "region =",
"enable_virtual_host_style =",
"cache_path =", "cache_path =",
"cache_capacity =", "cache_capacity =",
"sas_token =", "sas_token =",


@@ -4,6 +4,10 @@ ue = "ue"
worl = "worl" worl = "worl"
ot = "ot" ot = "ot"
unqualifed = "unqualifed" unqualifed = "unqualifed"
typ = "typ"
varidic = "varidic"
typs = "typs"
varadic = "varadic"
[files] [files]
extend-exclude = [ extend-exclude = [