Compare commits

..

25 Commits

Author SHA1 Message Date
Lei, HUANG
68593ae92a ### Added Deduplication and Merge Functionality
- Introduced `DedupReader` and `MergeReader` in `src/mito2/src/read/sync/dedup.rs` and `src/mito2/src/read/sync/merge.rs` to handle deduplication and merging of sorted batches.
 ### Enhanced `BulkMemtable` Iteration
 - Updated `BulkMemtable` in `src/mito2/src/memtable/bulk.rs` to support deduplication and merge modes during iteration.
 - Added `BulkIterContext` to manage iteration context.
 ### Testing Enhancements
 - Added comprehensive tests for `BulkMemtable` and `BulkPart` in `src/mito2/src/memtable/bulk.rs` and `src/mito2/src/memtable/bulk/part.rs`.
 ### Code Refactoring
 - Made `BulkPart` and `BulkPartMeta` cloneable in `src/mito2/src/memtable/bulk/part.rs`.
 - Exposed internal test modules for better test coverage in `src/mito2/src/memtable/time_series.rs` and `src/mito2/src/read/merge.rs`.
 ### New Modules
 - Created `sync` module in `src/mito2/src/read.rs` to organize synchronous read operations.
2025-02-20 08:54:44 +00:00
evenyag
91d755d9b5 feat: use spawn 2025-02-13 22:49:02 +08:00
Lei, HUANG
2566d254ad poc-write-path:
**Enhance BulkPart Metadata Handling**

 - Updated `BulkMemtable` in `bulk.rs` to track and update `max_sequence`, `max_timestamp`, `min_timestamp`, and `num_rows` using `BulkPart` metadata.
 - Extended `BulkPartMeta` in `bulk/part.rs` to include `max_sequence`.
 - Modified `mutations_to_record_batch` function to return `max_sequence` along with timestamps in `bulk/part.rs`.
 - Adjusted `BulkPartEncoder` to handle the new `max_sequence` metadata in `bulk/part.rs`.
2025-02-10 15:14:45 +00:00
WenyXu
0ec4ed804d fix: fix alter logic bug 2025-02-10 14:24:13 +00:00
evenyag
cc435234a4 chore: error handling in handle_batch_body 2025-02-10 21:38:10 +08:00
WenyXu
9c4aa81f85 fix: fix add column validation 2025-02-10 13:28:30 +00:00
evenyag
bdbb5435ea feat: region server use handle_batch_body 2025-02-10 21:20:14 +08:00
WenyXu
fd9940a253 fix: fix compile 2025-02-10 13:18:01 +00:00
WenyXu
e0bafd661c chore: unused error 2025-02-10 12:57:55 +00:00
WenyXu
99baa86b6a feat(metric-engine): introduce batch alter request handling 2025-02-10 12:57:54 +00:00
WenyXu
76d69901ea feat(metric-engine): introduce batch create request handling 2025-02-10 12:57:04 +00:00
WenyXu
764a57b80a refactor: introduce RegionRequestBundle::Vector 2025-02-10 12:55:30 +00:00
WenyXu
95b388d819 refactor: refactor region server requests handling 2025-02-10 12:55:28 +00:00
evenyag
c2b556e321 feat: batch put logical regions 2025-02-10 19:32:21 +08:00
evenyag
06ebe6b3fb feat: handle body 2025-02-10 19:32:14 +08:00
Lei, HUANG
bec8245e75 poc-write-path: Enhance Memtable Handling with Primary Key Encoding
• Introduced PrimaryKeyEncoding to differentiate between dense and sparse primary key encodings.
 • Updated BulkMemtableBuilder to conditionally create memtables based on primary key encoding.
 • Integrated PartitionTreeMemtableBuilder as a fallback for dense encodings.
 • Modified RegionWriteCtx to handle mutations differently based on primary key encoding.
 • Adjusted RegionWorkerLoop to skip bulk encoding for dense primary key mutations.
 • Refactored SparseEncoder to support conditional compilation for testing purposes.
2025-02-10 14:38:11 +08:00
Lei, HUANG
3cb2343f7f poc-write-path:
### Implement Sparse Primary Key Encoding

 - **Added `SparseEncoder`**: Introduced a new module `encoder.rs` to implement sparse primary key encoding, replacing the previous dense encoding approach.
 - **Updated `BulkPartEncoder`**: Modified `BulkPartEncoder` in `bulk/part.rs` to utilize `SparseEncoder` for encoding primary keys.
 - **Refactored `PartitionTree`**: Updated `partition_tree/tree.rs` to use the new `SparseEncoder` for primary key encoding.
 - **Code Adjustments**: Removed redundant code and adjusted imports in `key_values.rs` and `partition_tree/tree.rs` to align with the new encoding strategy.
2025-02-08 08:10:16 +00:00
Lei, HUANG
d10c207371 poc-write-path:
Add allocation tracking to `BulkMemtable` methods

 - Updated `write_bulk` in `bulk.rs` to track memory allocation using `alloc_tracker.on_allocation`.
 - Modified `freeze` in `bulk.rs` to signal completion of allocation with `alloc_tracker.done_allocating`.
2025-02-06 09:17:37 +00:00
Lei, HUANG
1a73a40bd9 Merge remote-tracking branch 'GreptimeTeam/poc-write-path' into poc-write-path 2025-02-06 08:55:39 +00:00
evenyag
713a73e9b2 feat: encode bulk before writing wal
* store bulk in wal
* write the BulkPart to the memtable directly
2025-02-06 16:36:41 +08:00
Lei, HUANG
65a88a63db poc-write-path:
**feat(memtable): Add BulkMemtableBuilder and BulkMemtable**

 - Introduced `BulkMemtableBuilder` and `BulkMemtable` in `memtable.rs` and `bulk.rs` to support bulk operations.
 - Added environment variable check for `enable_bulk_memtable` to conditionally use `BulkMemtableBuilder`.
 - Implemented `MemtableBuilder` for `BulkMemtableBuilder` and `Memtable` for `BulkMemtable`.
 - Included new fields `dedup` and `merge_mode` in `BulkMemtable` to handle deduplication and merge operations.
 - Temporarily disabled reads in `BulkMemtable` with `EmptyIter` as a placeholder iterator.
2025-02-06 07:36:37 +00:00
Lei, HUANG
5ad1436a8f Merge remote-tracking branch 'GreptimeTeam/poc-write-path' into poc-write-path 2025-02-06 06:44:33 +00:00
Lei, HUANG
ae59206caf feat: impl stats 2025-02-06 06:38:44 +00:00
evenyag
094d0fcdf5 feat: add bulk to wal Mutation 2025-02-05 21:16:18 +08:00
evenyag
7170120de6 chore: update proto (add bulk to Mutation) 2025-02-05 21:05:01 +08:00
785 changed files with 21774 additions and 39331 deletions

View File

@@ -3,12 +3,3 @@ linker = "aarch64-linux-gnu-gcc"
[alias]
sqlness = "run --bin sqlness-runner --"
[unstable.git]
shallow_index = true
shallow_deps = true
[unstable.gitoxide]
fetch = true
checkout = true
list_files = true
internal_use_git2 = false

View File

@@ -41,14 +41,7 @@ runs:
username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }}
- name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64,linux/arm64
# The latest version will lead to segmentation fault.
image: tonistiigi/binfmt:qemu-v7.0.0-28
- name: Build and push dev-builder-ubuntu image # Build image for amd64 and arm64 platform.
- name: Build and push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.build-dev-builder-ubuntu == 'true' }}
run: |
@@ -59,7 +52,7 @@ runs:
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
- name: Build and push dev-builder-centos image # Only build image for amd64 platform.
- name: Build and push dev-builder-centos image
shell: bash
if: ${{ inputs.build-dev-builder-centos == 'true' }}
run: |
@@ -76,7 +69,8 @@ runs:
run: |
make dev-builder \
BASE_IMAGE=android \
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \
docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

View File

@@ -34,8 +34,8 @@ inputs:
required: true
push-latest-tag:
description: Whether to push the latest tag
required: true
default: 'false'
required: false
default: 'true'
runs:
using: composite
steps:
@@ -47,11 +47,7 @@ runs:
password: ${{ inputs.image-registry-password }}
- name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64,linux/arm64
# The latest version will lead to segmentation fault.
image: tonistiigi/binfmt:qemu-v7.0.0-28
uses: docker/setup-qemu-action@v2
- name: Set up buildx
uses: docker/setup-buildx-action@v2

View File

@@ -22,8 +22,8 @@ inputs:
required: true
push-latest-tag:
description: Whether to push the latest tag
required: true
default: 'false'
required: false
default: 'true'
dev-mode:
description: Enable dev mode, only build standard greptime
required: false

View File

@@ -52,7 +52,7 @@ runs:
uses: ./.github/actions/build-greptime-binary
with:
base-image: ubuntu
features: servers/dashboard,pg_kvbackend,mysql_kvbackend
features: servers/dashboard,pg_kvbackend
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-${{ inputs.version }}
version: ${{ inputs.version }}
@@ -70,7 +70,7 @@ runs:
if: ${{ inputs.arch == 'amd64' && inputs.dev-mode == 'false' }} # Builds greptime for centos if the host machine is amd64.
with:
base-image: centos
features: servers/dashboard,pg_kvbackend,mysql_kvbackend
features: servers/dashboard,pg_kvbackend
cargo-profile: ${{ inputs.cargo-profile }}
artifacts-dir: greptime-linux-${{ inputs.arch }}-centos-${{ inputs.version }}
version: ${{ inputs.version }}

View File

@@ -51,8 +51,8 @@ inputs:
required: true
upload-to-s3:
description: Upload to S3
required: true
default: 'false'
required: false
default: 'true'
artifacts-dir:
description: Directory to store artifacts
required: false
@@ -77,21 +77,13 @@ runs:
with:
path: ${{ inputs.artifacts-dir }}
- name: Install s5cmd
shell: bash
run: |
wget https://github.com/peak/s5cmd/releases/download/v2.3.0/s5cmd_2.3.0_Linux-64bit.tar.gz
tar -xzf s5cmd_2.3.0_Linux-64bit.tar.gz
sudo mv s5cmd /usr/local/bin/
sudo chmod +x /usr/local/bin/s5cmd
- name: Release artifacts to cn region
uses: nick-invision/retry@v2
if: ${{ inputs.upload-to-s3 == 'true' }}
env:
AWS_ACCESS_KEY_ID: ${{ inputs.aws-cn-access-key-id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.aws-cn-secret-access-key }}
AWS_REGION: ${{ inputs.aws-cn-region }}
AWS_DEFAULT_REGION: ${{ inputs.aws-cn-region }}
UPDATE_VERSION_INFO: ${{ inputs.update-version-info }}
with:
max_attempts: ${{ inputs.upload-max-retry-times }}

View File

@@ -56,7 +56,7 @@ runs:
- name: Start EC2 runner
if: startsWith(inputs.runner, 'ec2')
uses: machulav/ec2-github-runner@v2.3.8
uses: machulav/ec2-github-runner@v2
id: start-linux-arm64-ec2-runner
with:
mode: start

View File

@@ -33,7 +33,7 @@ runs:
- name: Stop EC2 runner
if: ${{ inputs.label && inputs.ec2-instance-id }}
uses: machulav/ec2-github-runner@v2.3.8
uses: machulav/ec2-github-runner@v2
with:
mode: stop
label: ${{ inputs.label }}

View File

@@ -33,7 +33,7 @@ function upload_artifacts() {
# ├── greptime-darwin-amd64-v0.2.0.sha256sum
# └── greptime-darwin-amd64-v0.2.0.tar.gz
find "$ARTIFACTS_DIR" -type f \( -name "*.tar.gz" -o -name "*.sha256sum" \) | while IFS= read -r file; do
s5cmd cp \
aws s3 cp \
"$file" "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/$VERSION/$(basename "$file")"
done
}
@@ -45,7 +45,7 @@ function update_version_info() {
if [[ "$VERSION" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "Updating latest-version.txt"
echo "$VERSION" > latest-version.txt
s5cmd cp \
aws s3 cp \
latest-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-version.txt"
fi
@@ -53,7 +53,7 @@ function update_version_info() {
if [[ "$VERSION" == *"nightly"* ]]; then
echo "Updating latest-nightly-version.txt"
echo "$VERSION" > latest-nightly-version.txt
s5cmd cp \
aws s3 cp \
latest-nightly-version.txt "s3://$AWS_S3_BUCKET/$RELEASE_DIRS/latest-nightly-version.txt"
fi
fi

View File

@@ -14,11 +14,9 @@ name: Build API docs
jobs:
apidoc:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -12,8 +12,6 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
persist-credentials: false
- name: Set up Rust
uses: actions-rust-lang/setup-rust-toolchain@v1

View File

@@ -16,11 +16,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -76,14 +76,20 @@ env:
NIGHTLY_RELEASE_PREFIX: nightly
# Use the different image name to avoid conflict with the release images.
IMAGE_NAME: greptimedb-dev
# The source code will check out in the following path: '${WORKING_DIR}/dev/greptime'.
CHECKOUT_GREPTIMEDB_PATH: dev/greptimedb
permissions:
issues: write
jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -101,7 +107,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Create version
id: create-version
@@ -156,7 +161,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Checkout greptimedb
uses: actions/checkout@v4
@@ -164,7 +168,6 @@ jobs:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
persist-credentials: true
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -189,7 +192,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Checkout greptimedb
uses: actions/checkout@v4
@@ -197,7 +199,6 @@ jobs:
repository: ${{ inputs.repository }}
ref: ${{ inputs.commit }}
path: ${{ env.CHECKOUT_GREPTIMEDB_PATH }}
persist-credentials: true
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -218,33 +219,25 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ vars.DEV_BUILD_IMAGE_NAME }}
image-name: ${{ env.IMAGE_NAME }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false # Don't push the latest tag to registry.
dev-mode: true # Only build the standard images.
- name: Echo Docker image tag to step summary
run: |
echo "## Docker Image Tag" >> $GITHUB_STEP_SUMMARY
echo "Image Tag: \`${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
echo "Full Image Name: \`docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
echo "Pull Command: \`docker pull docker.io/${{ vars.IMAGE_NAMESPACE }}/${{ vars.DEV_BUILD_IMAGE_NAME }}:${{ needs.allocate-runners.outputs.version }}\`" >> $GITHUB_STEP_SUMMARY
- name: Set build result
id: set-build-result
@@ -258,20 +251,19 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
continue-on-error: true
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Release artifacts to CN region
uses: ./.github/actions/release-cn-artifacts
with:
src-image-registry: docker.io
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: ${{ vars.DEV_BUILD_IMAGE_NAME }}
src-image-name: ${{ env.IMAGE_NAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -281,7 +273,6 @@ jobs:
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-to-s3: false
dev-mode: true # Only build the standard images(exclude centos images).
push-latest-tag: false # Don't push the latest tag to registry.
update-version-info: false # Don't update the version info in S3.
@@ -290,7 +281,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -300,7 +291,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -316,7 +306,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -326,7 +316,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -344,17 +333,11 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
permissions:
issues: write
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -23,11 +23,9 @@ concurrency:
jobs:
check-typos-and-docs:
name: Check typos and docs
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: crate-ci/typos@master
- name: Check the config docs
run: |
@@ -36,12 +34,10 @@ jobs:
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)
license-header-check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
name: Check License Header
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: korandoru/hawkeye@v5
check:
@@ -49,12 +45,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -72,12 +66,10 @@ jobs:
toml:
name: Toml Check
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: actions-rust-lang/setup-rust-toolchain@v1
- name: Install taplo
run: cargo +stable install taplo-cli --version ^0.9 --locked --force
@@ -89,12 +81,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -111,7 +101,7 @@ jobs:
- name: Build greptime binaries
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc -- --bin greptime --bin sqlness-runner --features "pg_kvbackend,mysql_kvbackend"
run: cargo gc -- --bin greptime --bin sqlness-runner --features pg_kvbackend
- name: Pack greptime binaries
shell: bash
run: |
@@ -149,8 +139,6 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -204,8 +192,6 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -248,12 +234,10 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -270,7 +254,7 @@ jobs:
- name: Build greptime bianry
shell: bash
# `cargo gc` will invoke `cargo build` with specified args
run: cargo gc --profile ci -- --bin greptime --features "pg_kvbackend,mysql_kvbackend"
run: cargo gc --profile ci -- --bin greptime --features pg_kvbackend
- name: Pack greptime binary
shell: bash
run: |
@@ -311,8 +295,6 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup Kind
uses: ./.github/actions/setup-kind
- if: matrix.mode.minio
@@ -455,8 +437,6 @@ jobs:
echo "Disk space after:"
df -h
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup Kind
uses: ./.github/actions/setup-kind
- name: Setup Chaos Mesh
@@ -568,7 +548,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
mode:
- name: "Basic"
opts: ""
@@ -582,8 +562,6 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- if: matrix.mode.kafka
name: Setup kafka server
working-directory: tests-integration/fixtures
@@ -607,12 +585,10 @@ jobs:
fmt:
name: Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -624,12 +600,10 @@ jobs:
clippy:
name: Clippy
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -652,8 +626,6 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Merge Conflict Finder
uses: olivernybroe/action-conflict-finder@v4.0
@@ -664,8 +636,6 @@ jobs:
needs: [conflict-check, clippy, fmt]
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -687,7 +657,7 @@ jobs:
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
- name: Run nextest cases
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend -F mysql_kvbackend
run: cargo nextest run --workspace -F dashboard -F pg_kvbackend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
RUST_BACKTRACE: 1
@@ -704,19 +674,16 @@ jobs:
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs"
coverage:
if: github.event_name == 'merge_group'
runs-on: ubuntu-22.04-8-cores
runs-on: ubuntu-20.04-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: arduino/setup-protoc@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -740,7 +707,7 @@ jobs:
working-directory: tests-integration/fixtures
run: docker compose up -d --wait
- name: Run nextest cases
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend -F mysql_kvbackend
run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info -F dashboard -F pg_kvbackend
env:
CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=mold"
RUST_BACKTRACE: 1
@@ -756,7 +723,6 @@ jobs:
GT_MINIO_ENDPOINT_URL: http://127.0.0.1:9000
GT_ETCD_ENDPOINTS: http://127.0.0.1:2379
GT_POSTGRES_ENDPOINTS: postgres://greptimedb:admin@127.0.0.1:5432/postgres
GT_MYSQL_ENDPOINTS: mysql://greptimedb:admin@127.0.0.1:3306/mysql
GT_KAFKA_ENDPOINTS: 127.0.0.1:9092
GT_KAFKA_SASL_ENDPOINTS: 127.0.0.1:9093
UNITTEST_LOG_DIR: "__unittest_logs"
@@ -772,7 +738,7 @@ jobs:
# compat:
# name: Compatibility Test
# needs: build
# runs-on: ubuntu-22.04
# runs-on: ubuntu-20.04
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v4

View File

@@ -3,21 +3,16 @@ on:
pull_request_target:
types: [opened, edited]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
pull-requests: write
contents: read
jobs:
docbot:
runs-on: ubuntu-latest
permissions:
pull-requests: write
contents: read
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Maybe Follow Up Docs Issue
working-directory: cyborg

View File

@@ -31,47 +31,43 @@ name: CI
jobs:
typos:
name: Spell Check with Typos
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: crate-ci/typos@master
license-header-check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
name: Check License Header
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: korandoru/hawkeye@v5
check:
name: Check
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
fmt:
name: Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
clippy:
name: Clippy
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
coverage:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
test:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'
@@ -80,7 +76,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
mode:
- name: "Basic"
- name: "Remote WAL"

View File

@@ -1,52 +0,0 @@
name: Check Grafana Panels
on:
pull_request:
branches:
- main
paths:
- 'grafana/**' # Trigger only when files under the grafana/ directory change
jobs:
check-panels:
runs-on: ubuntu-latest
steps:
# Check out the repository
- name: Checkout repository
uses: actions/checkout@v4
# Install jq (required for the script)
- name: Install jq
run: sudo apt-get install -y jq
# Make the check.sh script executable
- name: Make check.sh executable
run: chmod +x grafana/check.sh
# Run the check.sh script
- name: Run check.sh
run: ./grafana/check.sh
# Only run summary.sh for pull_request events (not for merge queues or final pushes)
- name: Check if this is a pull request
id: check-pr
run: |
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
echo "is_pull_request=true" >> $GITHUB_OUTPUT
else
echo "is_pull_request=false" >> $GITHUB_OUTPUT
fi
# Make the summary.sh script executable
- name: Make summary.sh executable
if: steps.check-pr.outputs.is_pull_request == 'true'
run: chmod +x grafana/summary.sh
# Run the summary.sh script and add its output to the GitHub Job Summary
- name: Run summary.sh and add to Job Summary
if: steps.check-pr.outputs.is_pull_request == 'true'
run: |
SUMMARY=$(./grafana/summary.sh)
echo "### Summary of Grafana Panels" >> $GITHUB_STEP_SUMMARY
echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY

View File

@@ -14,11 +14,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -66,11 +66,18 @@ env:
NIGHTLY_RELEASE_PREFIX: nightly
# Use the different image name to avoid conflict with the release images.
# The DockerHub image will be greptime/greptimedb-nightly.
IMAGE_NAME: greptimedb-nightly
permissions:
issues: write
jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -88,7 +95,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Create version
id: create-version
@@ -141,7 +147,6 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -163,7 +168,6 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -182,25 +186,24 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ vars.NIGHTLY_BUILD_IMAGE_NAME }}
image-name: ${{ env.IMAGE_NAME }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: false
push-latest-tag: true
- name: Set nightly build result
id: set-nightly-build-result
@@ -214,7 +217,7 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -223,14 +226,13 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Release artifacts to CN region
uses: ./.github/actions/release-cn-artifacts
with:
src-image-registry: docker.io
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: ${{ vars.NIGHTLY_BUILD_IMAGE_NAME }}
src-image-name: ${{ env.IMAGE_NAME }}
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -240,16 +242,15 @@ jobs:
aws-cn-access-key-id: ${{ secrets.AWS_CN_ACCESS_KEY_ID }}
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
upload-to-s3: false
dev-mode: false
update-version-info: false # Don't update version info in S3.
push-latest-tag: false
push-latest-tag: true
stop-linux-amd64-runner: # It's always run as the last job in the workflow to make sure that the runner is released.
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -259,7 +260,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -275,7 +275,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -285,7 +285,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -303,15 +302,11 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
permissions:
issues: write
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -9,17 +9,19 @@ concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
issues: write
jobs:
sqlness-test:
name: Run sqlness test
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check install.sh
run: ./.github/scripts/check-install-script.sh
@@ -44,14 +46,9 @@ jobs:
name: Sqlness tests on Windows
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: windows-2022-8-cores
permissions:
issues: write
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- uses: arduino/setup-protoc@v3
with:
@@ -79,9 +76,6 @@ jobs:
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- uses: arduino/setup-protoc@v3
with:
@@ -117,13 +111,9 @@ jobs:
cleanbuild-linux-nix:
name: Run clean build on Linux
runs-on: ubuntu-latest
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: cachix/install-nix-action@v27
with:
nix_path: nixpkgs=channel:nixos-24.11
@@ -133,7 +123,7 @@ jobs:
name: Check status
needs: [sqlness-test, sqlness-windows, test-on-windows]
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
check-result: ${{ steps.set-check-result.outputs.check-result }}
steps:
@@ -146,14 +136,11 @@ jobs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [check-status]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -29,7 +29,7 @@ jobs:
release-dev-builder-images:
name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
runs-on: ubuntu-latest
runs-on: ubuntu-20.04-16-cores
outputs:
version: ${{ steps.set-version.outputs.version }}
steps:
@@ -37,7 +37,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Configure build image version
id: set-version
@@ -63,7 +62,7 @@ jobs:
release-dev-builder-images-ecr:
name: Release dev builder images to AWS ECR
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
release-dev-builder-images
]
@@ -86,69 +85,51 @@ jobs:
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:latest \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-ubuntu:latest
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-ubuntu:latest
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:latest \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-centos:latest
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-centos:latest
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ECR_IMAGE_REGISTRY: ${{ vars.ECR_IMAGE_REGISTRY }}
ECR_IMAGE_NAMESPACE: ${{ vars.ECR_IMAGE_NAMESPACE }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:latest \
docker://$ECR_IMAGE_REGISTRY/$ECR_IMAGE_NAMESPACE/dev-builder-android:latest
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:latest \
docker://${{ vars.ECR_IMAGE_REGISTRY }}/${{ vars.ECR_IMAGE_NAMESPACE }}/dev-builder-android:latest
release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
release-dev-builder-images
]
@@ -163,41 +144,29 @@ jobs:
- name: Push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.release_dev_builder_ubuntu_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION \
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-ubuntu:$IMAGE_VERSION
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-ubuntu:${{ needs.release-dev-builder-images.outputs.version }}
- name: Push dev-builder-centos image
shell: bash
if: ${{ inputs.release_dev_builder_centos_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION \
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-centos:$IMAGE_VERSION
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-centos:${{ needs.release-dev-builder-images.outputs.version }}
- name: Push dev-builder-android image
shell: bash
if: ${{ inputs.release_dev_builder_android_image }}
env:
IMAGE_VERSION: ${{ needs.release-dev-builder-images.outputs.version }}
IMAGE_NAMESPACE: ${{ vars.IMAGE_NAMESPACE }}
ACR_IMAGE_REGISTRY: ${{ vars.ACR_IMAGE_REGISTRY }}
run: |
docker run -v "${DOCKER_CONFIG:-$HOME/.docker}:/root/.docker:ro" \
-e "REGISTRY_AUTH_FILE=/root/.docker/config.json" \
quay.io/skopeo/stable:latest \
copy -a docker://docker.io/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION \
docker://$ACR_IMAGE_REGISTRY/$IMAGE_NAMESPACE/dev-builder-android:$IMAGE_VERSION
copy -a docker://docker.io/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }} \
docker://${{ vars.ACR_IMAGE_REGISTRY }}/${{ vars.IMAGE_NAMESPACE }}/dev-builder-android:${{ needs.release-dev-builder-images.outputs.version }}

View File

@@ -18,11 +18,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G
@@ -91,13 +91,18 @@ env:
# The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313;
NIGHTLY_RELEASE_PREFIX: nightly
# Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release.
NEXT_RELEASE_VERSION: v0.14.0
NEXT_RELEASE_VERSION: v0.12.0
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
contents: write # Allows the action to create a release.
jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}
@@ -117,7 +122,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Check Rust toolchain version
shell: bash
@@ -177,7 +181,6 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -199,7 +202,6 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-linux-artifacts
with:
@@ -235,7 +237,6 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-macos-artifacts
with:
@@ -275,7 +276,6 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/build-windows-artifacts
with:
@@ -299,25 +299,22 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-2004-16-cores
outputs:
build-image-result: ${{ steps.set-build-image-result.outputs.build-image-result }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Build and push images to dockerhub
uses: ./.github/actions/build-images
with:
image-registry: docker.io
image-namespace: ${{ vars.IMAGE_NAMESPACE }}
image-name: ${{ vars.GREPTIMEDB_IMAGE_NAME }}
image-registry-username: ${{ secrets.DOCKERHUB_USERNAME }}
image-registry-password: ${{ secrets.DOCKERHUB_TOKEN }}
version: ${{ needs.allocate-runners.outputs.version }}
push-latest-tag: true
- name: Set build image result
id: set-build-image-result
@@ -335,7 +332,7 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.
@@ -344,14 +341,13 @@ jobs:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Release artifacts to CN region
uses: ./.github/actions/release-cn-artifacts
with:
src-image-registry: docker.io
src-image-namespace: ${{ vars.IMAGE_NAMESPACE }}
src-image-name: ${{ vars.GREPTIMEDB_IMAGE_NAME }}
src-image-name: greptimedb
dst-image-registry-username: ${{ secrets.ALICLOUD_USERNAME }}
dst-image-registry-password: ${{ secrets.ALICLOUD_PASSWORD }}
dst-image-registry: ${{ vars.ACR_IMAGE_REGISTRY }}
@@ -362,7 +358,6 @@ jobs:
aws-cn-secret-access-key: ${{ secrets.AWS_CN_SECRET_ACCESS_KEY }}
aws-cn-region: ${{ vars.AWS_RELEASE_BUCKET_REGION }}
dev-mode: false
upload-to-s3: true
update-version-info: true
push-latest-tag: true
@@ -377,12 +372,11 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Publish GitHub release
uses: ./.github/actions/publish-github-release
@@ -396,7 +390,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,
@@ -406,7 +400,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -422,7 +415,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,
@@ -432,7 +425,6 @@ jobs:
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- name: Stop EC2 runner
uses: ./.github/actions/stop-runner
@@ -448,16 +440,9 @@ jobs:
name: Bump doc version
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [allocate-runners]
runs-on: ubuntu-latest
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
contents: write # Allows the action to create a release.
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Bump doc version
working-directory: cyborg
@@ -475,18 +460,11 @@ jobs:
build-macos-artifacts,
build-windows-artifacts,
]
runs-on: ubuntu-latest
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.
contents: write # Allows the action to create a release.
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Report CI status
id: report-ci-status

View File

@@ -4,20 +4,18 @@ on:
- cron: '4 2 * * *'
workflow_dispatch:
permissions:
contents: read
issues: write
pull-requests: write
jobs:
maintenance:
name: Periodic Maintenance
runs-on: ubuntu-latest
permissions:
contents: read
issues: write
pull-requests: write
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Do Maintenance
working-directory: cyborg

View File

@@ -1,24 +1,18 @@
name: "Semantic Pull Request"
on:
pull_request:
pull_request_target:
types:
- opened
- reopened
- edited
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: ./.github/actions/setup-cyborg
- name: Check Pull Request
working-directory: cyborg

3
.gitignore vendored
View File

@@ -54,6 +54,3 @@ tests-fuzz/corpus/
# Nix
.direnv
.envrc
## default data home
greptimedb_data

View File

@@ -3,28 +3,30 @@
## Individual Committers (in alphabetical order)
* [CookiePieWw](https://github.com/CookiePieWw)
* [KKould](https://github.com/KKould)
* [NiwakaDev](https://github.com/NiwakaDev)
* [etolbakov](https://github.com/etolbakov)
* [irenjj](https://github.com/irenjj)
* [KKould](https://github.com/KKould)
* [Lanqing Yang](https://github.com/lyang24)
* [NiwakaDev](https://github.com/NiwakaDev)
* [tisonkun](https://github.com/tisonkun)
* [Lanqing Yang](https://github.com/lyang24)
## Team Members (in alphabetical order)
* [Breeze-P](https://github.com/Breeze-P)
* [GrepTime](https://github.com/GrepTime)
* [MichaelScofield](https://github.com/MichaelScofield)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [ZonaHex](https://github.com/ZonaHex)
* [apdong2022](https://github.com/apdong2022)
* [beryl678](https://github.com/beryl678)
* [Breeze-P](https://github.com/Breeze-P)
* [daviderli614](https://github.com/daviderli614)
* [discord9](https://github.com/discord9)
* [evenyag](https://github.com/evenyag)
* [fengjiachun](https://github.com/fengjiachun)
* [fengys1996](https://github.com/fengys1996)
* [GrepTime](https://github.com/GrepTime)
* [holalengyu](https://github.com/holalengyu)
* [killme2008](https://github.com/killme2008)
* [MichaelScofield](https://github.com/MichaelScofield)
* [nicecui](https://github.com/nicecui)
* [paomian](https://github.com/paomian)
* [shuiyisong](https://github.com/shuiyisong)
@@ -32,14 +34,11 @@
* [sunng87](https://github.com/sunng87)
* [v0y4g3r](https://github.com/v0y4g3r)
* [waynexia](https://github.com/waynexia)
* [Wenjie0329](https://github.com/Wenjie0329)
* [WenyXu](https://github.com/WenyXu)
* [xtang](https://github.com/xtang)
* [zhaoyingnan01](https://github.com/zhaoyingnan01)
* [zhongzc](https://github.com/zhongzc)
* [ZonaHex](https://github.com/ZonaHex)
* [zyy17](https://github.com/zyy17)
## All Contributors
To see the full list of contributors, please visit our [Contributors page](https://github.com/GreptimeTeam/greptimedb/graphs/contributors)
[![All Contributors](https://contrib.rocks/image?repo=GreptimeTeam/greptimedb)](https://github.com/GreptimeTeam/greptimedb/graphs/contributors)

1002
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -67,7 +67,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.14.0"
version = "0.12.0"
edition = "2021"
license = "Apache-2.0"
@@ -81,7 +81,6 @@ rust.unknown_lints = "deny"
rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] }
[workspace.dependencies]
# DO_NOT_REMOVE_THIS: BEGIN_OF_EXTERNAL_DEPENDENCIES
# We turn off default-features for some dependencies here so the workspaces which inherit them can
# selectively turn them on if needed, since we can override default-features = true (from false)
# for the inherited dependency but cannot do the reverse (override from true to false).
@@ -107,7 +106,6 @@ bitflags = "2.4.1"
bytemuck = "1.12"
bytes = { version = "1.7", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
chrono-tz = "0.10.1"
clap = { version = "4.4", features = ["derive"] }
config = "0.13.0"
crossbeam-utils = "0.8"
@@ -126,11 +124,11 @@ deadpool-postgres = "0.12"
derive_builder = "0.12"
dotenv = "0.15"
etcd-client = "0.14"
flate2 = { version = "1.1.0", default-features = false, features = ["zlib-rs"] }
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c5419bbd20cb42e568ec325a4d71a3c94cc327e1" }
# branch: poc-write-path
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "1915576b113a494f5352fd61f211d899b7f87aab" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -141,8 +139,8 @@ itertools = "0.10"
jsonb = { git = "https://github.com/databendlabs/jsonb.git", rev = "8c8d2fc294a39f3ff08909d60f718639cfba3875", default-features = false }
lazy_static = "1.4"
local-ip-address = "0.6"
loki-proto = { git = "https://github.com/GreptimeTeam/loki-proto.git", rev = "1434ecf23a2654025d86188fb5205e7a74b225d3" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "5618e779cf2bb4755b499c630fba4c35e91898cb" }
loki-api = { git = "https://github.com/shuiyisong/tracing-loki", branch = "chore/prost_version" }
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "a10facb353b41460eeb98578868ebf19c2084fac" }
mockall = "0.11.4"
moka = "0.12"
nalgebra = "0.33"
@@ -161,7 +159,7 @@ parquet = { version = "53.0.0", default-features = false, features = ["arrow", "
paste = "1.0"
pin-project = "1.0"
prometheus = { version = "0.13.3", features = ["process"] }
promql-parser = { version = "0.5", features = ["ser"] }
promql-parser = { version = "0.4.3", features = ["ser"] }
prost = "0.13"
raft-engine = { version = "0.4.1", default-features = false }
rand = "0.8"
@@ -189,10 +187,6 @@ shadow-rs = "0.38"
similar-asserts = "1.6.0"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.8"
sqlx = { version = "0.8", features = [
"runtime-tokio-rustls",
"mysql",
] }
sysinfo = "0.30"
# on branch v0.52.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "71dd86058d2af97b9925093d40c4e03360403170", features = [
@@ -214,7 +208,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"]
typetag = "0.2"
uuid = { version = "1.7", features = ["serde", "v4", "fast-rng"] }
zstd = "0.13"
# DO_NOT_REMOVE_THIS: END_OF_EXTERNAL_DEPENDENCIES
## workspaces members
api = { path = "src/api" }
@@ -286,10 +279,12 @@ tokio-rustls = { git = "https://github.com/GreptimeTeam/tokio-rustls", rev = "46
# This is commented, since we are not using aws-lc-sys, if we need to use it, we need to uncomment this line or use a release after this commit, or it wouldn't compile with gcc < 8.1
# see https://github.com/aws/aws-lc-rs/pull/526
# aws-lc-sys = { git ="https://github.com/aws/aws-lc-rs", rev = "556558441e3494af4b156ae95ebc07ebc2fd38aa" }
# Apply a fix for pprof for unaligned pointer access
pprof = { git = "https://github.com/GreptimeTeam/pprof-rs", rev = "1bd1e21" }
[workspace.dependencies.meter-macros]
git = "https://github.com/GreptimeTeam/greptime-meter.git"
rev = "5618e779cf2bb4755b499c630fba4c35e91898cb"
rev = "a10facb353b41460eeb98578868ebf19c2084fac"
[profile.release]
debug = 1

View File

@@ -1,6 +1,3 @@
[target.aarch64-unknown-linux-gnu]
image = "ghcr.io/cross-rs/aarch64-unknown-linux-gnu:0.2.5"
[build]
pre-build = [
"dpkg --add-architecture $CROSS_DEB_ARCH",
@@ -8,8 +5,3 @@ pre-build = [
"curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v3.15.8/protoc-3.15.8-linux-x86_64.zip && unzip protoc-3.15.8-linux-x86_64.zip -d /usr/",
"chmod a+x /usr/bin/protoc && chmod -R a+rx /usr/include/google",
]
[build.env]
passthrough = [
"JEMALLOC_SYS_WITH_LG_PAGE",
]

View File

@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-a71b93dd-20250305072908
DEV_BUILDER_IMAGE_TAG ?= 2024-12-25-9d0fa5d5-20250124085746
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu
@@ -60,8 +60,6 @@ ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), all)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), amd64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), arm64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/arm64 --push
else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif

View File

@@ -112,11 +112,11 @@ Start a GreptimeDB container with:
```shell
docker run -p 127.0.0.1:4000-4003:4000-4003 \
-v "$(pwd)/greptimedb:./greptimedb_data" \
-v "$(pwd)/greptimedb:/tmp/greptimedb" \
--name greptime --rm \
greptime/greptimedb:latest standalone start \
--http-addr 0.0.0.0:4000 \
--rpc-bind-addr 0.0.0.0:4001 \
--rpc-addr 0.0.0.0:4001 \
--mysql-addr 0.0.0.0:4002 \
--postgres-addr 0.0.0.0:4003
```

View File

@@ -29,7 +29,7 @@
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -40,7 +40,6 @@
| `mysql.enable` | Bool | `true` | Whether to enable. |
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -50,7 +49,6 @@
| `postgres.enable` | Bool | `true` | Whether to enable |
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | Unset | Certificate file path. |
@@ -60,8 +58,6 @@
| `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
| `influxdb` | -- | -- | InfluxDB protocol options. |
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
| `jaeger` | -- | -- | Jaeger protocol options. |
| `jaeger.enable` | Bool | `true` | Whether to enable Jaeger protocol in HTTP API. |
| `prom_store` | -- | -- | Prometheus remote storage options |
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
@@ -69,8 +65,8 @@
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.read_batch_size` | Integer | `128` | The read batch size.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.sync_write` | Bool | `false` | Whether to use sync write.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -92,16 +88,15 @@
| `wal.backoff_deadline` | String | `5mins` | The deadline of retries.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `metadata_store` | -- | -- | Metadata storage options. |
| `metadata_store.file_size` | String | `64MB` | The size of the metadata store log file. |
| `metadata_store.purge_threshold` | String | `256MB` | The threshold of the metadata store size to trigger a purge. |
| `metadata_store.purge_interval` | String | `1m` | The interval of the metadata store to trigger a purge. |
| `metadata_store.file_size` | String | `256MB` | Kv file size in bytes. |
| `metadata_store.purge_threshold` | String | `4GB` | Kv purge threshold. |
| `procedure` | -- | -- | Procedure storage options. |
| `procedure.max_retry_times` | Integer | `3` | Procedure max retry time. |
| `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially |
| `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.<br/>A local file directory, defaults to `{data_home}`. An empty string means disabling. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
@@ -152,7 +147,6 @@
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
| `region_engine.mito.index.staging_ttl` | String | `7d` | The TTL of the staging directory.<br/>Defaults to 7 days.<br/>Setting it to "0s" to disable TTL. |
| `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
@@ -181,7 +175,7 @@
| `region_engine.metric` | -- | -- | Metric engine options. |
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -227,8 +221,8 @@
| `http.enable_cors` | Bool | `true` | HTTP CORS support, it's turned on by default<br/>This allows browser to access http APIs without CORS restrictions |
| `http.cors_allowed_origins` | Array | Unset | Customize allowed origins for HTTP CORS. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:4001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
| `grpc.addr` | String | `127.0.0.1:4001` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1:4001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.tls` | -- | -- | gRPC server TLS options, see `mysql.tls` section. |
| `grpc.tls.mode` | String | `disable` | TLS mode. |
@@ -239,7 +233,6 @@
| `mysql.enable` | Bool | `true` | Whether to enable. |
| `mysql.addr` | String | `127.0.0.1:4002` | The addr to bind the MySQL server. |
| `mysql.runtime_size` | Integer | `2` | The number of server worker threads. |
| `mysql.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `mysql.tls` | -- | -- | -- |
| `mysql.tls.mode` | String | `disable` | TLS mode, refer to https://www.postgresql.org/docs/current/libpq-ssl.html<br/>- `disable` (default value)<br/>- `prefer`<br/>- `require`<br/>- `verify-ca`<br/>- `verify-full` |
| `mysql.tls.cert_path` | String | Unset | Certificate file path. |
@@ -249,7 +242,6 @@
| `postgres.enable` | Bool | `true` | Whether to enable |
| `postgres.addr` | String | `127.0.0.1:4003` | The addr to bind the PostgresSQL server. |
| `postgres.runtime_size` | Integer | `2` | The number of server worker threads. |
| `postgres.keep_alive` | String | `0s` | Server-side keep-alive time.<br/>Set to 0 (default) to disable. |
| `postgres.tls` | -- | -- | PostgresSQL server TLS options, see `mysql.tls` section. |
| `postgres.tls.mode` | String | `disable` | TLS mode. |
| `postgres.tls.cert_path` | String | Unset | Certificate file path. |
@@ -259,8 +251,6 @@
| `opentsdb.enable` | Bool | `true` | Whether to enable OpenTSDB put in HTTP API. |
| `influxdb` | -- | -- | InfluxDB protocol options. |
| `influxdb.enable` | Bool | `true` | Whether to enable InfluxDB protocol in HTTP API. |
| `jaeger` | -- | -- | Jaeger protocol options. |
| `jaeger.enable` | Bool | `true` | Whether to enable Jaeger protocol in HTTP API. |
| `prom_store` | -- | -- | Prometheus remote storage options |
| `prom_store.enable` | Bool | `true` | Whether to enable Prometheus remote write and read in HTTP API. |
| `prom_store.with_metric_engine` | Bool | `true` | Whether to store the data from Prometheus remote write in metric engine. |
@@ -279,7 +269,7 @@
| `datanode.client.connect_timeout` | String | `10s` | -- |
| `datanode.client.tcp_nodelay` | Bool | `true` | -- |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -308,9 +298,9 @@
| Key | Type | Default | Descriptions |
| --- | -----| ------- | ----------- |
| `data_home` | String | `./greptimedb_data/metasrv/` | The working home directory. |
| `data_home` | String | `/tmp/metasrv/` | The working home directory. |
| `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for the frontend and datanode to connect to metasrv.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `bind_addr`. |
| `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. |
| `store_addrs` | Array | -- | Store server address default to etcd store.<br/>For postgres store, the format is:<br/>"password=password dbname=postgres user=postgres host=localhost port=5432"<br/>For etcd store, the format is:<br/>"127.0.0.1:2379" |
| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. |
| `backend` | String | `etcd_store` | The datastore for meta server.<br/>Available values:<br/>- `etcd_store` (default value)<br/>- `memory_store`<br/>- `postgres_store` |
@@ -319,7 +309,6 @@
| `selector` | String | `round_robin` | Datanode selector type.<br/>- `round_robin` (default value)<br/>- `lease_based`<br/>- `load_based`<br/>For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". |
| `use_memory_store` | Bool | `false` | Store data in memory. |
| `enable_region_failover` | Bool | `false` | Whether to enable region failover.<br/>This feature is only available on GreptimeDB running on cluster mode and<br/>- Using Remote WAL<br/>- Using shared storage (e.g., s3). |
| `node_max_idle_time` | String | `24hours` | Max allowed idle time before removing node info from metasrv memory. |
| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. |
| `runtime` | -- | -- | The runtime options. |
| `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. |
@@ -352,7 +341,7 @@
| `wal.backoff_base` | Integer | `2` | Exponential backoff rate, i.e. next backoff = base * current backoff. |
| `wal.backoff_deadline` | String | `5mins` | Stop reconnecting if the total wait time reaches the deadline. If this config is missing, the reconnecting won't terminate. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -387,14 +376,19 @@
| `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.<br/>By default, it provides services after all regions have been initialized. |
| `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. |
| `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. |
| `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. |
| `rpc_hostname` | String | Unset | Deprecated, use `grpc.hostname` instead. |
| `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. |
| `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. |
| `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. |
| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. |
| `http` | -- | -- | The HTTP server options. |
| `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. |
| `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. |
| `http.body_limit` | String | `64MB` | HTTP request body limit.<br/>The following units are supported: `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, `GiB`, `TB`, `TiB`, `PB`, `PiB`.<br/>Set to 0 to disable limit. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:3001` | The address advertised to the metasrv, and used for connections from outside the host.<br/>If left empty or unset, the server will automatically use the IP address of the first network interface<br/>on the host, with the same port number as the one specified in `grpc.bind_addr`. |
| `grpc.addr` | String | `127.0.0.1:3001` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1:3001` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.runtime_size` | Integer | `8` | The number of server worker threads. |
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
@@ -442,7 +436,7 @@
| `wal.dump_index_interval` | String | `60s` | The interval for dumping WAL indexes.<br/>**It's only used when the provider is `kafka`**. |
| `wal.overwrite_entry_start_id` | Bool | `false` | Ignore missing entries during read WAL.<br/>**It's only used when the provider is `kafka`**.<br/><br/>This option ensures that when Kafka messages are deleted, the system<br/>can still successfully replay memtable data without throwing an<br/>out-of-range error.<br/>However, enabling this option might lead to unexpected data loss,<br/>as the system will skip over missing entries instead of treating<br/>them as critical errors. |
| `storage` | -- | -- | The data storage options. |
| `storage.data_home` | String | `./greptimedb_data/` | The working home directory. |
| `storage.data_home` | String | `/tmp/greptimedb/` | The working home directory. |
| `storage.type` | String | `File` | The storage type used to store the data.<br/>- `File`: the data is stored in the local file system.<br/>- `S3`: the data is stored in the S3 object storage.<br/>- `Gcs`: the data is stored in the Google Cloud Storage.<br/>- `Azblob`: the data is stored in the Azure Blob Storage.<br/>- `Oss`: the data is stored in the Aliyun OSS. |
| `storage.cache_path` | String | Unset | Read cache configuration for object storage such as 'S3' etc, it's configured by default when using object storage. It is recommended to configure it when using object storage for better performance.<br/>A local file directory, defaults to `{data_home}`. An empty string means disabling. |
| `storage.cache_capacity` | String | Unset | The local file cache capacity in bytes. If your disk space is sufficient, it is recommended to set it larger. |
@@ -493,7 +487,6 @@
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
| `region_engine.mito.index.staging_ttl` | String | `7d` | The TTL of the staging directory.<br/>Defaults to 7 days.<br/>Setting it to "0s" to disable TTL. |
| `region_engine.mito.index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. |
| `region_engine.mito.index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. |
| `region_engine.mito.index.content_cache_page_size` | String | `64KiB` | Page size for inverted index content cache. |
@@ -522,7 +515,7 @@
| `region_engine.metric` | -- | -- | Metric engine options. |
| `region_engine.metric.experimental_sparse_primary_key_encoding` | Bool | `false` | Whether to enable the experimental sparse primary key encoding. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |
@@ -556,8 +549,8 @@
| `flow` | -- | -- | flow engine options. |
| `flow.num_workers` | Integer | `0` | The number of flow worker in flownode.<br/>Not setting(or set to 0) this value will use the number of CPU cores divided by 2. |
| `grpc` | -- | -- | The gRPC server options. |
| `grpc.bind_addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
| `grpc.server_addr` | String | `127.0.0.1:6800` | The address advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.addr` | String | `127.0.0.1:6800` | The address to bind the gRPC server. |
| `grpc.hostname` | String | `127.0.0.1` | The hostname advertised to the metasrv,<br/>and used for connections from outside the host |
| `grpc.runtime_size` | Integer | `2` | The number of server worker threads. |
| `grpc.max_recv_message_size` | String | `512MB` | The maximum receive message size for gRPC server. |
| `grpc.max_send_message_size` | String | `512MB` | The maximum send message size for gRPC server. |
@@ -579,7 +572,7 @@
| `heartbeat.interval` | String | `3s` | Interval for sending heartbeat messages to the metasrv. |
| `heartbeat.retry_interval` | String | `3s` | Interval for retrying to send heartbeat messages to the metasrv. |
| `logging` | -- | -- | The logging options. |
| `logging.dir` | String | `./greptimedb_data/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.dir` | String | `/tmp/greptimedb/logs` | The directory to store the log files. If set to empty, logs will not be written to files. |
| `logging.level` | String | Unset | The log level. Can be `info`/`debug`/`warn`/`error`. |
| `logging.enable_otlp_tracing` | Bool | `false` | Enable OTLP tracing. |
| `logging.otlp_endpoint` | String | `http://localhost:4317` | The OTLP tracing endpoint. |

View File

@@ -19,6 +19,26 @@ init_regions_parallelism = 16
## The maximum current queries allowed to be executed. Zero means unlimited.
max_concurrent_queries = 0
## Deprecated, use `grpc.addr` instead.
## @toml2docs:none-default
rpc_addr = "127.0.0.1:3001"
## Deprecated, use `grpc.hostname` instead.
## @toml2docs:none-default
rpc_hostname = "127.0.0.1"
## Deprecated, use `grpc.runtime_size` instead.
## @toml2docs:none-default
rpc_runtime_size = 8
## Deprecated, use `grpc.rpc_max_recv_message_size` instead.
## @toml2docs:none-default
rpc_max_recv_message_size = "512MB"
## Deprecated, use `grpc.rpc_max_send_message_size` instead.
## @toml2docs:none-default
rpc_max_send_message_size = "512MB"
## Enable telemetry to collect anonymous usage data. Enabled by default.
#+ enable_telemetry = true
@@ -36,11 +56,10 @@ body_limit = "64MB"
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
bind_addr = "127.0.0.1:3001"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:3001"
addr = "127.0.0.1:3001"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1:3001"
## The number of server worker threads.
runtime_size = 8
## The maximum receive message size for gRPC server.
@@ -119,7 +138,7 @@ provider = "raft_engine"
## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## @toml2docs:none-default
dir = "./greptimedb_data/wal"
dir = "/tmp/greptimedb/wal"
## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
@@ -231,7 +250,6 @@ overwrite_entry_start_id = false
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# enable_virtual_host_style = false
# Example of using Oss as the storage.
# [storage]
@@ -265,7 +283,7 @@ overwrite_entry_start_id = false
## The data storage options.
[storage]
## The working home directory.
data_home = "./greptimedb_data/"
data_home = "/tmp/greptimedb/"
## The storage type used to store the data.
## - `File`: the data is stored in the local file system.
@@ -498,11 +516,6 @@ aux_path = ""
## The max capacity of the staging directory.
staging_size = "2GB"
## The TTL of the staging directory.
## Defaults to 7 days.
## Setting it to "0s" to disable TTL.
staging_ttl = "7d"
## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"
@@ -618,7 +631,7 @@ experimental_sparse_primary_key_encoding = false
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "./greptimedb_data/logs"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -14,10 +14,10 @@ node_id = 14
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
bind_addr = "127.0.0.1:6800"
## The address advertised to the metasrv,
addr = "127.0.0.1:6800"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
server_addr = "127.0.0.1:6800"
hostname = "127.0.0.1"
## The number of server worker threads.
runtime_size = 2
## The maximum receive message size for gRPC server.
@@ -76,7 +76,7 @@ retry_interval = "3s"
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "./greptimedb_data/logs"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default
@@ -121,3 +121,4 @@ sample_ratio = 1.0
## The tokio console address.
## @toml2docs:none-default
#+ tokio_console_addr = "127.0.0.1"

View File

@@ -41,11 +41,10 @@ cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
bind_addr = "127.0.0.1:4001"
## The address advertised to the metasrv, and used for connections from outside the host.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `grpc.bind_addr`.
server_addr = "127.0.0.1:4001"
addr = "127.0.0.1:4001"
## The hostname advertised to the metasrv,
## and used for connections from outside the host
hostname = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
@@ -74,9 +73,6 @@ enable = true
addr = "127.0.0.1:4002"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
# MySQL server TLS options.
[mysql.tls]
@@ -108,9 +104,6 @@ enable = true
addr = "127.0.0.1:4003"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
## PostgresSQL server TLS options, see `mysql.tls` section.
[postgres.tls]
@@ -138,11 +131,6 @@ enable = true
## Whether to enable InfluxDB protocol in HTTP API.
enable = true
## Jaeger protocol options.
[jaeger]
## Whether to enable Jaeger protocol in HTTP API.
enable = true
## Prometheus remote storage options
[prom_store]
## Whether to enable Prometheus remote write and read in HTTP API.
@@ -189,7 +177,7 @@ tcp_nodelay = true
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "./greptimedb_data/logs"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -1,12 +1,10 @@
## The working home directory.
data_home = "./greptimedb_data/metasrv/"
data_home = "/tmp/metasrv/"
## The bind address of metasrv.
bind_addr = "127.0.0.1:3002"
## The communication server address for the frontend and datanode to connect to metasrv.
## If left empty or unset, the server will automatically use the IP address of the first network interface
## on the host, with the same port number as the one specified in `bind_addr`.
## The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost.
server_addr = "127.0.0.1:3002"
## Store server address default to etcd store.
@@ -50,9 +48,6 @@ use_memory_store = false
## - Using shared storage (e.g., s3).
enable_region_failover = false
## Max allowed idle time before removing node info from metasrv memory.
node_max_idle_time = "24hours"
## Whether to enable greptimedb telemetry. Enabled by default.
#+ enable_telemetry = true
@@ -177,7 +172,7 @@ backoff_deadline = "5mins"
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "./greptimedb_data/logs"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -49,7 +49,7 @@ cors_allowed_origins = ["https://example.com"]
## The gRPC server options.
[grpc]
## The address to bind the gRPC server.
bind_addr = "127.0.0.1:4001"
addr = "127.0.0.1:4001"
## The number of server worker threads.
runtime_size = 8
@@ -78,9 +78,6 @@ enable = true
addr = "127.0.0.1:4002"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
# MySQL server TLS options.
[mysql.tls]
@@ -112,9 +109,6 @@ enable = true
addr = "127.0.0.1:4003"
## The number of server worker threads.
runtime_size = 2
## Server-side keep-alive time.
## Set to 0 (default) to disable.
keep_alive = "0s"
## PostgresSQL server TLS options, see `mysql.tls` section.
[postgres.tls]
@@ -142,11 +136,6 @@ enable = true
## Whether to enable InfluxDB protocol in HTTP API.
enable = true
## Jaeger protocol options.
[jaeger]
## Whether to enable Jaeger protocol in HTTP API.
enable = true
## Prometheus remote storage options
[prom_store]
## Whether to enable Prometheus remote write and read in HTTP API.
@@ -164,17 +153,17 @@ provider = "raft_engine"
## The directory to store the WAL files.
## **It's only used when the provider is `raft_engine`**.
## @toml2docs:none-default
dir = "./greptimedb_data/wal"
dir = "/tmp/greptimedb/wal"
## The size of the WAL segment file.
## **It's only used when the provider is `raft_engine`**.
file_size = "128MB"
## The threshold of the WAL size to trigger a purge.
## The threshold of the WAL size to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_threshold = "1GB"
## The interval to trigger a purge.
## The interval to trigger a flush.
## **It's only used when the provider is `raft_engine`**.
purge_interval = "1m"
@@ -289,12 +278,10 @@ overwrite_entry_start_id = false
## Metadata storage options.
[metadata_store]
## The size of the metadata store log file.
file_size = "64MB"
## The threshold of the metadata store size to trigger a purge.
purge_threshold = "256MB"
## The interval of the metadata store to trigger a purge.
purge_interval = "1m"
## Kv file size in bytes.
file_size = "256MB"
## Kv purge threshold.
purge_threshold = "4GB"
## Procedure storage options.
[procedure]
@@ -318,7 +305,6 @@ retry_delay = "500ms"
# secret_access_key = "123456"
# endpoint = "https://s3.amazonaws.com"
# region = "us-west-2"
# enable_virtual_host_style = false
# Example of using Oss as the storage.
# [storage]
@@ -352,7 +338,7 @@ retry_delay = "500ms"
## The data storage options.
[storage]
## The working home directory.
data_home = "./greptimedb_data/"
data_home = "/tmp/greptimedb/"
## The storage type used to store the data.
## - `File`: the data is stored in the local file system.
@@ -585,11 +571,6 @@ aux_path = ""
## The max capacity of the staging directory.
staging_size = "2GB"
## The TTL of the staging directory.
## Defaults to 7 days.
## Setting it to "0s" to disable TTL.
staging_ttl = "7d"
## Cache size for inverted index metadata.
metadata_cache_size = "64MiB"
@@ -705,7 +686,7 @@ experimental_sparse_primary_key_encoding = false
## The logging options.
[logging]
## The directory to store the log files. If set to empty, logs will not be written to files.
dir = "./greptimedb_data/logs"
dir = "/tmp/greptimedb/logs"
## The log level. Can be `info`/`debug`/`warn`/`error`.
## @toml2docs:none-default

View File

@@ -1,4 +1,4 @@
FROM ubuntu:22.04 as builder
FROM ubuntu:20.04 as builder
ARG CARGO_PROFILE
ARG FEATURES

View File

@@ -1,4 +1,4 @@
FROM ubuntu:latest
FROM ubuntu:22.04
# The binary name of GreptimeDB executable.
# Defaults to "greptime", but sometimes in other projects it might be different.

View File

@@ -1,4 +1,4 @@
FROM ubuntu:22.04
FROM ubuntu:20.04
# The root path under which contains all the dependencies to build this Dockerfile.
ARG DOCKER_BUILD_ROOT=.
@@ -41,7 +41,7 @@ RUN mv protoc3/include/* /usr/local/include/
# and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
# of the Git's addition to the "safe.directory" at the first place (see the commit message here:
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# wildcard here. However, that requires the git's config files and the submodules all owned by the very same user.
# It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
# it can be a different user that have prepared the submodules.

View File

@@ -0,0 +1,51 @@
# Use the legacy glibc 2.28.
FROM ubuntu:18.10
ENV LANG en_US.utf8
WORKDIR /greptimedb
# Use old-releases.ubuntu.com to avoid 404s: https://help.ubuntu.com/community/EOLUpgrades.
RUN echo "deb http://old-releases.ubuntu.com/ubuntu/ cosmic main restricted universe multiverse\n\
deb http://old-releases.ubuntu.com/ubuntu/ cosmic-updates main restricted universe multiverse\n\
deb http://old-releases.ubuntu.com/ubuntu/ cosmic-security main restricted universe multiverse" > /etc/apt/sources.list
# Install dependencies.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libssl-dev \
tzdata \
curl \
ca-certificates \
git \
build-essential \
unzip \
pkg-config
# Install protoc.
ENV PROTOC_VERSION=29.3
RUN if [ "$(uname -m)" = "x86_64" ]; then \
PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-x86_64.zip; \
elif [ "$(uname -m)" = "aarch64" ]; then \
PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-aarch_64.zip; \
else \
echo "Unsupported architecture"; exit 1; \
fi && \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP} && \
unzip -o ${PROTOC_ZIP} -d /usr/local bin/protoc && \
unzip -o ${PROTOC_ZIP} -d /usr/local 'include/*' && \
rm -f ${PROTOC_ZIP}
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Install Rust toolchains.
ARG RUST_TOOLCHAIN
RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
RUN cargo install cargo-binstall --version 1.6.6 --locked
# Install nextest.
RUN cargo binstall cargo-nextest --no-confirm

View File

@@ -1,66 +0,0 @@
FROM ubuntu:20.04
# The root path under which contains all the dependencies to build this Dockerfile.
ARG DOCKER_BUILD_ROOT=.
ENV LANG en_US.utf8
WORKDIR /greptimedb
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y software-properties-common
# Install dependencies.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libssl-dev \
tzdata \
curl \
unzip \
ca-certificates \
git \
build-essential \
pkg-config
ARG TARGETPLATFORM
RUN echo "target platform: $TARGETPLATFORM"
ARG PROTOBUF_VERSION=29.3
# Install protobuf, because the one in the apt is too old (v3.12).
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-aarch_64.zip && \
unzip protoc-${PROTOBUF_VERSION}-linux-aarch_64.zip -d protoc3; \
elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOBUF_VERSION}/protoc-${PROTOBUF_VERSION}-linux-x86_64.zip && \
unzip protoc-${PROTOBUF_VERSION}-linux-x86_64.zip -d protoc3; \
fi
RUN mv protoc3/bin/* /usr/local/bin/
RUN mv protoc3/include/* /usr/local/include/
# Silence all `safe.directory` warnings, to avoid the "detect dubious repository" error when building with submodules.
# Disabling the safe directory check here won't pose extra security issues, because in our usage for this dev build
# image, we use it solely on our own environment (that github action's VM, or ECS created dynamically by ourselves),
# and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
# of the Git's addition to the "safe.directory" at the first place (see the commit message here:
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# wildcard here. However, that requires the git's config files and the submodules all owned by the very same user.
# It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
# it can be a different user that have prepared the submodules.
RUN git config --global --add safe.directory '*'
# Install Rust.
SHELL ["/bin/bash", "-c"]
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --no-modify-path --default-toolchain none -y
ENV PATH /root/.cargo/bin/:$PATH
# Install Rust toolchains.
ARG RUST_TOOLCHAIN
RUN rustup toolchain install ${RUST_TOOLCHAIN}
# Install cargo-binstall with a specific version to adapt the current rust toolchain.
# Note: if we use the latest version, we may encounter the following `use of unstable library feature 'io_error_downcast'` error.
# compile from source take too long, so we use the precompiled binary instead
COPY $DOCKER_BUILD_ROOT/docker/dev-builder/binstall/pull_binstall.sh /usr/local/bin/pull_binstall.sh
RUN chmod +x /usr/local/bin/pull_binstall.sh && /usr/local/bin/pull_binstall.sh
# Install nextest.
RUN cargo binstall cargo-nextest --no-confirm

View File

@@ -25,7 +25,7 @@ services:
- --initial-cluster-state=new
- *etcd_initial_cluster_token
volumes:
- ./greptimedb-cluster-docker-compose/etcd0:/var/lib/etcd
- /tmp/greptimedb-cluster-docker-compose/etcd0:/var/lib/etcd
healthcheck:
test: [ "CMD", "etcdctl", "--endpoints=http://etcd0:2379", "endpoint", "health" ]
interval: 5s
@@ -43,8 +43,8 @@ services:
command:
- metasrv
- start
- --rpc-bind-addr=0.0.0.0:3002
- --rpc-server-addr=metasrv:3002
- --bind-addr=0.0.0.0:3002
- --server-addr=metasrv:3002
- --store-addrs=etcd0:2379
- --http-addr=0.0.0.0:3000
healthcheck:
@@ -68,13 +68,12 @@ services:
- datanode
- start
- --node-id=0
- --data-home=/greptimedb_data
- --rpc-bind-addr=0.0.0.0:3001
- --rpc-server-addr=datanode0:3001
- --rpc-addr=0.0.0.0:3001
- --rpc-hostname=datanode0:3001
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:5000
volumes:
- ./greptimedb-cluster-docker-compose/datanode0:/greptimedb_data
- /tmp/greptimedb-cluster-docker-compose/datanode0:/tmp/greptimedb
healthcheck:
test: [ "CMD", "curl", "-fv", "http://datanode0:5000/health" ]
interval: 5s
@@ -99,7 +98,7 @@ services:
- start
- --metasrv-addrs=metasrv:3002
- --http-addr=0.0.0.0:4000
- --rpc-bind-addr=0.0.0.0:4001
- --rpc-addr=0.0.0.0:4001
- --mysql-addr=0.0.0.0:4002
- --postgres-addr=0.0.0.0:4003
healthcheck:
@@ -124,8 +123,8 @@ services:
- start
- --node-id=0
- --metasrv-addrs=metasrv:3002
- --rpc-bind-addr=0.0.0.0:4004
- --rpc-server-addr=flownode0:4004
- --rpc-addr=0.0.0.0:4004
- --rpc-hostname=flownode0:4004
- --http-addr=0.0.0.0:4005
depends_on:
frontend0:

View File

@@ -1,40 +0,0 @@
# TSBS benchmark - v0.12.0
## Environment
### Amazon EC2
| | |
|---------|-------------------------|
| Machine | c5d.2xlarge |
| CPU | 8 core |
| Memory | 16GB |
| Disk | 100GB (GP3) |
| OS | Ubuntu Server 24.04 LTS |
## Write performance
| Environment | Ingest rate (rows/s) |
|-----------------|----------------------|
| EC2 c5d.2xlarge | 326839.28 |
## Query performance
| Query type | EC2 c5d.2xlarge (ms) |
|-----------------------|----------------------|
| cpu-max-all-1 | 12.46 |
| cpu-max-all-8 | 24.20 |
| double-groupby-1 | 673.08 |
| double-groupby-5 | 963.99 |
| double-groupby-all | 1330.05 |
| groupby-orderby-limit | 952.46 |
| high-cpu-1 | 5.08 |
| high-cpu-all | 4638.57 |
| lastpoint | 591.02 |
| single-groupby-1-1-1 | 4.06 |
| single-groupby-1-1-12 | 4.73 |
| single-groupby-1-8-1 | 8.23 |
| single-groupby-5-1-1 | 4.61 |
| single-groupby-5-1-12 | 5.61 |
| single-groupby-5-8-1 | 9.74 |

View File

@@ -4,16 +4,6 @@ This crate provides an easy approach to dump memory profiling info.
## Prerequisites
### jemalloc
jeprof is already compiled in the target directory of GreptimeDB. You can find the binary and use it.
```
# find jeprof binary
find . -name 'jeprof'
# add executable permission
chmod +x <path_to_jeprof>
```
The path is usually under `./target/${PROFILE}/build/tikv-jemalloc-sys-${HASH}/out/build/bin/jeprof`.
The default version of jemalloc installed from the package manager may not have the `--collapsed` option.
You may need to check the whether the `jeprof` version is >= `5.3.0` if you want to install it from the package manager.
```bash
# for macOS
brew install jemalloc
@@ -33,11 +23,7 @@ curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph
Start GreptimeDB instance with environment variables:
```bash
# for Linux
MALLOC_CONF=prof:true ./target/debug/greptime standalone start
# for macOS
_RJEM_MALLOC_CONF=prof:true ./target/debug/greptime standalone start
```
Dump memory profiling data through HTTP API:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

After

Width:  |  Height:  |  Size: 36 KiB

BIN
docs/logo-text-padding.png Normal file → Executable file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 21 KiB

After

Width:  |  Height:  |  Size: 25 KiB

View File

@@ -1,19 +0,0 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
# Use jq to check for panels with empty or missing descriptions
invalid_panels=$(cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels[]
| select((.type == "stats" or .type == "timeseries") and (.description == "" or .description == null))
')
# Check if any invalid panels were found
if [[ -n "$invalid_panels" ]]; then
echo "Error: The following panels have empty or missing descriptions:"
echo "$invalid_panels"
exit 1
else
echo "All panels with type 'stats' or 'timeseries' have valid descriptions."
exit 0
fi

File diff suppressed because it is too large Load Diff

View File

@@ -384,8 +384,8 @@
"rowHeight": 0.9,
"showValue": "auto",
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "none",
"sort": "none"
}
},
"targets": [
@@ -483,8 +483,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
@@ -578,8 +578,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "10.2.3",
@@ -601,7 +601,7 @@
"type": "timeseries"
},
{
"collapsed": false,
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
@@ -684,8 +684,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -878,8 +878,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1124,8 +1124,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1223,8 +1223,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1322,8 +1322,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1456,8 +1456,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1573,8 +1573,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1673,8 +1673,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1773,8 +1773,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -1890,8 +1890,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2002,8 +2002,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2120,8 +2120,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2233,8 +2233,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2334,8 +2334,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2435,8 +2435,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2548,8 +2548,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2661,8 +2661,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2788,8 +2788,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2889,8 +2889,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -2990,8 +2990,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3091,8 +3091,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3191,8 +3191,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3302,8 +3302,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3432,8 +3432,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3543,8 +3543,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3657,8 +3657,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3808,8 +3808,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -3909,8 +3909,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -4011,8 +4011,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [
@@ -4113,8 +4113,8 @@
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "desc"
"mode": "single",
"sort": "none"
}
},
"targets": [

View File

@@ -1,11 +0,0 @@
#!/usr/bin/env bash
BASEDIR=$(dirname "$0")
echo '| Title | Description | Expressions |
|---|---|---|'
cat $BASEDIR/greptimedb-cluster.json | jq -r '
.panels |
map(select(.type == "stat" or .type == "timeseries")) |
.[] | "| \(.title) | \(.description | gsub("\n"; "<br>")) | \(.targets | map(.expr // .rawSql | "`\(.|gsub("\n"; "<br>"))`") | join("<br>")) |"
'

View File

@@ -15,10 +15,13 @@ common-macro.workspace = true
common-time.workspace = true
datatypes.workspace = true
greptime-proto.workspace = true
paste.workspace = true
paste = "1.0"
prost.workspace = true
serde_json.workspace = true
snafu.workspace = true
[build-dependencies]
tonic-build = "0.11"
[dev-dependencies]
paste = "1.0"

View File

@@ -19,7 +19,9 @@ use common_decimal::decimal128::{DECIMAL128_DEFAULT_SCALE, DECIMAL128_MAX_PRECIS
use common_decimal::Decimal128;
use common_time::time::Time;
use common_time::timestamp::TimeUnit;
use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
use common_time::{
Date, DateTime, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp,
};
use datatypes::prelude::{ConcreteDataType, ValueRef};
use datatypes::scalars::ScalarVector;
use datatypes::types::{
@@ -27,8 +29,8 @@ use datatypes::types::{
};
use datatypes::value::{OrderedF32, OrderedF64, Value};
use datatypes::vectors::{
BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
BinaryVector, BooleanVector, DateTimeVector, DateVector, Decimal128Vector, Float32Vector,
Float64Vector, Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
@@ -116,7 +118,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ColumnDataType::Json => ConcreteDataType::json_datatype(),
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::timestamp_microsecond_datatype(),
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
ColumnDataType::TimestampMillisecond => {
ConcreteDataType::timestamp_millisecond_datatype()
@@ -269,6 +271,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
ConcreteDataType::Binary(_) => ColumnDataType::Binary,
ConcreteDataType::String(_) => ColumnDataType::String,
ConcreteDataType::Date(_) => ColumnDataType::Date,
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
ConcreteDataType::Timestamp(t) => match t {
TimestampType::Second(_) => ColumnDataType::TimestampSecond,
TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
@@ -473,6 +476,7 @@ pub fn push_vals(column: &mut Column, origin_count: usize, vector: VectorRef) {
Value::String(val) => values.string_values.push(val.as_utf8().to_string()),
Value::Binary(val) => values.binary_values.push(val.to_vec()),
Value::Date(val) => values.date_values.push(val.val()),
Value::DateTime(val) => values.datetime_values.push(val.val()),
Value::Timestamp(val) => match val.unit() {
TimeUnit::Second => values.timestamp_second_values.push(val.value()),
TimeUnit::Millisecond => values.timestamp_millisecond_values.push(val.value()),
@@ -573,11 +577,12 @@ pub fn pb_value_to_value_ref<'a>(
ValueData::BinaryValue(bytes) => ValueRef::Binary(bytes.as_slice()),
ValueData::StringValue(string) => ValueRef::String(string.as_str()),
ValueData::DateValue(d) => ValueRef::Date(Date::from(*d)),
ValueData::DatetimeValue(d) => ValueRef::DateTime(DateTime::new(*d)),
ValueData::TimestampSecondValue(t) => ValueRef::Timestamp(Timestamp::new_second(*t)),
ValueData::TimestampMillisecondValue(t) => {
ValueRef::Timestamp(Timestamp::new_millisecond(*t))
}
ValueData::DatetimeValue(t) | ValueData::TimestampMicrosecondValue(t) => {
ValueData::TimestampMicrosecondValue(t) => {
ValueRef::Timestamp(Timestamp::new_microsecond(*t))
}
ValueData::TimestampNanosecondValue(t) => {
@@ -646,6 +651,7 @@ pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) ->
ConcreteDataType::Binary(_) => Arc::new(BinaryVector::from(values.binary_values)),
ConcreteDataType::String(_) => Arc::new(StringVector::from_vec(values.string_values)),
ConcreteDataType::Date(_) => Arc::new(DateVector::from_vec(values.date_values)),
ConcreteDataType::DateTime(_) => Arc::new(DateTimeVector::from_vec(values.datetime_values)),
ConcreteDataType::Timestamp(unit) => match unit {
TimestampType::Second(_) => Arc::new(TimestampSecondVector::from_vec(
values.timestamp_second_values,
@@ -781,6 +787,11 @@ pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::DateTime(_) => values
.datetime_values
.into_iter()
.map(|v| Value::DateTime(v.into()))
.collect(),
ConcreteDataType::Date(_) => values
.date_values
.into_iter()
@@ -936,6 +947,9 @@ pub fn to_proto_value(value: Value) -> Option<v1::Value> {
Value::Date(v) => v1::Value {
value_data: Some(ValueData::DateValue(v.val())),
},
Value::DateTime(v) => v1::Value {
value_data: Some(ValueData::DatetimeValue(v.val())),
},
Value::Timestamp(v) => match v.unit() {
TimeUnit::Second => v1::Value {
value_data: Some(ValueData::TimestampSecondValue(v.value())),
@@ -1052,6 +1066,7 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
Value::String(v) => Some(ValueData::StringValue(v.as_utf8().to_string())),
Value::Binary(v) => Some(ValueData::BinaryValue(v.to_vec())),
Value::Date(v) => Some(ValueData::DateValue(v.val())),
Value::DateTime(v) => Some(ValueData::DatetimeValue(v.val())),
Value::Timestamp(v) => Some(match v.unit() {
TimeUnit::Second => ValueData::TimestampSecondValue(v.value()),
TimeUnit::Millisecond => ValueData::TimestampMillisecondValue(v.value()),
@@ -1233,7 +1248,7 @@ mod tests {
ColumnDataTypeWrapper::date_datatype().into()
);
assert_eq!(
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::datetime_datatype(),
ColumnDataTypeWrapper::datetime_datatype().into()
);
assert_eq!(
@@ -1324,6 +1339,10 @@ mod tests {
ColumnDataTypeWrapper::date_datatype(),
ConcreteDataType::date_datatype().try_into().unwrap()
);
assert_eq!(
ColumnDataTypeWrapper::datetime_datatype(),
ConcreteDataType::datetime_datatype().try_into().unwrap()
);
assert_eq!(
ColumnDataTypeWrapper::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype()
@@ -1811,6 +1830,17 @@ mod tests {
]
);
test_convert_values!(
datetime,
vec![1.into(), 2.into(), 3.into()],
datetime,
vec![
Value::DateTime(1.into()),
Value::DateTime(2.into()),
Value::DateTime(3.into())
]
);
#[test]
fn test_vectors_to_rows_for_different_types() {
let boolean_vec = BooleanVector::from_vec(vec![true, false, true]);

View File

@@ -15,10 +15,10 @@
use std::collections::HashMap;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, SkippingIndexOptions,
SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY,
FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
};
use greptime_proto::v1::{Analyzer, SkippingIndexType as PbSkippingIndexType};
use greptime_proto::v1::Analyzer;
use snafu::ResultExt;
use crate::error::{self, Result};
@@ -103,13 +103,6 @@ pub fn contains_fulltext(options: &Option<ColumnOptions>) -> bool {
.is_some_and(|o| o.options.contains_key(FULLTEXT_GRPC_KEY))
}
/// Checks if the `ColumnOptions` contains skipping index options.
pub fn contains_skipping(options: &Option<ColumnOptions>) -> bool {
options
.as_ref()
.is_some_and(|o| o.options.contains_key(SKIPPING_INDEX_GRPC_KEY))
}
/// Tries to construct a `ColumnOptions` from the given `FulltextOptions`.
pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result<Option<ColumnOptions>> {
let mut options = ColumnOptions::default();
@@ -120,27 +113,6 @@ pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result<Option<Column
Ok((!options.options.is_empty()).then_some(options))
}
/// Tries to construct a `ColumnOptions` from the given `SkippingIndexOptions`.
pub fn options_from_skipping(skipping: &SkippingIndexOptions) -> Result<Option<ColumnOptions>> {
let mut options = ColumnOptions::default();
let v = serde_json::to_string(skipping).context(error::SerializeJsonSnafu)?;
options
.options
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), v);
Ok((!options.options.is_empty()).then_some(options))
}
/// Tries to construct a `ColumnOptions` for inverted index.
pub fn options_from_inverted() -> ColumnOptions {
let mut options = ColumnOptions::default();
options
.options
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string());
options
}
/// Tries to construct a `FulltextAnalyzer` from the given analyzer.
pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
match analyzer {
@@ -149,13 +121,6 @@ pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
}
}
/// Tries to construct a `SkippingIndexType` from the given skipping index type.
pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
match skipping_index_type {
PbSkippingIndexType::BloomFilter => SkippingIndexType::BloomFilter,
}
}
#[cfg(test)]
mod tests {

View File

@@ -15,7 +15,7 @@ api.workspace = true
arrow.workspace = true
arrow-schema.workspace = true
async-stream.workspace = true
async-trait.workspace = true
async-trait = "0.1"
bytes.workspace = true
common-catalog.workspace = true
common-error.workspace = true
@@ -31,7 +31,7 @@ common-version.workspace = true
dashmap.workspace = true
datafusion.workspace = true
datatypes.workspace = true
futures.workspace = true
futures = "0.3"
futures-util.workspace = true
humantime.workspace = true
itertools.workspace = true
@@ -39,7 +39,7 @@ lazy_static.workspace = true
meta-client.workspace = true
moka = { workspace = true, features = ["future", "sync"] }
partition.workspace = true
paste.workspace = true
paste = "1.0"
prometheus.workspace = true
rustc-hash.workspace = true
serde_json.workspace = true
@@ -49,7 +49,7 @@ sql.workspace = true
store-api.workspace = true
table.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
tokio-stream = "0.1"
[dev-dependencies]
cache.workspace = true

View File

@@ -38,7 +38,6 @@ use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use session::context::{Channel, QueryContext};
use snafu::prelude::*;
use table::dist_table::DistTable;
use table::metadata::TableId;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::table_name::TableName;
use table::TableRef;
@@ -287,28 +286,6 @@ impl CatalogManager for KvBackendCatalogManager {
return Ok(None);
}
async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>> {
let table_info_values = self
.table_metadata_manager
.table_info_manager()
.batch_get(table_ids)
.await
.context(TableMetadataManagerSnafu)?;
let tables = table_info_values
.into_values()
.filter(|t| t.table_info.catalog_name == catalog && t.table_info.schema_name == schema)
.map(build_table)
.collect::<Result<Vec<_>>>()?;
Ok(tables)
}
fn tables<'a>(
&'a self,
catalog: &'a str,

View File

@@ -87,14 +87,6 @@ pub trait CatalogManager: Send + Sync {
query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>>;
/// Returns the tables by table ids.
async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>>;
/// Returns all tables with a stream by catalog and schema.
fn tables<'a>(
&'a self,

View File

@@ -14,7 +14,7 @@
use std::any::Any;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::sync::{Arc, RwLock, Weak};
use async_stream::{stream, try_stream};
@@ -28,7 +28,6 @@ use common_meta::kv_backend::memory::MemoryKvBackend;
use futures_util::stream::BoxStream;
use session::context::QueryContext;
use snafu::OptionExt;
use table::metadata::TableId;
use table::TableRef;
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
@@ -144,33 +143,6 @@ impl CatalogManager for MemoryCatalogManager {
Ok(result)
}
async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>> {
let catalogs = self.catalogs.read().unwrap();
let schemas = catalogs.get(catalog).context(CatalogNotFoundSnafu {
catalog_name: catalog,
})?;
let tables = schemas
.get(schema)
.context(SchemaNotFoundSnafu { catalog, schema })?;
let filter_ids: HashSet<_> = table_ids.iter().collect();
// It is very inefficient, but we do not need to optimize it since it will not be called in `MemoryCatalogManager`.
let tables = tables
.values()
.filter(|t| filter_ids.contains(&t.table_info().table_id()))
.cloned()
.collect::<Vec<_>>();
Ok(tables)
}
fn tables<'a>(
&'a self,
catalog: &'a str,

View File

@@ -77,7 +77,7 @@ trait SystemSchemaProviderInner {
fn system_table(&self, name: &str) -> Option<SystemTableRef>;
fn table_info(catalog_name: String, table: &SystemTableRef) -> TableInfoRef {
let table_meta = TableMetaBuilder::empty()
let table_meta = TableMetaBuilder::default()
.schema(table.schema())
.primary_key_indices(vec![])
.next_column_id(0)

View File

@@ -365,6 +365,10 @@ impl InformationSchemaColumnsBuilder {
self.numeric_scales.push(None);
match &column_schema.data_type {
ConcreteDataType::DateTime(datetime_type) => {
self.datetime_precisions
.push(Some(datetime_type.precision() as i64));
}
ConcreteDataType::Timestamp(ts_type) => {
self.datetime_precisions
.push(Some(ts_type.precision() as i64));

View File

@@ -28,19 +28,16 @@ use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datatypes::prelude::ConcreteDataType as CDT;
use datatypes::scalars::ScalarVectorBuilder;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::timestamp::TimestampMillisecond;
use datatypes::value::Value;
use datatypes::vectors::{
Int64VectorBuilder, StringVectorBuilder, TimestampMillisecondVectorBuilder,
UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder, VectorRef,
};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
use store_api::storage::{ScanRequest, TableId};
use crate::error::{
CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu,
Result, UpgradeWeakCatalogManagerRefSnafu,
CreateRecordBatchSnafu, FlowInfoNotFoundSnafu, InternalSnafu, JsonSnafu, ListFlowsSnafu, Result,
};
use crate::information_schema::{Predicates, FLOWS};
use crate::system_schema::information_schema::InformationTable;
@@ -62,10 +59,6 @@ pub const SOURCE_TABLE_IDS: &str = "source_table_ids";
pub const SINK_TABLE_NAME: &str = "sink_table_name";
pub const FLOWNODE_IDS: &str = "flownode_ids";
pub const OPTIONS: &str = "options";
pub const CREATED_TIME: &str = "created_time";
pub const UPDATED_TIME: &str = "updated_time";
pub const LAST_EXECUTION_TIME: &str = "last_execution_time";
pub const SOURCE_TABLE_NAMES: &str = "source_table_names";
/// The `information_schema.flows` to provides information about flows in databases.
#[derive(Debug)]
@@ -106,14 +99,6 @@ impl InformationSchemaFlows {
(SINK_TABLE_NAME, CDT::string_datatype(), false),
(FLOWNODE_IDS, CDT::string_datatype(), true),
(OPTIONS, CDT::string_datatype(), true),
(CREATED_TIME, CDT::timestamp_millisecond_datatype(), false),
(UPDATED_TIME, CDT::timestamp_millisecond_datatype(), false),
(
LAST_EXECUTION_TIME,
CDT::timestamp_millisecond_datatype(),
true,
),
(SOURCE_TABLE_NAMES, CDT::string_datatype(), true),
]
.into_iter()
.map(|(name, ty, nullable)| ColumnSchema::new(name, ty, nullable))
@@ -185,10 +170,6 @@ struct InformationSchemaFlowsBuilder {
sink_table_names: StringVectorBuilder,
flownode_id_groups: StringVectorBuilder,
option_groups: StringVectorBuilder,
created_time: TimestampMillisecondVectorBuilder,
updated_time: TimestampMillisecondVectorBuilder,
last_execution_time: TimestampMillisecondVectorBuilder,
source_table_names: StringVectorBuilder,
}
impl InformationSchemaFlowsBuilder {
@@ -215,10 +196,6 @@ impl InformationSchemaFlowsBuilder {
sink_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
flownode_id_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
option_groups: StringVectorBuilder::with_capacity(INIT_CAPACITY),
created_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
updated_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
last_execution_time: TimestampMillisecondVectorBuilder::with_capacity(INIT_CAPACITY),
source_table_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -258,14 +235,13 @@ impl InformationSchemaFlowsBuilder {
catalog_name: catalog_name.to_string(),
flow_name: flow_name.to_string(),
})?;
self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)
.await?;
self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)?;
}
self.finish()
}
async fn add_flow(
fn add_flow(
&mut self,
predicates: &Predicates,
flow_id: FlowId,
@@ -314,36 +290,6 @@ impl InformationSchemaFlowsBuilder {
input: format!("{:?}", flow_info.options()),
},
)?));
self.created_time
.push(Some(flow_info.created_time().timestamp_millis().into()));
self.updated_time
.push(Some(flow_info.updated_time().timestamp_millis().into()));
self.last_execution_time
.push(flow_stat.as_ref().and_then(|state| {
state
.last_exec_time_map
.get(&flow_id)
.map(|v| TimestampMillisecond::new(*v))
}));
let mut source_table_names = vec![];
let catalog_name = self.catalog_name.clone();
let catalog_manager = self
.catalog_manager
.upgrade()
.context(UpgradeWeakCatalogManagerRefSnafu)?;
for schema_name in catalog_manager.schema_names(&catalog_name, None).await? {
source_table_names.extend(
catalog_manager
.tables_by_ids(&catalog_name, &schema_name, flow_info.source_table_ids())
.await?
.into_iter()
.map(|table| table.table_info().full_table_name()),
);
}
let source_table_names = source_table_names.join(",");
self.source_table_names.push(Some(&source_table_names));
Ok(())
}
@@ -361,10 +307,6 @@ impl InformationSchemaFlowsBuilder {
Arc::new(self.sink_table_names.finish()),
Arc::new(self.flownode_id_groups.finish()),
Arc::new(self.option_groups.finish()),
Arc::new(self.created_time.finish()),
Arc::new(self.updated_time.finish()),
Arc::new(self.last_execution_time.finish()),
Arc::new(self.source_table_names.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}

View File

@@ -20,7 +20,7 @@ use datatypes::vectors::{Int64Vector, StringVector, VectorRef};
use super::table_names::*;
use crate::system_schema::utils::tables::{
bigint_column, string_column, string_columns, timestamp_micro_column,
bigint_column, datetime_column, string_column, string_columns,
};
const NO_VALUE: &str = "NO";
@@ -163,17 +163,17 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
string_column("EVENT_BODY"),
string_column("EVENT_DEFINITION"),
string_column("EVENT_TYPE"),
timestamp_micro_column("EXECUTE_AT"),
datetime_column("EXECUTE_AT"),
bigint_column("INTERVAL_VALUE"),
string_column("INTERVAL_FIELD"),
string_column("SQL_MODE"),
timestamp_micro_column("STARTS"),
timestamp_micro_column("ENDS"),
datetime_column("STARTS"),
datetime_column("ENDS"),
string_column("STATUS"),
string_column("ON_COMPLETION"),
timestamp_micro_column("CREATED"),
timestamp_micro_column("LAST_ALTERED"),
timestamp_micro_column("LAST_EXECUTED"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
datetime_column("LAST_EXECUTED"),
string_column("EVENT_COMMENT"),
bigint_column("ORIGINATOR"),
string_column("CHARACTER_SET_CLIENT"),
@@ -204,10 +204,10 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
bigint_column("INITIAL_SIZE"),
bigint_column("MAXIMUM_SIZE"),
bigint_column("AUTOEXTEND_SIZE"),
timestamp_micro_column("CREATION_TIME"),
timestamp_micro_column("LAST_UPDATE_TIME"),
timestamp_micro_column("LAST_ACCESS_TIME"),
timestamp_micro_column("RECOVER_TIME"),
datetime_column("CREATION_TIME"),
datetime_column("LAST_UPDATE_TIME"),
datetime_column("LAST_ACCESS_TIME"),
datetime_column("RECOVER_TIME"),
bigint_column("TRANSACTION_COUNTER"),
string_column("VERSION"),
string_column("ROW_FORMAT"),
@@ -217,9 +217,9 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
bigint_column("MAX_DATA_LENGTH"),
bigint_column("INDEX_LENGTH"),
bigint_column("DATA_FREE"),
timestamp_micro_column("CREATE_TIME"),
timestamp_micro_column("UPDATE_TIME"),
timestamp_micro_column("CHECK_TIME"),
datetime_column("CREATE_TIME"),
datetime_column("UPDATE_TIME"),
datetime_column("CHECK_TIME"),
string_column("CHECKSUM"),
string_column("STATUS"),
string_column("EXTRA"),
@@ -330,8 +330,8 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
string_column("SQL_DATA_ACCESS"),
string_column("SQL_PATH"),
string_column("SECURITY_TYPE"),
timestamp_micro_column("CREATED"),
timestamp_micro_column("LAST_ALTERED"),
datetime_column("CREATED"),
datetime_column("LAST_ALTERED"),
string_column("SQL_MODE"),
string_column("ROUTINE_COMMENT"),
string_column("DEFINER"),
@@ -383,7 +383,7 @@ pub(super) fn get_schema_columns(table_name: &str) -> (SchemaRef, Vec<VectorRef>
string_column("ACTION_REFERENCE_NEW_TABLE"),
string_column("ACTION_REFERENCE_OLD_ROW"),
string_column("ACTION_REFERENCE_NEW_ROW"),
timestamp_micro_column("CREATED"),
datetime_column("CREATED"),
string_column("SQL_MODE"),
string_column("DEFINER"),
string_column("CHARACTER_SET_CLIENT"),

View File

@@ -228,6 +228,12 @@ impl InformationSchemaKeyColumnUsageBuilder {
let keys = &table_info.meta.primary_key_indices;
let schema = table.schema();
// For compatibility, use primary key columns as inverted index columns.
let pk_as_inverted_index = !schema
.column_schemas()
.iter()
.any(|c| c.has_inverted_index_key());
for (idx, column) in schema.column_schemas().iter().enumerate() {
let mut constraints = vec![];
if column.is_time_index() {
@@ -245,6 +251,10 @@ impl InformationSchemaKeyColumnUsageBuilder {
// TODO(dimbtp): foreign key constraint not supported yet
if keys.contains(&idx) {
constraints.push(PRI_CONSTRAINT_NAME);
if pk_as_inverted_index {
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
}
}
if column.is_inverted_indexed() {
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);

View File

@@ -20,18 +20,17 @@ use common_catalog::consts::INFORMATION_SCHEMA_PARTITIONS_TABLE_ID;
use common_error::ext::BoxedError;
use common_recordbatch::adapter::RecordBatchStreamAdapter;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::datetime::DateTime;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatchStreamAdapter;
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::timestamp::TimestampMicrosecond;
use datatypes::value::Value;
use datatypes::vectors::{
ConstantVector, Int64Vector, Int64VectorBuilder, MutableVector, StringVector,
StringVectorBuilder, TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder,
UInt64VectorBuilder,
ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder,
MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder,
};
use futures::{StreamExt, TryStreamExt};
use partition::manager::PartitionInfo;
@@ -128,21 +127,9 @@ impl InformationSchemaPartitions {
ColumnSchema::new("max_data_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("index_length", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new("data_free", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new(
"create_time",
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
"update_time",
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
"check_time",
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new("create_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("update_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("check_time", ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new("checksum", ConcreteDataType::int64_datatype(), true),
ColumnSchema::new(
"partition_comment",
@@ -213,7 +200,7 @@ struct InformationSchemaPartitionsBuilder {
partition_names: StringVectorBuilder,
partition_ordinal_positions: Int64VectorBuilder,
partition_expressions: StringVectorBuilder,
create_times: TimestampMicrosecondVectorBuilder,
create_times: DateTimeVectorBuilder,
partition_ids: UInt64VectorBuilder,
}
@@ -233,7 +220,7 @@ impl InformationSchemaPartitionsBuilder {
partition_names: StringVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ordinal_positions: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
partition_expressions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
create_times: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
create_times: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
partition_ids: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -337,7 +324,7 @@ impl InformationSchemaPartitionsBuilder {
};
self.partition_expressions.push(expressions.as_deref());
self.create_times.push(Some(TimestampMicrosecond::from(
self.create_times.push(Some(DateTime::from(
table_info.meta.created_on.timestamp_millis(),
)));
self.partition_ids.push(Some(partition.id.as_u64()));
@@ -355,8 +342,8 @@ impl InformationSchemaPartitionsBuilder {
Arc::new(Int64Vector::from(vec![None])),
rows_num,
));
let null_timestampmicrosecond_vector = Arc::new(ConstantVector::new(
Arc::new(TimestampMicrosecondVector::from(vec![None])),
let null_datetime_vector = Arc::new(ConstantVector::new(
Arc::new(DateTimeVector::from(vec![None])),
rows_num,
));
let partition_methods = Arc::new(ConstantVector::new(
@@ -386,8 +373,8 @@ impl InformationSchemaPartitionsBuilder {
null_i64_vector.clone(),
Arc::new(self.create_times.finish()),
// TODO(dennis): supports update_time
null_timestampmicrosecond_vector.clone(),
null_timestampmicrosecond_vector,
null_datetime_vector.clone(),
null_datetime_vector,
null_i64_vector,
null_string_vector.clone(),
null_string_vector.clone(),

View File

@@ -30,8 +30,7 @@ use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{
StringVectorBuilder, TimestampMicrosecondVectorBuilder, UInt32VectorBuilder,
UInt64VectorBuilder,
DateTimeVectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder,
};
use futures::TryStreamExt;
use snafu::{OptionExt, ResultExt};
@@ -106,21 +105,9 @@ impl InformationSchemaTables {
ColumnSchema::new(TABLE_ROWS, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(DATA_FREE, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(AUTO_INCREMENT, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(
CREATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
UPDATE_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(
CHECK_TIME,
ConcreteDataType::timestamp_microsecond_datatype(),
true,
),
ColumnSchema::new(CREATE_TIME, ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new(UPDATE_TIME, ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new(CHECK_TIME, ConcreteDataType::datetime_datatype(), true),
ColumnSchema::new(TABLE_COLLATION, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(CHECKSUM, ConcreteDataType::uint64_datatype(), true),
ColumnSchema::new(CREATE_OPTIONS, ConcreteDataType::string_datatype(), true),
@@ -195,9 +182,9 @@ struct InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder,
data_free: UInt64VectorBuilder,
auto_increment: UInt64VectorBuilder,
create_time: TimestampMicrosecondVectorBuilder,
update_time: TimestampMicrosecondVectorBuilder,
check_time: TimestampMicrosecondVectorBuilder,
create_time: DateTimeVectorBuilder,
update_time: DateTimeVectorBuilder,
check_time: DateTimeVectorBuilder,
table_collation: StringVectorBuilder,
checksum: UInt64VectorBuilder,
create_options: StringVectorBuilder,
@@ -232,9 +219,9 @@ impl InformationSchemaTablesBuilder {
max_index_length: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
data_free: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
auto_increment: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: TimestampMicrosecondVectorBuilder::with_capacity(INIT_CAPACITY),
create_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
update_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
check_time: DateTimeVectorBuilder::with_capacity(INIT_CAPACITY),
table_collation: StringVectorBuilder::with_capacity(INIT_CAPACITY),
checksum: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
create_options: StringVectorBuilder::with_capacity(INIT_CAPACITY),

View File

@@ -51,10 +51,10 @@ pub fn bigint_column(name: &str) -> ColumnSchema {
)
}
pub fn timestamp_micro_column(name: &str) -> ColumnSchema {
pub fn datetime_column(name: &str) -> ColumnSchema {
ColumnSchema::new(
str::to_lowercase(name),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::datetime_datatype(),
false,
)
}

View File

@@ -6,7 +6,6 @@ license.workspace = true
[features]
pg_kvbackend = ["common-meta/pg_kvbackend"]
mysql_kvbackend = ["common-meta/mysql_kvbackend"]
[lints]
workspace = true
@@ -44,10 +43,6 @@ futures.workspace = true
humantime.workspace = true
meta-client.workspace = true
nu-ansi-term = "0.46"
opendal = { version = "0.51.1", features = [
"services-fs",
"services-s3",
] }
query.workspace = true
rand.workspace = true
reqwest.workspace = true

View File

@@ -23,14 +23,11 @@ use common_error::ext::BoxedError;
use common_meta::key::{TableMetadataManager, TableMetadataManagerRef};
use common_meta::kv_backend::etcd::EtcdStore;
use common_meta::kv_backend::memory::MemoryKvBackend;
#[cfg(feature = "mysql_kvbackend")]
use common_meta::kv_backend::rds::MySqlStore;
#[cfg(feature = "pg_kvbackend")]
use common_meta::kv_backend::rds::PgStore;
use common_meta::kv_backend::postgres::PgStore;
use common_meta::peer::Peer;
use common_meta::rpc::router::{Region, RegionRoute};
use common_telemetry::info;
use common_wal::options::WalOptions;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, RawSchema};
use rand::Rng;
@@ -65,9 +62,6 @@ pub struct BenchTableMetadataCommand {
#[cfg(feature = "pg_kvbackend")]
#[clap(long)]
postgres_addr: Option<String>,
#[cfg(feature = "mysql_kvbackend")]
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
count: u32,
}
@@ -91,16 +85,6 @@ impl BenchTableMetadataCommand {
kv_backend
};
#[cfg(feature = "mysql_kvbackend")]
let kv_backend = if let Some(mysql_addr) = &self.mysql_addr {
info!("Using mysql as kv backend");
MySqlStore::with_url(mysql_addr, "greptime_metakv", 128)
.await
.unwrap()
} else {
kv_backend
};
let table_metadata_manager = Arc::new(TableMetadataManager::new(kv_backend));
let tool = BenchTableMetadata {
@@ -200,7 +184,7 @@ fn create_region_routes(regions: Vec<RegionNumber>) -> Vec<RegionRoute> {
region_routes
}
fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, WalOptions> {
fn create_region_wal_options(regions: Vec<RegionNumber>) -> HashMap<RegionNumber, String> {
// TODO(niebayes): construct region wal options for benchmark.
let _ = regions;
HashMap::default()

View File

@@ -49,12 +49,7 @@ impl TableMetadataBencher {
let regions: Vec<_> = (0..64).collect();
let region_routes = create_region_routes(regions.clone());
let region_wal_options = create_region_wal_options(regions)
.into_iter()
.map(|(region_id, wal_options)| {
(region_id, serde_json::to_string(&wal_options).unwrap())
})
.collect();
let region_wal_options = create_region_wal_options(regions);
let start = Instant::now();
@@ -114,17 +109,9 @@ impl TableMetadataBencher {
let table_info = table_info.unwrap();
let table_route = table_route.unwrap();
let table_id = table_info.table_info.ident.table_id;
let regions: Vec<_> = (0..64).collect();
let region_wal_options = create_region_wal_options(regions);
let _ = self
.table_metadata_manager
.delete_table_metadata(
table_id,
&table_info.table_name(),
&table_route,
&region_wal_options,
)
.delete_table_metadata(table_id, &table_info.table_name(), &table_route)
.await;
start.elapsed()
},

View File

@@ -276,24 +276,6 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("OpenDAL operator failed"))]
OpenDal {
#[snafu(implicit)]
location: Location,
#[snafu(source)]
error: opendal::Error,
},
#[snafu(display("S3 config need be set"))]
S3ConfigNotSet {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Output directory not set"))]
OutputDirNotSet {
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -337,9 +319,6 @@ impl ErrorExt for Error {
| Error::BuildClient { .. } => StatusCode::Unexpected,
Error::Other { source, .. } => source.status_code(),
Error::OpenDal { .. } => StatusCode::Internal,
Error::S3ConfigNotSet { .. } => StatusCode::InvalidArguments,
Error::OutputDirNotSet { .. } => StatusCode::InvalidArguments,
Error::BuildRuntime { source, .. } => source.status_code(),

View File

@@ -21,18 +21,15 @@ use async_trait::async_trait;
use clap::{Parser, ValueEnum};
use common_error::ext::BoxedError;
use common_telemetry::{debug, error, info};
use opendal::layers::LoggingLayer;
use opendal::{services, Operator};
use serde_json::Value;
use snafu::{OptionExt, ResultExt};
use tokio::fs::File;
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::sync::Semaphore;
use tokio::time::Instant;
use crate::database::{parse_proxy_opts, DatabaseClient};
use crate::error::{
EmptyResultSnafu, Error, OpenDalSnafu, OutputDirNotSetSnafu, Result, S3ConfigNotSetSnafu,
SchemaNotFoundSnafu,
};
use crate::error::{EmptyResultSnafu, Error, FileIoSnafu, Result, SchemaNotFoundSnafu};
use crate::{database, Tool};
type TableReference = (String, String, String);
@@ -55,9 +52,8 @@ pub struct ExportCommand {
addr: String,
/// Directory to put the exported data. E.g.: /tmp/greptimedb-export
/// for local export.
#[clap(long)]
output_dir: Option<String>,
output_dir: String,
/// The name of the catalog to export.
#[clap(long, default_value = "greptime-*")]
@@ -105,51 +101,10 @@ pub struct ExportCommand {
/// Disable proxy server, if set, will not use any proxy.
#[clap(long)]
no_proxy: bool,
/// if export data to s3
#[clap(long)]
s3: bool,
/// The s3 bucket name
/// if s3 is set, this is required
#[clap(long)]
s3_bucket: Option<String>,
/// The s3 endpoint
/// if s3 is set, this is required
#[clap(long)]
s3_endpoint: Option<String>,
/// The s3 access key
/// if s3 is set, this is required
#[clap(long)]
s3_access_key: Option<String>,
/// The s3 secret key
/// if s3 is set, this is required
#[clap(long)]
s3_secret_key: Option<String>,
/// The s3 region
/// if s3 is set, this is required
#[clap(long)]
s3_region: Option<String>,
}
impl ExportCommand {
pub async fn build(&self) -> std::result::Result<Box<dyn Tool>, BoxedError> {
if self.s3
&& (self.s3_bucket.is_none()
|| self.s3_endpoint.is_none()
|| self.s3_access_key.is_none()
|| self.s3_secret_key.is_none()
|| self.s3_region.is_none())
{
return Err(BoxedError::new(S3ConfigNotSetSnafu {}.build()));
}
if !self.s3 && self.output_dir.is_none() {
return Err(BoxedError::new(OutputDirNotSetSnafu {}.build()));
}
let (catalog, schema) =
database::split_database(&self.database).map_err(BoxedError::new)?;
let proxy = parse_proxy_opts(self.proxy.clone(), self.no_proxy)?;
@@ -171,43 +126,24 @@ impl ExportCommand {
target: self.target.clone(),
start_time: self.start_time.clone(),
end_time: self.end_time.clone(),
s3: self.s3,
s3_bucket: self.s3_bucket.clone(),
s3_endpoint: self.s3_endpoint.clone(),
s3_access_key: self.s3_access_key.clone(),
s3_secret_key: self.s3_secret_key.clone(),
s3_region: self.s3_region.clone(),
}))
}
}
#[derive(Clone)]
pub struct Export {
catalog: String,
schema: Option<String>,
database_client: DatabaseClient,
output_dir: Option<String>,
output_dir: String,
parallelism: usize,
target: ExportTarget,
start_time: Option<String>,
end_time: Option<String>,
s3: bool,
s3_bucket: Option<String>,
s3_endpoint: Option<String>,
s3_access_key: Option<String>,
s3_secret_key: Option<String>,
s3_region: Option<String>,
}
impl Export {
fn catalog_path(&self) -> PathBuf {
if self.s3 {
PathBuf::from(&self.catalog)
} else if let Some(dir) = &self.output_dir {
PathBuf::from(dir).join(&self.catalog)
} else {
unreachable!("catalog_path: output_dir must be set when not using s3")
}
PathBuf::from(&self.output_dir).join(&self.catalog)
}
async fn get_db_names(&self) -> Result<Vec<String>> {
@@ -364,23 +300,19 @@ impl Export {
let timer = Instant::now();
let db_names = self.get_db_names().await?;
let db_count = db_names.len();
let operator = self.build_operator().await?;
for schema in db_names {
let db_dir = self.catalog_path().join(format!("{schema}/"));
tokio::fs::create_dir_all(&db_dir)
.await
.context(FileIoSnafu)?;
let file = db_dir.join("create_database.sql");
let mut file = File::create(file).await.context(FileIoSnafu)?;
let create_database = self
.show_create("DATABASE", &self.catalog, &schema, None)
.await?;
let file_path = self.get_file_path(&schema, "create_database.sql");
self.write_to_storage(&operator, &file_path, create_database.into_bytes())
.await?;
info!(
"Exported {}.{} database creation SQL to {}",
self.catalog,
schema,
self.format_output_path(&file_path)
);
file.write_all(create_database.as_bytes())
.await
.context(FileIoSnafu)?;
}
let elapsed = timer.elapsed();
@@ -394,267 +326,149 @@ impl Export {
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.get_db_names().await?;
let db_count = db_names.len();
let operator = Arc::new(self.build_operator().await?);
let mut tasks = Vec::with_capacity(db_names.len());
for schema in db_names {
let semaphore_moved = semaphore.clone();
let export_self = self.clone();
let operator = operator.clone();
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
let (metric_physical_tables, remaining_tables, views) = export_self
.get_table_list(&export_self.catalog, &schema)
.await?;
// Create directory if needed for file system storage
if !export_self.s3 {
let db_dir = format!("{}/{}/", export_self.catalog, schema);
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
let (metric_physical_tables, remaining_tables, views) =
self.get_table_list(&self.catalog, &schema).await?;
let table_count =
metric_physical_tables.len() + remaining_tables.len() + views.len();
let db_dir = self.catalog_path().join(format!("{schema}/"));
tokio::fs::create_dir_all(&db_dir)
.await
.context(FileIoSnafu)?;
let file = db_dir.join("create_tables.sql");
let mut file = File::create(file).await.context(FileIoSnafu)?;
for (c, s, t) in metric_physical_tables.into_iter().chain(remaining_tables) {
let create_table = self.show_create("TABLE", &c, &s, Some(&t)).await?;
file.write_all(create_table.as_bytes())
.await
.context(FileIoSnafu)?;
}
let file_path = export_self.get_file_path(&schema, "create_tables.sql");
let mut content = Vec::new();
// Add table creation SQL
for (c, s, t) in metric_physical_tables.iter().chain(&remaining_tables) {
let create_table = export_self.show_create("TABLE", c, s, Some(t)).await?;
content.extend_from_slice(create_table.as_bytes());
for (c, s, v) in views {
let create_view = self.show_create("VIEW", &c, &s, Some(&v)).await?;
file.write_all(create_view.as_bytes())
.await
.context(FileIoSnafu)?;
}
// Add view creation SQL
for (c, s, v) in &views {
let create_view = export_self.show_create("VIEW", c, s, Some(v)).await?;
content.extend_from_slice(create_view.as_bytes());
}
// Write to storage
export_self
.write_to_storage(&operator, &file_path, content)
.await?;
info!(
"Finished exporting {}.{schema} with {} table schemas to path: {}",
export_self.catalog,
metric_physical_tables.len() + remaining_tables.len() + views.len(),
export_self.format_output_path(&file_path)
"Finished exporting {}.{schema} with {table_count} table schemas to path: {}",
self.catalog,
db_dir.to_string_lossy()
);
Ok::<(), Error>(())
});
}
let success = self.execute_tasks(tasks).await;
let success = futures::future::join_all(tasks)
.await
.into_iter()
.filter(|r| match r {
Ok(_) => true,
Err(e) => {
error!(e; "export schema job failed");
false
}
})
.count();
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, cost: {elapsed:?}");
Ok(())
}
async fn build_operator(&self) -> Result<Operator> {
if self.s3 {
self.build_s3_operator().await
} else {
self.build_fs_operator().await
}
}
async fn build_s3_operator(&self) -> Result<Operator> {
let mut builder = services::S3::default().root("").bucket(
self.s3_bucket
.as_ref()
.expect("s3_bucket must be provided when s3 is enabled"),
);
if let Some(endpoint) = self.s3_endpoint.as_ref() {
builder = builder.endpoint(endpoint);
}
if let Some(region) = self.s3_region.as_ref() {
builder = builder.region(region);
}
if let Some(key_id) = self.s3_access_key.as_ref() {
builder = builder.access_key_id(key_id);
}
if let Some(secret_key) = self.s3_secret_key.as_ref() {
builder = builder.secret_access_key(secret_key);
}
let op = Operator::new(builder)
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
.finish();
Ok(op)
}
async fn build_fs_operator(&self) -> Result<Operator> {
let root = self
.output_dir
.as_ref()
.context(OutputDirNotSetSnafu)?
.clone();
let op = Operator::new(services::Fs::default().root(&root))
.context(OpenDalSnafu)?
.layer(LoggingLayer::default())
.finish();
Ok(op)
}
async fn export_database_data(&self) -> Result<()> {
let timer = Instant::now();
let semaphore = Arc::new(Semaphore::new(self.parallelism));
let db_names = self.get_db_names().await?;
let db_count = db_names.len();
let mut tasks = Vec::with_capacity(db_count);
let operator = Arc::new(self.build_operator().await?);
let with_options = build_with_options(&self.start_time, &self.end_time);
for schema in db_names {
let semaphore_moved = semaphore.clone();
let export_self = self.clone();
let with_options_clone = with_options.clone();
let operator = operator.clone();
tasks.push(async move {
let _permit = semaphore_moved.acquire().await.unwrap();
let db_dir = self.catalog_path().join(format!("{schema}/"));
tokio::fs::create_dir_all(&db_dir)
.await
.context(FileIoSnafu)?;
// Create directory if not using S3
if !export_self.s3 {
let db_dir = format!("{}/{}/", export_self.catalog, schema);
operator.create_dir(&db_dir).await.context(OpenDalSnafu)?;
}
let with_options = match (&self.start_time, &self.end_time) {
(Some(start_time), Some(end_time)) => {
format!(
"WITH (FORMAT='parquet', start_time='{}', end_time='{}')",
start_time, end_time
)
}
(Some(start_time), None) => {
format!("WITH (FORMAT='parquet', start_time='{}')", start_time)
}
(None, Some(end_time)) => {
format!("WITH (FORMAT='parquet', end_time='{}')", end_time)
}
(None, None) => "WITH (FORMAT='parquet')".to_string(),
};
let (path, connection_part) = export_self.get_storage_params(&schema);
// Execute COPY DATABASE TO command
let sql = format!(
r#"COPY DATABASE "{}"."{}" TO '{}' WITH ({}){};"#,
export_self.catalog, schema, path, with_options_clone, connection_part
);
info!("Executing sql: {sql}");
export_self.database_client.sql_in_public(&sql).await?;
info!(
"Finished exporting {}.{} data to {}",
export_self.catalog, schema, path
);
// Create copy_from.sql file
let copy_database_from_sql = format!(
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH ({}){};"#,
export_self.catalog, schema, path, with_options_clone, connection_part
);
let copy_from_path = export_self.get_file_path(&schema, "copy_from.sql");
export_self
.write_to_storage(
&operator,
&copy_from_path,
copy_database_from_sql.into_bytes(),
)
.await?;
info!(
"Finished exporting {}.{} copy_from.sql to {}",
export_self.catalog,
r#"COPY DATABASE "{}"."{}" TO '{}' {};"#,
self.catalog,
schema,
export_self.format_output_path(&copy_from_path)
db_dir.to_str().unwrap(),
with_options
);
info!("Executing sql: {sql}");
self.database_client.sql_in_public(&sql).await?;
info!(
"Finished exporting {}.{schema} data into path: {}",
self.catalog,
db_dir.to_string_lossy()
);
// The export copy from sql
let copy_from_file = db_dir.join("copy_from.sql");
let mut writer =
BufWriter::new(File::create(copy_from_file).await.context(FileIoSnafu)?);
let copy_database_from_sql = format!(
r#"COPY DATABASE "{}"."{}" FROM '{}' WITH (FORMAT='parquet');"#,
self.catalog,
schema,
db_dir.to_str().unwrap()
);
writer
.write(copy_database_from_sql.as_bytes())
.await
.context(FileIoSnafu)?;
writer.flush().await.context(FileIoSnafu)?;
info!("Finished exporting {}.{schema} copy_from.sql", self.catalog);
Ok::<(), Error>(())
});
})
}
let success = self.execute_tasks(tasks).await;
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
Ok(())
}
fn get_file_path(&self, schema: &str, file_name: &str) -> String {
format!("{}/{}/{}", self.catalog, schema, file_name)
}
fn format_output_path(&self, file_path: &str) -> String {
if self.s3 {
format!(
"s3://{}/{}",
self.s3_bucket.as_ref().unwrap_or(&String::new()),
file_path
)
} else {
format!(
"{}/{}",
self.output_dir.as_ref().unwrap_or(&String::new()),
file_path
)
}
}
async fn write_to_storage(
&self,
op: &Operator,
file_path: &str,
content: Vec<u8>,
) -> Result<()> {
op.write(file_path, content).await.context(OpenDalSnafu)
}
fn get_storage_params(&self, schema: &str) -> (String, String) {
if self.s3 {
let s3_path = format!(
"s3://{}/{}/{}/",
// Safety: s3_bucket is required when s3 is enabled
self.s3_bucket.as_ref().unwrap(),
self.catalog,
schema
);
// endpoint is optional
let endpoint_option = if let Some(endpoint) = self.s3_endpoint.as_ref() {
format!(", ENDPOINT='{}'", endpoint)
} else {
String::new()
};
// Safety: All s3 options are required
let connection_options = format!(
"ACCESS_KEY_ID='{}', SECRET_ACCESS_KEY='{}', REGION='{}'{}",
self.s3_access_key.as_ref().unwrap(),
self.s3_secret_key.as_ref().unwrap(),
self.s3_region.as_ref().unwrap(),
endpoint_option
);
(s3_path, format!(" CONNECTION ({})", connection_options))
} else {
(
self.catalog_path()
.join(format!("{schema}/"))
.to_string_lossy()
.to_string(),
String::new(),
)
}
}
async fn execute_tasks(
&self,
tasks: Vec<impl std::future::Future<Output = Result<()>>>,
) -> usize {
futures::future::join_all(tasks)
let success = futures::future::join_all(tasks)
.await
.into_iter()
.filter(|r| match r {
Ok(_) => true,
Err(e) => {
error!(e; "export job failed");
error!(e; "export database job failed");
false
}
})
.count()
.count();
let elapsed = timer.elapsed();
info!("Success {success}/{db_count} jobs, costs: {elapsed:?}");
Ok(())
}
}
@@ -679,15 +493,3 @@ impl Tool for Export {
}
}
}
/// Builds the WITH options string for SQL commands, assuming consistent syntax across S3 and local exports.
fn build_with_options(start_time: &Option<String>, end_time: &Option<String>) -> String {
let mut options = vec!["format = 'parquet'".to_string()];
if let Some(start) = start_time {
options.push(format!("start_time = '{}'", start));
}
if let Some(end) = end_time {
options.push(format!("end_time = '{}'", end));
}
options.join(", ")
}

View File

@@ -126,14 +126,10 @@ impl SubCommand {
struct StartCommand {
#[clap(long)]
node_id: Option<u64>,
/// The address to bind the gRPC server.
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
/// The address advertised to the metasrv, and used for connections from outside the host.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "rpc-hostname")]
rpc_server_addr: Option<String>,
#[clap(long)]
rpc_addr: Option<String>,
#[clap(long)]
rpc_hostname: Option<String>,
#[clap(long, value_delimiter = ',', num_args = 1..)]
metasrv_addrs: Option<Vec<String>>,
#[clap(short, long)]
@@ -185,18 +181,18 @@ impl StartCommand {
tokio_console_addr: global_options.tokio_console_addr.clone(),
};
if let Some(addr) = &self.rpc_bind_addr {
opts.grpc.bind_addr.clone_from(addr);
if let Some(addr) = &self.rpc_addr {
opts.grpc.addr.clone_from(addr);
} else if let Some(addr) = &opts.rpc_addr {
warn!("Use the deprecated attribute `DatanodeOptions.rpc_addr`, please use `grpc.addr` instead.");
opts.grpc.bind_addr.clone_from(addr);
opts.grpc.addr.clone_from(addr);
}
if let Some(server_addr) = &self.rpc_server_addr {
opts.grpc.server_addr.clone_from(server_addr);
} else if let Some(server_addr) = &opts.rpc_hostname {
if let Some(hostname) = &self.rpc_hostname {
opts.grpc.hostname.clone_from(hostname);
} else if let Some(hostname) = &opts.rpc_hostname {
warn!("Use the deprecated attribute `DatanodeOptions.rpc_hostname`, please use `grpc.hostname` instead.");
opts.grpc.server_addr.clone_from(server_addr);
opts.grpc.hostname.clone_from(hostname);
}
if let Some(runtime_size) = opts.rpc_runtime_size {
@@ -281,12 +277,13 @@ impl StartCommand {
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.grpc.detect_server_addr();
opts.grpc.detect_hostname();
let mut plugins = Plugins::new();
plugins::setup_datanode_plugins(&mut plugins, &plugin_opts, &opts)
.await
.context(StartDatanodeSnafu)?;
let cluster_id = 0; // TODO(hl): read from config
let member_id = opts
.node_id
.context(MissingConfigSnafu { msg: "'node_id'" })?;
@@ -295,10 +292,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Datanode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Datanode { member_id },
meta_config,
)
.await
.context(MetaClientInitSnafu)?;
let meta_backend = Arc::new(MetaKvBackend {
client: meta_client.clone(),
@@ -357,8 +357,8 @@ mod tests {
rpc_addr = "127.0.0.1:4001"
rpc_hostname = "192.168.0.1"
[grpc]
bind_addr = "127.0.0.1:3001"
server_addr = "127.0.0.1"
addr = "127.0.0.1:3001"
hostname = "127.0.0.1"
runtime_size = 8
"#;
write!(file, "{}", toml_str).unwrap();
@@ -369,8 +369,8 @@ mod tests {
};
let options = cmd.load_options(&Default::default()).unwrap().component;
assert_eq!("127.0.0.1:4001".to_string(), options.grpc.bind_addr);
assert_eq!("192.168.0.1".to_string(), options.grpc.server_addr);
assert_eq!("127.0.0.1:4001".to_string(), options.grpc.addr);
assert_eq!("192.168.0.1".to_string(), options.grpc.hostname);
}
#[test]
@@ -406,7 +406,7 @@ mod tests {
sync_write = false
[storage]
data_home = "./greptimedb_data/"
data_home = "/tmp/greptimedb/"
type = "File"
[[storage.providers]]
@@ -420,7 +420,7 @@ mod tests {
[logging]
level = "debug"
dir = "./greptimedb_data/test/logs"
dir = "/tmp/greptimedb/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -431,7 +431,7 @@ mod tests {
let options = cmd.load_options(&Default::default()).unwrap().component;
assert_eq!("127.0.0.1:3001".to_string(), options.grpc.bind_addr);
assert_eq!("127.0.0.1:3001".to_string(), options.grpc.addr);
assert_eq!(Some(42), options.node_id);
let DatanodeWalConfig::RaftEngine(raft_engine_config) = options.wal else {
@@ -467,7 +467,7 @@ mod tests {
assert_eq!(10000, ddl_timeout.as_millis());
assert_eq!(3000, timeout.as_millis());
assert!(tcp_nodelay);
assert_eq!("./greptimedb_data/", options.storage.data_home);
assert_eq!("/tmp/greptimedb/", options.storage.data_home);
assert!(matches!(
&options.storage.store,
ObjectStoreConfig::File(FileConfig { .. })
@@ -483,10 +483,7 @@ mod tests {
));
assert_eq!("debug", options.logging.level.unwrap());
assert_eq!(
"./greptimedb_data/test/logs".to_string(),
options.logging.dir
);
assert_eq!("/tmp/greptimedb/test/logs".to_string(), options.logging.dir);
}
#[test]
@@ -529,7 +526,7 @@ mod tests {
let options = cmd
.load_options(&GlobalOptions {
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -539,7 +536,7 @@ mod tests {
.component;
let logging_opt = options.logging;
assert_eq!("./greptimedb_data/test/logs", logging_opt.dir);
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}
@@ -568,11 +565,11 @@ mod tests {
[storage]
type = "File"
data_home = "./greptimedb_data/"
data_home = "/tmp/greptimedb/"
[logging]
level = "debug"
dir = "./greptimedb_data/test/logs"
dir = "/tmp/greptimedb/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -648,7 +645,7 @@ mod tests {
opts.http.addr,
DatanodeOptions::default().component.http.addr
);
assert_eq!(opts.grpc.server_addr, "10.103.174.219");
assert_eq!(opts.grpc.hostname, "10.103.174.219");
},
);
}

View File

@@ -129,13 +129,11 @@ struct StartCommand {
#[clap(long)]
node_id: Option<u64>,
/// Bind address for the gRPC server.
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
/// The address advertised to the metasrv, and used for connections from outside the host.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "rpc-hostname")]
rpc_server_addr: Option<String>,
#[clap(long)]
rpc_addr: Option<String>,
/// Hostname for the gRPC server.
#[clap(long)]
rpc_hostname: Option<String>,
/// Metasrv address list;
#[clap(long, value_delimiter = ',', num_args = 1..)]
metasrv_addrs: Option<Vec<String>>,
@@ -186,12 +184,12 @@ impl StartCommand {
tokio_console_addr: global_options.tokio_console_addr.clone(),
};
if let Some(addr) = &self.rpc_bind_addr {
opts.grpc.bind_addr.clone_from(addr);
if let Some(addr) = &self.rpc_addr {
opts.grpc.addr.clone_from(addr);
}
if let Some(server_addr) = &self.rpc_server_addr {
opts.grpc.server_addr.clone_from(server_addr);
if let Some(hostname) = &self.rpc_hostname {
opts.grpc.hostname.clone_from(hostname);
}
if let Some(node_id) = self.node_id {
@@ -239,7 +237,10 @@ impl StartCommand {
info!("Flownode options: {:#?}", opts);
let mut opts = opts.component;
opts.grpc.detect_server_addr();
opts.grpc.detect_hostname();
// TODO(discord9): make it not optionale after cluster id is required
let cluster_id = opts.cluster_id.unwrap_or(0);
let member_id = opts
.node_id
@@ -249,10 +250,13 @@ impl StartCommand {
msg: "'meta_client_options'",
})?;
let meta_client =
meta_client::create_meta_client(MetaClientType::Flownode { member_id }, meta_config)
.await
.context(MetaClientInitSnafu)?;
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Flownode { member_id },
meta_config,
)
.await
.context(MetaClientInitSnafu)?;
let cache_max_capacity = meta_config.metadata_cache_max_capacity;
let cache_ttl = meta_config.metadata_cache_ttl;

View File

@@ -136,19 +136,13 @@ impl SubCommand {
#[derive(Debug, Default, Parser)]
pub struct StartCommand {
/// The address to bind the gRPC server.
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
/// The address advertised to the metasrv, and used for connections from outside the host.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "rpc-hostname")]
rpc_server_addr: Option<String>,
#[clap(long)]
http_addr: Option<String>,
#[clap(long)]
http_timeout: Option<u64>,
#[clap(long)]
rpc_addr: Option<String>,
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
postgres_addr: Option<String>,
@@ -224,15 +218,11 @@ impl StartCommand {
opts.http.disable_dashboard = disable_dashboard;
}
if let Some(addr) = &self.rpc_bind_addr {
opts.grpc.bind_addr.clone_from(addr);
if let Some(addr) = &self.rpc_addr {
opts.grpc.addr.clone_from(addr);
opts.grpc.tls = tls_opts.clone();
}
if let Some(addr) = &self.rpc_server_addr {
opts.grpc.server_addr.clone_from(addr);
}
if let Some(addr) = &self.mysql_addr {
opts.mysql.enable = true;
opts.mysql.addr.clone_from(addr);
@@ -279,7 +269,7 @@ impl StartCommand {
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.grpc.detect_server_addr();
opts.grpc.detect_hostname();
let mut plugins = Plugins::new();
plugins::setup_frontend_plugins(&mut plugins, &plugin_opts, &opts)
.await
@@ -295,10 +285,14 @@ impl StartCommand {
let cache_ttl = meta_client_options.metadata_cache_ttl;
let cache_tti = meta_client_options.metadata_cache_tti;
let meta_client =
meta_client::create_meta_client(MetaClientType::Frontend, meta_client_options)
.await
.context(MetaClientInitSnafu)?;
let cluster_id = 0; // (TODO: jeremy): It is currently a reserved field and has not been enabled.
let meta_client = meta_client::create_meta_client(
cluster_id,
MetaClientType::Frontend,
meta_client_options,
)
.await
.context(MetaClientInitSnafu)?;
// TODO(discord9): add helper function to ease the creation of cache registry&such
let cached_meta_backend =
@@ -419,7 +413,7 @@ mod tests {
let default_opts = FrontendOptions::default().component;
assert_eq!(opts.grpc.bind_addr, default_opts.grpc.bind_addr);
assert_eq!(opts.grpc.addr, default_opts.grpc.addr);
assert!(opts.mysql.enable);
assert_eq!(opts.mysql.runtime_size, default_opts.mysql.runtime_size);
assert!(opts.postgres.enable);
@@ -448,7 +442,7 @@ mod tests {
[logging]
level = "debug"
dir = "./greptimedb_data/test/logs"
dir = "/tmp/greptimedb/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -466,10 +460,7 @@ mod tests {
assert_eq!(ReadableSize::gb(2), fe_opts.http.body_limit);
assert_eq!("debug", fe_opts.logging.level.as_ref().unwrap());
assert_eq!(
"./greptimedb_data/test/logs".to_string(),
fe_opts.logging.dir
);
assert_eq!("/tmp/greptimedb/test/logs".to_string(), fe_opts.logging.dir);
assert!(!fe_opts.opentsdb.enable);
}
@@ -508,7 +499,7 @@ mod tests {
let options = cmd
.load_options(&GlobalOptions {
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -518,7 +509,7 @@ mod tests {
.component;
let logging_opt = options.logging;
assert_eq!("./greptimedb_data/test/logs", logging_opt.dir);
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}
@@ -613,7 +604,7 @@ mod tests {
assert_eq!(fe_opts.http.addr, "127.0.0.1:14000");
// Should be default value.
assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
},
);
}

View File

@@ -42,7 +42,7 @@ pub struct Instance {
}
impl Instance {
pub fn new(instance: MetasrvInstance, guard: Vec<WorkerGuard>) -> Self {
fn new(instance: MetasrvInstance, guard: Vec<WorkerGuard>) -> Self {
Self {
instance,
_guard: guard,
@@ -133,15 +133,11 @@ impl SubCommand {
#[derive(Debug, Default, Parser)]
struct StartCommand {
/// The address to bind the gRPC server.
#[clap(long, alias = "bind-addr")]
rpc_bind_addr: Option<String>,
/// The communication server address for the frontend and datanode to connect to metasrv.
/// If left empty or unset, the server will automatically use the IP address of the first network interface
/// on the host, with the same port number as the one specified in `rpc_bind_addr`.
#[clap(long, alias = "server-addr")]
rpc_server_addr: Option<String>,
#[clap(long, alias = "store-addr", value_delimiter = ',', num_args = 1..)]
#[clap(long)]
bind_addr: Option<String>,
#[clap(long)]
server_addr: Option<String>,
#[clap(long, aliases = ["store-addr"], value_delimiter = ',', num_args = 1..)]
store_addrs: Option<Vec<String>>,
#[clap(short, long)]
config_file: Option<String>,
@@ -205,11 +201,11 @@ impl StartCommand {
tokio_console_addr: global_options.tokio_console_addr.clone(),
};
if let Some(addr) = &self.rpc_bind_addr {
if let Some(addr) = &self.bind_addr {
opts.bind_addr.clone_from(addr);
}
if let Some(addr) = &self.rpc_server_addr {
if let Some(addr) = &self.server_addr {
opts.server_addr.clone_from(addr);
}
@@ -273,13 +269,11 @@ impl StartCommand {
log_versions(version(), short_version(), APP_NAME);
info!("Metasrv start command: {:#?}", self);
info!("Metasrv options: {:#?}", opts);
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.detect_server_addr();
info!("Metasrv options: {:#?}", opts);
let mut plugins = Plugins::new();
plugins::setup_metasrv_plugins(&mut plugins, &plugin_opts, &opts)
.await
@@ -312,8 +306,8 @@ mod tests {
#[test]
fn test_read_from_cmd() {
let cmd = StartCommand {
rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
rpc_server_addr: Some("127.0.0.1:3002".to_string()),
bind_addr: Some("127.0.0.1:3002".to_string()),
server_addr: Some("127.0.0.1:3002".to_string()),
store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
selector: Some("LoadBased".to_string()),
..Default::default()
@@ -337,7 +331,7 @@ mod tests {
[logging]
level = "debug"
dir = "./greptimedb_data/test/logs"
dir = "/tmp/greptimedb/test/logs"
[failure_detector]
threshold = 8.0
@@ -358,10 +352,7 @@ mod tests {
assert_eq!(vec!["127.0.0.1:2379".to_string()], options.store_addrs);
assert_eq!(SelectorType::LeaseBased, options.selector);
assert_eq!("debug", options.logging.level.as_ref().unwrap());
assert_eq!(
"./greptimedb_data/test/logs".to_string(),
options.logging.dir
);
assert_eq!("/tmp/greptimedb/test/logs".to_string(), options.logging.dir);
assert_eq!(8.0, options.failure_detector.threshold);
assert_eq!(
100.0,
@@ -390,8 +381,8 @@ mod tests {
#[test]
fn test_load_log_options_from_cli() {
let cmd = StartCommand {
rpc_bind_addr: Some("127.0.0.1:3002".to_string()),
rpc_server_addr: Some("127.0.0.1:3002".to_string()),
bind_addr: Some("127.0.0.1:3002".to_string()),
server_addr: Some("127.0.0.1:3002".to_string()),
store_addrs: Some(vec!["127.0.0.1:2380".to_string()]),
selector: Some("LoadBased".to_string()),
..Default::default()
@@ -399,7 +390,7 @@ mod tests {
let options = cmd
.load_options(&GlobalOptions {
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -409,7 +400,7 @@ mod tests {
.component;
let logging_opt = options.logging;
assert_eq!("./greptimedb_data/test/logs", logging_opt.dir);
assert_eq!("/tmp/greptimedb/test/logs", logging_opt.dir);
assert_eq!("debug", logging_opt.level.as_ref().unwrap());
}
@@ -427,7 +418,7 @@ mod tests {
[logging]
level = "debug"
dir = "./greptimedb_data/test/logs"
dir = "/tmp/greptimedb/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();

View File

@@ -60,8 +60,7 @@ use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};
use frontend::server::Services;
use frontend::service_config::{
InfluxdbOptions, JaegerOptions, MysqlOptions, OpentsdbOptions, PostgresOptions,
PromStoreOptions,
InfluxdbOptions, MysqlOptions, OpentsdbOptions, PostgresOptions, PromStoreOptions,
};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
use mito2::config::MitoConfig;
@@ -141,7 +140,6 @@ pub struct StandaloneOptions {
pub postgres: PostgresOptions,
pub opentsdb: OpentsdbOptions,
pub influxdb: InfluxdbOptions,
pub jaeger: JaegerOptions,
pub prom_store: PromStoreOptions,
pub wal: DatanodeWalConfig,
pub storage: StorageConfig,
@@ -171,7 +169,6 @@ impl Default for StandaloneOptions {
postgres: PostgresOptions::default(),
opentsdb: OpentsdbOptions::default(),
influxdb: InfluxdbOptions::default(),
jaeger: JaegerOptions::default(),
prom_store: PromStoreOptions::default(),
wal: DatanodeWalConfig::default(),
storage: StorageConfig::default(),
@@ -220,7 +217,6 @@ impl StandaloneOptions {
postgres: cloned_opts.postgres,
opentsdb: cloned_opts.opentsdb,
influxdb: cloned_opts.influxdb,
jaeger: cloned_opts.jaeger,
prom_store: cloned_opts.prom_store,
meta_client: None,
logging: cloned_opts.logging,
@@ -333,8 +329,8 @@ impl App for Instance {
pub struct StartCommand {
#[clap(long)]
http_addr: Option<String>,
#[clap(long, alias = "rpc-addr")]
rpc_bind_addr: Option<String>,
#[clap(long)]
rpc_addr: Option<String>,
#[clap(long)]
mysql_addr: Option<String>,
#[clap(long)]
@@ -411,9 +407,9 @@ impl StartCommand {
opts.storage.data_home.clone_from(data_home);
}
if let Some(addr) = &self.rpc_bind_addr {
if let Some(addr) = &self.rpc_addr {
// frontend grpc addr conflict with datanode default grpc addr
let datanode_grpc_addr = DatanodeOptions::default().grpc.bind_addr;
let datanode_grpc_addr = DatanodeOptions::default().grpc.addr;
if addr.eq(&datanode_grpc_addr) {
return IllegalConfigSnafu {
msg: format!(
@@ -421,7 +417,7 @@ impl StartCommand {
),
}.fail();
}
opts.grpc.bind_addr.clone_from(addr)
opts.grpc.addr.clone_from(addr)
}
if let Some(addr) = &self.mysql_addr {
@@ -468,7 +464,7 @@ impl StartCommand {
let mut plugins = Plugins::new();
let plugin_opts = opts.plugins;
let mut opts = opts.component;
opts.grpc.detect_server_addr();
opts.grpc.detect_hostname();
let fe_opts = opts.frontend_options();
let dn_opts = opts.datanode_options();
@@ -490,8 +486,8 @@ impl StartCommand {
let metadata_dir = metadata_store_dir(data_home);
let (kv_backend, procedure_manager) = FeInstance::try_build_standalone_components(
metadata_dir,
opts.metadata_store,
opts.procedure,
opts.metadata_store.clone(),
opts.procedure.clone(),
)
.await
.context(StartFrontendSnafu)?;
@@ -852,7 +848,7 @@ mod tests {
[wal]
provider = "raft_engine"
dir = "./greptimedb_data/test/wal"
dir = "/tmp/greptimedb/test/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
@@ -860,7 +856,7 @@ mod tests {
sync_write = false
[storage]
data_home = "./greptimedb_data/"
data_home = "/tmp/greptimedb/"
type = "File"
[[storage.providers]]
@@ -892,7 +888,7 @@ mod tests {
[logging]
level = "debug"
dir = "./greptimedb_data/test/logs"
dir = "/tmp/greptimedb/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
let cmd = StartCommand {
@@ -911,7 +907,7 @@ mod tests {
assert_eq!("127.0.0.1:4000".to_string(), fe_opts.http.addr);
assert_eq!(Duration::from_secs(33), fe_opts.http.timeout);
assert_eq!(ReadableSize::mb(128), fe_opts.http.body_limit);
assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.bind_addr);
assert_eq!("127.0.0.1:4001".to_string(), fe_opts.grpc.addr);
assert!(fe_opts.mysql.enable);
assert_eq!("127.0.0.1:4002", fe_opts.mysql.addr);
assert_eq!(2, fe_opts.mysql.runtime_size);
@@ -922,10 +918,7 @@ mod tests {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = dn_opts.wal else {
unreachable!()
};
assert_eq!(
"./greptimedb_data/test/wal",
raft_engine_config.dir.unwrap()
);
assert_eq!("/tmp/greptimedb/test/wal", raft_engine_config.dir.unwrap());
assert!(matches!(
&dn_opts.storage.store,
@@ -949,7 +942,7 @@ mod tests {
}
assert_eq!("debug", logging_opts.level.as_ref().unwrap());
assert_eq!("./greptimedb_data/test/logs".to_string(), logging_opts.dir);
assert_eq!("/tmp/greptimedb/test/logs".to_string(), logging_opts.dir);
}
#[test]
@@ -961,7 +954,7 @@ mod tests {
let opts = cmd
.load_options(&GlobalOptions {
log_dir: Some("./greptimedb_data/test/logs".to_string()),
log_dir: Some("/tmp/greptimedb/test/logs".to_string()),
log_level: Some("debug".to_string()),
#[cfg(feature = "tokio-console")]
@@ -970,7 +963,7 @@ mod tests {
.unwrap()
.component;
assert_eq!("./greptimedb_data/test/logs", opts.logging.dir);
assert_eq!("/tmp/greptimedb/test/logs", opts.logging.dir);
assert_eq!("debug", opts.logging.level.unwrap());
}
@@ -1044,7 +1037,7 @@ mod tests {
assert_eq!(ReadableSize::mb(64), fe_opts.http.body_limit);
// Should be default value.
assert_eq!(fe_opts.grpc.bind_addr, GrpcOptions::default().bind_addr);
assert_eq!(fe_opts.grpc.addr, GrpcOptions::default().addr);
},
);
}

View File

@@ -63,7 +63,7 @@ mod tests {
.args([
"datanode",
"start",
"--rpc-bind-addr=0.0.0.0:4321",
"--rpc-addr=0.0.0.0:4321",
"--node-id=1",
&format!("--data-home={}", data_home.path().display()),
&format!("--wal-dir={}", wal_dir.path().display()),
@@ -80,7 +80,7 @@ mod tests {
"--log-level=off",
"cli",
"attach",
"--grpc-bind-addr=0.0.0.0:4321",
"--grpc-addr=0.0.0.0:4321",
// history commands can sneaky into stdout and mess up our tests, so disable it
"--disable-helper",
]);

View File

@@ -17,6 +17,9 @@ use std::time::Duration;
use cmd::options::GreptimeOptions;
use cmd::standalone::StandaloneOptions;
use common_config::Configurable;
use common_grpc::channel_manager::{
DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE, DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE,
};
use common_options::datanode::{ClientOptions, DatanodeClientOptions};
use common_telemetry::logging::{LoggingOptions, SlowQueryOptions, DEFAULT_OTLP_ENDPOINT};
use common_wal::config::raft_engine::RaftEngineConfig;
@@ -56,13 +59,13 @@ fn test_load_datanode_example_config() {
metadata_cache_tti: Duration::from_secs(300),
}),
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("./greptimedb_data/wal".to_string()),
dir: Some("/tmp/greptimedb/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
recovery_parallelism: 2,
..Default::default()
}),
storage: StorageConfig {
data_home: "./greptimedb_data/".to_string(),
data_home: "/tmp/greptimedb/".to_string(),
..Default::default()
},
region_engine: vec![
@@ -88,8 +91,13 @@ fn test_load_datanode_example_config() {
..Default::default()
},
grpc: GrpcOptions::default()
.with_bind_addr("127.0.0.1:3001")
.with_server_addr("127.0.0.1:3001"),
.with_addr("127.0.0.1:3001")
.with_hostname("127.0.0.1:3001"),
rpc_addr: Some("127.0.0.1:3001".to_string()),
rpc_hostname: Some("127.0.0.1".to_string()),
rpc_runtime_size: Some(8),
rpc_max_recv_message_size: Some(DEFAULT_MAX_GRPC_RECV_MESSAGE_SIZE),
rpc_max_send_message_size: Some(DEFAULT_MAX_GRPC_SEND_MESSAGE_SIZE),
..Default::default()
},
..Default::default()
@@ -136,9 +144,7 @@ fn test_load_frontend_example_config() {
remote_write: Some(Default::default()),
..Default::default()
},
grpc: GrpcOptions::default()
.with_bind_addr("127.0.0.1:4001")
.with_server_addr("127.0.0.1:4001"),
grpc: GrpcOptions::default().with_hostname("127.0.0.1:4001"),
http: HttpOptions {
cors_allowed_origins: vec!["https://example.com".to_string()],
..Default::default()
@@ -159,10 +165,10 @@ fn test_load_metasrv_example_config() {
let expected = GreptimeOptions::<MetasrvOptions> {
component: MetasrvOptions {
selector: SelectorType::default(),
data_home: "./greptimedb_data/metasrv/".to_string(),
data_home: "/tmp/metasrv/".to_string(),
server_addr: "127.0.0.1:3002".to_string(),
logging: LoggingOptions {
dir: "./greptimedb_data/logs".to_string(),
dir: "/tmp/greptimedb/logs".to_string(),
level: Some("info".to_string()),
otlp_endpoint: Some(DEFAULT_OTLP_ENDPOINT.to_string()),
tracing_sample_ratio: Some(Default::default()),
@@ -202,7 +208,7 @@ fn test_load_standalone_example_config() {
component: StandaloneOptions {
default_timezone: Some("UTC".to_string()),
wal: DatanodeWalConfig::RaftEngine(RaftEngineConfig {
dir: Some("./greptimedb_data/wal".to_string()),
dir: Some("/tmp/greptimedb/wal".to_string()),
sync_period: Some(Duration::from_secs(10)),
recovery_parallelism: 2,
..Default::default()
@@ -219,7 +225,7 @@ fn test_load_standalone_example_config() {
}),
],
storage: StorageConfig {
data_home: "./greptimedb_data/".to_string(),
data_home: "/tmp/greptimedb/".to_string(),
..Default::default()
},
logging: LoggingOptions {

View File

@@ -18,7 +18,7 @@ bytes.workspace = true
common-error.workspace = true
common-macro.workspace = true
futures.workspace = true
paste.workspace = true
paste = "1.0"
pin-project.workspace = true
rand.workspace = true
serde = { version = "1.0", features = ["derive"] }

View File

@@ -130,10 +130,3 @@ pub const SEMANTIC_TYPE_TIME_INDEX: &str = "TIMESTAMP";
pub fn is_readonly_schema(schema: &str) -> bool {
matches!(schema, INFORMATION_SCHEMA_NAME)
}
// ---- special table and fields ----
pub const TRACE_ID_COLUMN: &str = "trace_id";
pub const SPAN_ID_COLUMN: &str = "span_id";
pub const SPAN_NAME_COLUMN: &str = "span_name";
pub const PARENT_SPAN_ID_COLUMN: &str = "parent_span_id";
// ---- End of special table and fields ----

View File

@@ -12,11 +12,9 @@ common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
config.workspace = true
humantime-serde.workspace = true
num_cpus.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
snafu.workspace = true
sysinfo.workspace = true
toml.workspace = true

View File

@@ -161,7 +161,7 @@ mod tests {
[wal]
provider = "raft_engine"
dir = "./greptimedb_data/wal"
dir = "/tmp/greptimedb/wal"
file_size = "1GB"
purge_threshold = "50GB"
purge_interval = "10m"
@@ -170,7 +170,7 @@ mod tests {
[logging]
level = "debug"
dir = "./greptimedb_data/test/logs"
dir = "/tmp/greptimedb/test/logs"
"#;
write!(file, "{}", toml_str).unwrap();
@@ -246,7 +246,7 @@ mod tests {
let DatanodeWalConfig::RaftEngine(raft_engine_config) = opts.wal else {
unreachable!()
};
assert_eq!(raft_engine_config.dir.unwrap(), "./greptimedb_data/wal");
assert_eq!(raft_engine_config.dir.unwrap(), "/tmp/greptimedb/wal");
// Should be default values.
assert_eq!(opts.node_id, None);

View File

@@ -16,8 +16,6 @@ pub mod config;
pub mod error;
pub mod utils;
use std::time::Duration;
use common_base::readable_size::ReadableSize;
pub use config::*;
use serde::{Deserialize, Serialize};
@@ -36,27 +34,22 @@ pub enum Mode {
Distributed,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct KvBackendConfig {
/// The size of the metadata store backend log file.
// Kv file size in bytes
pub file_size: ReadableSize,
/// The threshold of the metadata store size to trigger a purge.
// Kv purge threshold in bytes
pub purge_threshold: ReadableSize,
/// The interval of the metadata store to trigger a purge.
#[serde(with = "humantime_serde")]
pub purge_interval: Duration,
}
impl Default for KvBackendConfig {
fn default() -> Self {
Self {
// The log file size 64MB
file_size: ReadableSize::mb(64),
// The log purge threshold 256MB
purge_threshold: ReadableSize::mb(256),
// The log purge interval 1m
purge_interval: Duration::from_secs(60),
// log file size 256MB
file_size: ReadableSize::mb(256),
// purge threshold 4GB
purge_threshold: ReadableSize::gb(4),
}
}
}

View File

@@ -35,7 +35,7 @@ orc-rust = { version = "0.5", default-features = false, features = [
"async",
] }
parquet.workspace = true
paste.workspace = true
paste = "1.0"
rand.workspace = true
regex = "1.7"
serde.workspace = true

View File

@@ -12,12 +12,9 @@ default = ["geo"]
geo = ["geohash", "h3o", "s2", "wkt", "geo-types", "dep:geo"]
[dependencies]
ahash = "0.8"
api.workspace = true
arc-swap = "1.0"
async-trait.workspace = true
bincode = "1.3"
chrono.workspace = true
common-base.workspace = true
common-catalog.workspace = true
common-error.workspace = true
@@ -29,21 +26,18 @@ common-telemetry.workspace = true
common-time.workspace = true
common-version.workspace = true
datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
datatypes.workspace = true
derive_more = { version = "1", default-features = false, features = ["display"] }
geo = { version = "0.29", optional = true }
geo-types = { version = "0.7", optional = true }
geohash = { version = "0.13", optional = true }
h3o = { version = "0.6", optional = true }
hyperloglogplus = "0.4"
jsonb.workspace = true
nalgebra.workspace = true
num = "0.4"
num-traits = "0.2"
once_cell.workspace = true
paste.workspace = true
paste = "1.0"
s2 = { version = "0.0.12", optional = true }
serde.workspace = true
serde_json.workspace = true
@@ -53,7 +47,6 @@ sql.workspace = true
statrs = "0.16"
store-api.workspace = true
table.workspace = true
uddsketch = { git = "https://github.com/GreptimeTeam/timescaledb-toolkit.git", rev = "84828fe8fb494a6a61412a3da96517fc80f7bb20" }
wkt = { version = "0.11", optional = true }
[dev-dependencies]

View File

@@ -1,433 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use datafusion::arrow::array::{Array, ArrayRef};
use datafusion::common::cast::as_primitive_array;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF, Volatility};
use datafusion::prelude::create_udaf;
use datafusion_common::cast::{as_list_array, as_struct_array};
use datafusion_common::utils::SingleRowListArrayBuilder;
use datafusion_common::ScalarValue;
use datatypes::arrow::array::{Float64Array, Int64Array, ListArray, StructArray};
use datatypes::arrow::datatypes::{
DataType, Field, Float64Type, Int64Type, TimeUnit, TimestampNanosecondType,
};
use datatypes::compute::{self, sort_to_indices};
pub const GEO_PATH_NAME: &str = "geo_path";
const LATITUDE_FIELD: &str = "lat";
const LONGITUDE_FIELD: &str = "lng";
const TIMESTAMP_FIELD: &str = "timestamp";
const DEFAULT_LIST_FIELD_NAME: &str = "item";
#[derive(Debug, Default)]
pub struct GeoPathAccumulator {
lat: Vec<Option<f64>>,
lng: Vec<Option<f64>>,
timestamp: Vec<Option<i64>>,
}
impl GeoPathAccumulator {
pub fn new() -> Self {
Self::default()
}
pub fn udf_impl() -> AggregateUDF {
create_udaf(
GEO_PATH_NAME,
// Input types: lat, lng, timestamp
vec![
DataType::Float64,
DataType::Float64,
DataType::Timestamp(TimeUnit::Nanosecond, None),
],
// Output type: list of points {[lat], [lng]}
Arc::new(DataType::Struct(
vec![
Field::new(
LATITUDE_FIELD,
DataType::List(Arc::new(Field::new(
DEFAULT_LIST_FIELD_NAME,
DataType::Float64,
true,
))),
false,
),
Field::new(
LONGITUDE_FIELD,
DataType::List(Arc::new(Field::new(
DEFAULT_LIST_FIELD_NAME,
DataType::Float64,
true,
))),
false,
),
]
.into(),
)),
Volatility::Immutable,
// Create the accumulator
Arc::new(|_| Ok(Box::new(GeoPathAccumulator::new()))),
// Intermediate state types
Arc::new(vec![DataType::Struct(
vec![
Field::new(
LATITUDE_FIELD,
DataType::List(Arc::new(Field::new(
DEFAULT_LIST_FIELD_NAME,
DataType::Float64,
true,
))),
false,
),
Field::new(
LONGITUDE_FIELD,
DataType::List(Arc::new(Field::new(
DEFAULT_LIST_FIELD_NAME,
DataType::Float64,
true,
))),
false,
),
Field::new(
TIMESTAMP_FIELD,
DataType::List(Arc::new(Field::new(
DEFAULT_LIST_FIELD_NAME,
DataType::Int64,
true,
))),
false,
),
]
.into(),
)]),
)
}
}
impl DfAccumulator for GeoPathAccumulator {
fn update_batch(&mut self, values: &[ArrayRef]) -> datafusion::error::Result<()> {
if values.len() != 3 {
return Err(DataFusionError::Internal(format!(
"Expected 3 columns for geo_path, got {}",
values.len()
)));
}
let lat_array = as_primitive_array::<Float64Type>(&values[0])?;
let lng_array = as_primitive_array::<Float64Type>(&values[1])?;
let ts_array = as_primitive_array::<TimestampNanosecondType>(&values[2])?;
let size = lat_array.len();
self.lat.reserve(size);
self.lng.reserve(size);
for idx in 0..size {
self.lat.push(if lat_array.is_null(idx) {
None
} else {
Some(lat_array.value(idx))
});
self.lng.push(if lng_array.is_null(idx) {
None
} else {
Some(lng_array.value(idx))
});
self.timestamp.push(if ts_array.is_null(idx) {
None
} else {
Some(ts_array.value(idx))
});
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
let unordered_lng_array = Float64Array::from(self.lng.clone());
let unordered_lat_array = Float64Array::from(self.lat.clone());
let ts_array = Int64Array::from(self.timestamp.clone());
let ordered_indices = sort_to_indices(&ts_array, None, None)?;
let lat_array = compute::take(&unordered_lat_array, &ordered_indices, None)?;
let lng_array = compute::take(&unordered_lng_array, &ordered_indices, None)?;
let lat_list = Arc::new(SingleRowListArrayBuilder::new(lat_array).build_list_array());
let lng_list = Arc::new(SingleRowListArrayBuilder::new(lng_array).build_list_array());
let result = ScalarValue::Struct(Arc::new(StructArray::new(
vec![
Field::new(
LATITUDE_FIELD,
DataType::List(Arc::new(Field::new("item", DataType::Float64, true))),
false,
),
Field::new(
LONGITUDE_FIELD,
DataType::List(Arc::new(Field::new("item", DataType::Float64, true))),
false,
),
]
.into(),
vec![lat_list, lng_list],
None,
)));
Ok(result)
}
fn size(&self) -> usize {
// Base size of GeoPathAccumulator struct fields
let mut total_size = std::mem::size_of::<Self>();
// Size of vectors (approximation)
total_size += self.lat.capacity() * std::mem::size_of::<Option<f64>>();
total_size += self.lng.capacity() * std::mem::size_of::<Option<f64>>();
total_size += self.timestamp.capacity() * std::mem::size_of::<Option<i64>>();
total_size
}
fn state(&mut self) -> datafusion::error::Result<Vec<ScalarValue>> {
let lat_array = Arc::new(ListArray::from_iter_primitive::<Float64Type, _, _>(vec![
Some(self.lat.clone()),
]));
let lng_array = Arc::new(ListArray::from_iter_primitive::<Float64Type, _, _>(vec![
Some(self.lng.clone()),
]));
let ts_array = Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
Some(self.timestamp.clone()),
]));
let state_struct = StructArray::new(
vec![
Field::new(
LATITUDE_FIELD,
DataType::List(Arc::new(Field::new("item", DataType::Float64, true))),
false,
),
Field::new(
LONGITUDE_FIELD,
DataType::List(Arc::new(Field::new("item", DataType::Float64, true))),
false,
),
Field::new(
TIMESTAMP_FIELD,
DataType::List(Arc::new(Field::new("item", DataType::Int64, true))),
false,
),
]
.into(),
vec![lat_array, lng_array, ts_array],
None,
);
Ok(vec![ScalarValue::Struct(Arc::new(state_struct))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> datafusion::error::Result<()> {
if states.len() != 1 {
return Err(DataFusionError::Internal(format!(
"Expected 1 states for geo_path, got {}",
states.len()
)));
}
for state in states {
let state = as_struct_array(state)?;
let lat_list = as_list_array(state.column(0))?.value(0);
let lat_array = as_primitive_array::<Float64Type>(&lat_list)?;
let lng_list = as_list_array(state.column(1))?.value(0);
let lng_array = as_primitive_array::<Float64Type>(&lng_list)?;
let ts_list = as_list_array(state.column(2))?.value(0);
let ts_array = as_primitive_array::<Int64Type>(&ts_list)?;
self.lat.extend(lat_array);
self.lng.extend(lng_array);
self.timestamp.extend(ts_array);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{Float64Array, TimestampNanosecondArray};
use datafusion::scalar::ScalarValue;
use super::*;
#[test]
fn test_geo_path_basic() {
let mut accumulator = GeoPathAccumulator::new();
// Create test data
let lat_array = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0]));
let lng_array = Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0]));
let ts_array = Arc::new(TimestampNanosecondArray::from(vec![100, 200, 300]));
// Update batch
accumulator
.update_batch(&[lat_array, lng_array, ts_array])
.unwrap();
// Evaluate
let result = accumulator.evaluate().unwrap();
if let ScalarValue::Struct(struct_array) = result {
// Verify structure
let fields = struct_array.fields().clone();
assert_eq!(fields.len(), 2);
assert_eq!(fields[0].name(), LATITUDE_FIELD);
assert_eq!(fields[1].name(), LONGITUDE_FIELD);
// Verify data
let columns = struct_array.columns();
assert_eq!(columns.len(), 2);
// Check latitude values
let lat_list = as_list_array(&columns[0]).unwrap().value(0);
let lat_array = as_primitive_array::<Float64Type>(&lat_list).unwrap();
assert_eq!(lat_array.len(), 3);
assert_eq!(lat_array.value(0), 1.0);
assert_eq!(lat_array.value(1), 2.0);
assert_eq!(lat_array.value(2), 3.0);
// Check longitude values
let lng_list = as_list_array(&columns[1]).unwrap().value(0);
let lng_array = as_primitive_array::<Float64Type>(&lng_list).unwrap();
assert_eq!(lng_array.len(), 3);
assert_eq!(lng_array.value(0), 4.0);
assert_eq!(lng_array.value(1), 5.0);
assert_eq!(lng_array.value(2), 6.0);
} else {
panic!("Expected Struct scalar value");
}
}
#[test]
fn test_geo_path_sort_by_timestamp() {
let mut accumulator = GeoPathAccumulator::new();
// Create test data with unordered timestamps
let lat_array = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0]));
let lng_array = Arc::new(Float64Array::from(vec![4.0, 5.0, 6.0]));
let ts_array = Arc::new(TimestampNanosecondArray::from(vec![300, 100, 200]));
// Update batch
accumulator
.update_batch(&[lat_array, lng_array, ts_array])
.unwrap();
// Evaluate
let result = accumulator.evaluate().unwrap();
if let ScalarValue::Struct(struct_array) = result {
// Extract arrays
let columns = struct_array.columns();
// Check latitude values
let lat_list = as_list_array(&columns[0]).unwrap().value(0);
let lat_array = as_primitive_array::<Float64Type>(&lat_list).unwrap();
assert_eq!(lat_array.len(), 3);
assert_eq!(lat_array.value(0), 2.0); // timestamp 100
assert_eq!(lat_array.value(1), 3.0); // timestamp 200
assert_eq!(lat_array.value(2), 1.0); // timestamp 300
// Check longitude values (should be sorted by timestamp)
let lng_list = as_list_array(&columns[1]).unwrap().value(0);
let lng_array = as_primitive_array::<Float64Type>(&lng_list).unwrap();
assert_eq!(lng_array.len(), 3);
assert_eq!(lng_array.value(0), 5.0); // timestamp 100
assert_eq!(lng_array.value(1), 6.0); // timestamp 200
assert_eq!(lng_array.value(2), 4.0); // timestamp 300
} else {
panic!("Expected Struct scalar value");
}
}
#[test]
fn test_geo_path_merge() {
let mut accumulator1 = GeoPathAccumulator::new();
let mut accumulator2 = GeoPathAccumulator::new();
// Create test data for first accumulator
let lat_array1 = Arc::new(Float64Array::from(vec![1.0]));
let lng_array1 = Arc::new(Float64Array::from(vec![4.0]));
let ts_array1 = Arc::new(TimestampNanosecondArray::from(vec![100]));
// Create test data for second accumulator
let lat_array2 = Arc::new(Float64Array::from(vec![2.0]));
let lng_array2 = Arc::new(Float64Array::from(vec![5.0]));
let ts_array2 = Arc::new(TimestampNanosecondArray::from(vec![200]));
// Update batches
accumulator1
.update_batch(&[lat_array1, lng_array1, ts_array1])
.unwrap();
accumulator2
.update_batch(&[lat_array2, lng_array2, ts_array2])
.unwrap();
// Get states
let state1 = accumulator1.state().unwrap();
let state2 = accumulator2.state().unwrap();
// Create a merged accumulator
let mut merged = GeoPathAccumulator::new();
// Extract the struct arrays from the states
let state_array1 = match &state1[0] {
ScalarValue::Struct(array) => array.clone(),
_ => panic!("Expected Struct scalar value"),
};
let state_array2 = match &state2[0] {
ScalarValue::Struct(array) => array.clone(),
_ => panic!("Expected Struct scalar value"),
};
// Merge state arrays
merged.merge_batch(&[state_array1]).unwrap();
merged.merge_batch(&[state_array2]).unwrap();
// Evaluate merged result
let result = merged.evaluate().unwrap();
if let ScalarValue::Struct(struct_array) = result {
// Extract arrays
let columns = struct_array.columns();
// Check latitude values
let lat_list = as_list_array(&columns[0]).unwrap().value(0);
let lat_array = as_primitive_array::<Float64Type>(&lat_list).unwrap();
assert_eq!(lat_array.len(), 2);
assert_eq!(lat_array.value(0), 1.0); // timestamp 100
assert_eq!(lat_array.value(1), 2.0); // timestamp 200
// Check longitude values (should be sorted by timestamp)
let lng_list = as_list_array(&columns[1]).unwrap().value(0);
let lng_array = as_primitive_array::<Float64Type>(&lng_list).unwrap();
assert_eq!(lng_array.len(), 2);
assert_eq!(lng_array.value(0), 4.0); // timestamp 100
assert_eq!(lng_array.value(1), 5.0); // timestamp 200
} else {
panic!("Expected Struct scalar value");
}
}
}

View File

@@ -1,329 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Two UDAFs are implemented for HyperLogLog:
//!
//! - `hll`: Accepts a string column and aggregates the values into a
//! HyperLogLog state.
//! - `hll_merge`: Accepts a binary column of states generated by `hll`
//! and merges them into a single state.
//!
//! The states can be then used to estimate the cardinality of the
//! values in the column by `hll_count` UDF.
use std::sync::Arc;
use common_query::prelude::*;
use common_telemetry::trace;
use datafusion::arrow::array::ArrayRef;
use datafusion::common::cast::{as_binary_array, as_string_array};
use datafusion::common::not_impl_err;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::function::AccumulatorArgs;
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
use datafusion::prelude::create_udaf;
use datatypes::arrow::datatypes::DataType;
use hyperloglogplus::{HyperLogLog, HyperLogLogPlus};
use crate::utils::FixedRandomState;
pub const HLL_NAME: &str = "hll";
pub const HLL_MERGE_NAME: &str = "hll_merge";
const DEFAULT_PRECISION: u8 = 14;
pub(crate) type HllStateType = HyperLogLogPlus<String, FixedRandomState>;
pub struct HllState {
hll: HllStateType,
}
impl std::fmt::Debug for HllState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "HllState<Opaque>")
}
}
impl Default for HllState {
fn default() -> Self {
Self::new()
}
}
impl HllState {
pub fn new() -> Self {
Self {
// Safety: the DEFAULT_PRECISION is fixed and valid
hll: HllStateType::new(DEFAULT_PRECISION, FixedRandomState::new()).unwrap(),
}
}
/// Create a UDF for the `hll` function.
///
/// `hll` accepts a string column and aggregates the
/// values into a HyperLogLog state.
pub fn state_udf_impl() -> AggregateUDF {
create_udaf(
HLL_NAME,
vec![DataType::Utf8],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
/// Create a UDF for the `hll_merge` function.
///
/// `hll_merge` accepts a binary column of states generated by `hll`
/// and merges them into a single state.
pub fn merge_udf_impl() -> AggregateUDF {
create_udaf(
HLL_MERGE_NAME,
vec![DataType::Binary],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(Self::create_merge_accumulator),
Arc::new(vec![DataType::Binary]),
)
}
fn update(&mut self, value: &str) {
self.hll.insert(value);
}
fn merge(&mut self, raw: &[u8]) {
if let Ok(serialized) = bincode::deserialize::<HllStateType>(raw) {
if let Ok(()) = self.hll.merge(&serialized) {
return;
}
}
trace!("Warning: Failed to merge HyperLogLog from {:?}", raw);
}
fn create_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Utf8 => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_NAME} does not support data type: {other}"),
}
}
fn create_merge_accumulator(acc_args: AccumulatorArgs) -> DfResult<Box<dyn DfAccumulator>> {
let data_type = acc_args.exprs[0].data_type(acc_args.schema)?;
match data_type {
DataType::Binary => Ok(Box::new(HllState::new())),
other => not_impl_err!("{HLL_MERGE_NAME} does not support data type: {other}"),
}
}
}
impl DfAccumulator for HllState {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let array = &values[0];
match array.data_type() {
DataType::Utf8 => {
let string_array = as_string_array(array)?;
for value in string_array.iter().flatten() {
self.update(value);
}
}
DataType::Binary => {
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
}
_ => {
return not_impl_err!(
"HLL functions do not support data type: {}",
array.data_type()
)
}
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
Ok(ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
)))
}
fn size(&self) -> usize {
std::mem::size_of_val(&self.hll)
}
fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
Ok(vec![ScalarValue::Binary(Some(
bincode::serialize(&self.hll).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize HyperLogLog: {}", e))
})?,
))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let array = &states[0];
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{BinaryArray, StringArray};
use super::*;
#[test]
fn test_hll_basic() {
let mut state = HllState::new();
state.update("1");
state.update("2");
state.update("3");
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_roundtrip() {
let mut state = HllState::new();
state.update("1");
state.update("2");
// Serialize
let serialized = state.evaluate().unwrap();
// Create new state and merge the serialized data
let mut new_state = HllState::new();
if let ScalarValue::Binary(Some(bytes)) = &serialized {
new_state.merge(bytes);
// Verify the merged state matches original
let result = new_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(new_bytes)) = result {
let mut original: HllStateType = bincode::deserialize(bytes).unwrap();
let mut merged: HllStateType = bincode::deserialize(&new_bytes).unwrap();
assert_eq!(original.count(), merged.count());
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_batch_update() {
let mut state = HllState::new();
// Test string values
let str_values = vec!["a", "b", "c", "d", "e", "f", "g", "h", "i"];
let str_array = Arc::new(StringArray::from(str_values)) as ArrayRef;
state.update_batch(&[str_array]).unwrap();
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 9);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_hll_merge_batch() {
let mut state1 = HllState::new();
state1.update("1");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
let state2_binary = state2.evaluate().unwrap();
let mut merged_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merged_state.merge_batch(&[binary_array]).unwrap();
let result = merged_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
assert_eq!(hll.count().trunc() as u32, 2);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
#[test]
fn test_hll_merge_function() {
// Create two HLL states with different values
let mut state1 = HllState::new();
state1.update("1");
state1.update("2");
let state1_binary = state1.evaluate().unwrap();
let mut state2 = HllState::new();
state2.update("2");
state2.update("3");
let state2_binary = state2.evaluate().unwrap();
// Create a merge state and merge both states
let mut merge_state = HllState::new();
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merge_state.update_batch(&[binary_array]).unwrap();
let result = merge_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let mut hll: HllStateType = bincode::deserialize(&bytes).unwrap();
// Should have 3 unique values: "1", "2", "3"
assert_eq!(hll.count().trunc() as u32, 3);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
}

View File

@@ -1,313 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the `uddsketch_state` UDAF that generate the state of
//! UDDSketch for a given set of values.
//!
//! The generated state can be used to compute approximate quantiles using
//! `uddsketch_calc` UDF.
use std::sync::Arc;
use common_query::prelude::*;
use common_telemetry::trace;
use datafusion::common::cast::{as_binary_array, as_primitive_array};
use datafusion::common::not_impl_err;
use datafusion::error::{DataFusionError, Result as DfResult};
use datafusion::logical_expr::function::AccumulatorArgs;
use datafusion::logical_expr::{Accumulator as DfAccumulator, AggregateUDF};
use datafusion::physical_plan::expressions::Literal;
use datafusion::prelude::create_udaf;
use datatypes::arrow::array::ArrayRef;
use datatypes::arrow::datatypes::{DataType, Float64Type};
use uddsketch::{SketchHashKey, UDDSketch};
pub const UDDSKETCH_STATE_NAME: &str = "uddsketch_state";
#[derive(Debug)]
pub struct UddSketchState {
uddsketch: UDDSketch,
}
impl UddSketchState {
pub fn new(bucket_size: u64, error_rate: f64) -> Self {
Self {
uddsketch: UDDSketch::new(bucket_size, error_rate),
}
}
pub fn udf_impl() -> AggregateUDF {
create_udaf(
UDDSKETCH_STATE_NAME,
vec![DataType::Int64, DataType::Float64, DataType::Float64],
Arc::new(DataType::Binary),
Volatility::Immutable,
Arc::new(|args| {
let (bucket_size, error_rate) = downcast_accumulator_args(args)?;
Ok(Box::new(UddSketchState::new(bucket_size, error_rate)))
}),
Arc::new(vec![DataType::Binary]),
)
}
fn update(&mut self, value: f64) {
self.uddsketch.add_value(value);
}
fn merge(&mut self, raw: &[u8]) {
if let Ok(uddsketch) = bincode::deserialize::<UDDSketch>(raw) {
if uddsketch.count() != 0 {
self.uddsketch.merge_sketch(&uddsketch);
}
} else {
trace!("Warning: Failed to deserialize UDDSketch from {:?}", raw);
}
}
}
fn downcast_accumulator_args(args: AccumulatorArgs) -> DfResult<(u64, f64)> {
let bucket_size = match args.exprs[0]
.as_any()
.downcast_ref::<Literal>()
.map(|lit| lit.value())
{
Some(ScalarValue::Int64(Some(value))) => *value as u64,
_ => {
return not_impl_err!(
"{} not supported for bucket size: {}",
UDDSKETCH_STATE_NAME,
&args.exprs[0]
)
}
};
let error_rate = match args.exprs[1]
.as_any()
.downcast_ref::<Literal>()
.map(|lit| lit.value())
{
Some(ScalarValue::Float64(Some(value))) => *value,
_ => {
return not_impl_err!(
"{} not supported for error rate: {}",
UDDSKETCH_STATE_NAME,
&args.exprs[1]
)
}
};
Ok((bucket_size, error_rate))
}
impl DfAccumulator for UddSketchState {
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let array = &values[2]; // the third column is data value
let f64_array = as_primitive_array::<Float64Type>(array)?;
for v in f64_array.iter().flatten() {
self.update(v);
}
Ok(())
}
fn evaluate(&mut self) -> DfResult<ScalarValue> {
Ok(ScalarValue::Binary(Some(
bincode::serialize(&self.uddsketch).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
})?,
)))
}
fn size(&self) -> usize {
// Base size of UDDSketch struct fields
let mut total_size = std::mem::size_of::<f64>() * 3 + // alpha, gamma, values_sum
std::mem::size_of::<u32>() + // compactions
std::mem::size_of::<u64>() * 2; // max_buckets, num_values
// Size of buckets (SketchHashMap)
// Each bucket entry contains:
// - SketchHashKey (enum with i64/Zero/Invalid variants)
// - SketchHashEntry (count: u64, next: SketchHashKey)
let bucket_entry_size = std::mem::size_of::<SketchHashKey>() + // key
std::mem::size_of::<u64>() + // count
std::mem::size_of::<SketchHashKey>(); // next
total_size += self.uddsketch.current_buckets_count() * bucket_entry_size;
total_size
}
fn state(&mut self) -> DfResult<Vec<ScalarValue>> {
Ok(vec![ScalarValue::Binary(Some(
bincode::serialize(&self.uddsketch).map_err(|e| {
DataFusionError::Internal(format!("Failed to serialize UDDSketch: {}", e))
})?,
))])
}
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let array = &states[0];
let binary_array = as_binary_array(array)?;
for v in binary_array.iter().flatten() {
self.merge(v);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use datafusion::arrow::array::{BinaryArray, Float64Array};
use super::*;
#[test]
fn test_uddsketch_state_basic() {
let mut state = UddSketchState::new(10, 0.01);
state.update(1.0);
state.update(2.0);
state.update(3.0);
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_roundtrip() {
let mut state = UddSketchState::new(10, 0.01);
state.update(1.0);
state.update(2.0);
// Serialize
let serialized = state.evaluate().unwrap();
// Create new state and merge the serialized data
let mut new_state = UddSketchState::new(10, 0.01);
if let ScalarValue::Binary(Some(bytes)) = &serialized {
new_state.merge(bytes);
// Verify the merged state matches original by comparing deserialized values
let original_sketch: UDDSketch = bincode::deserialize(bytes).unwrap();
let new_result = new_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(new_bytes)) = new_result {
let new_sketch: UDDSketch = bincode::deserialize(&new_bytes).unwrap();
assert_eq!(original_sketch.count(), new_sketch.count());
assert_eq!(original_sketch.sum(), new_sketch.sum());
assert_eq!(original_sketch.mean(), new_sketch.mean());
assert_eq!(original_sketch.max_error(), new_sketch.max_error());
// Compare a few quantiles to ensure statistical equivalence
for q in [0.1, 0.5, 0.9].iter() {
assert!(
(original_sketch.estimate_quantile(*q) - new_sketch.estimate_quantile(*q))
.abs()
< 1e-10,
"Quantile {} mismatch: original={}, new={}",
q,
original_sketch.estimate_quantile(*q),
new_sketch.estimate_quantile(*q)
);
}
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_batch_update() {
let mut state = UddSketchState::new(10, 0.01);
let values = vec![1.0f64, 2.0, 3.0];
let array = Arc::new(Float64Array::from(values)) as ArrayRef;
state
.update_batch(&[array.clone(), array.clone(), array])
.unwrap();
let result = state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 3);
} else {
panic!("Expected binary scalar value");
}
}
#[test]
fn test_uddsketch_state_merge_batch() {
let mut state1 = UddSketchState::new(10, 0.01);
state1.update(1.0);
let state1_binary = state1.evaluate().unwrap();
let mut state2 = UddSketchState::new(10, 0.01);
state2.update(2.0);
let state2_binary = state2.evaluate().unwrap();
let mut merged_state = UddSketchState::new(10, 0.01);
if let (ScalarValue::Binary(Some(bytes1)), ScalarValue::Binary(Some(bytes2))) =
(&state1_binary, &state2_binary)
{
let binary_array = Arc::new(BinaryArray::from(vec![
bytes1.as_slice(),
bytes2.as_slice(),
])) as ArrayRef;
merged_state.merge_batch(&[binary_array]).unwrap();
let result = merged_state.evaluate().unwrap();
if let ScalarValue::Binary(Some(bytes)) = result {
let deserialized: UDDSketch = bincode::deserialize(&bytes).unwrap();
assert_eq!(deserialized.count(), 2);
} else {
panic!("Expected binary scalar value");
}
} else {
panic!("Expected binary scalar values");
}
}
#[test]
fn test_uddsketch_state_size() {
let mut state = UddSketchState::new(10, 0.01);
let initial_size = state.size();
// Add some values to create buckets
state.update(1.0);
state.update(2.0);
state.update(3.0);
let size_with_values = state.size();
assert!(
size_with_values > initial_size,
"Size should increase after adding values: initial={}, with_values={}",
initial_size,
size_with_values
);
// Verify size increases with more buckets
state.update(10.0); // This should create a new bucket
assert!(
state.size() > size_with_values,
"Size should increase after adding new bucket: prev={}, new={}",
size_with_values,
state.size()
);
}
}

View File

@@ -63,7 +63,7 @@ pub trait Function: fmt::Display + Sync + Send {
fn signature(&self) -> Signature;
/// Evaluate the function, e.g. run/execute the function.
fn eval(&self, ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef>;
fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result<VectorRef>;
}
pub type FunctionRef = Arc<dyn Function>;

View File

@@ -18,20 +18,17 @@ use std::sync::{Arc, RwLock};
use once_cell::sync::Lazy;
use crate::admin::AdminFunction;
use crate::function::{AsyncFunctionRef, FunctionRef};
use crate::scalars::aggregate::{AggregateFunctionMetaRef, AggregateFunctions};
use crate::scalars::date::DateFunction;
use crate::scalars::expression::ExpressionFunction;
use crate::scalars::hll_count::HllCalcFunction;
use crate::scalars::ip::IpFunctions;
use crate::scalars::json::JsonFunction;
use crate::scalars::matches::MatchesFunction;
use crate::scalars::math::MathFunction;
use crate::scalars::timestamp::TimestampFunction;
use crate::scalars::uddsketch_calc::UddSketchCalcFunction;
use crate::scalars::vector::VectorFunction;
use crate::system::SystemFunction;
use crate::table::TableFunction;
#[derive(Default)]
pub struct FunctionRegistry {
@@ -108,8 +105,6 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
TimestampFunction::register(&function_registry);
DateFunction::register(&function_registry);
ExpressionFunction::register(&function_registry);
UddSketchCalcFunction::register(&function_registry);
HllCalcFunction::register(&function_registry);
// Aggregate functions
AggregateFunctions::register(&function_registry);
@@ -119,7 +114,7 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
// System and administration functions
SystemFunction::register(&function_registry);
AdminFunction::register(&function_registry);
TableFunction::register(&function_registry);
// Json related functions
JsonFunction::register(&function_registry);
@@ -131,9 +126,6 @@ pub static FUNCTION_REGISTRY: Lazy<Arc<FunctionRegistry>> = Lazy::new(|| {
#[cfg(feature = "geo")]
crate::scalars::geo::GeoFunctions::register(&function_registry);
// Ip functions
IpFunctions::register(&function_registry);
Arc::new(function_registry)
});

View File

@@ -15,13 +15,12 @@
#![feature(let_chains)]
#![feature(try_blocks)]
mod admin;
mod flush_flow;
mod macros;
pub mod scalars;
mod system;
mod table;
pub mod aggr;
pub mod function;
pub mod function_registry;
pub mod handlers;

View File

@@ -22,10 +22,7 @@ pub mod matches;
pub mod math;
pub mod vector;
pub(crate) mod hll_count;
pub mod ip;
#[cfg(test)]
pub(crate) mod test;
pub(crate) mod timestamp;
pub(crate) mod uddsketch_calc;
pub mod udf;

View File

@@ -12,16 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Deprecate Warning:
//!
//! This module is deprecated and will be removed in the future.
//! All UDAF implementation here are not maintained and should
//! not be used before they are refactored into the `src/aggr`
//! version.
mod argmax;
mod argmin;
mod diff;
mod mean;
mod polyval;
mod scipy_stats_norm_cdf;
mod scipy_stats_norm_pdf;
use std::sync::Arc;
pub use argmax::ArgmaxAccumulatorCreator;
pub use argmin::ArgminAccumulatorCreator;
use common_query::logical_plan::AggregateFunctionCreatorRef;
pub use diff::DiffAccumulatorCreator;
pub use mean::MeanAccumulatorCreator;
pub use polyval::PolyvalAccumulatorCreator;
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;
use crate::function_registry::FunctionRegistry;
use crate::scalars::vector::product::VectorProductCreator;
@@ -68,22 +76,31 @@ pub(crate) struct AggregateFunctions;
impl AggregateFunctions {
pub fn register(registry: &FunctionRegistry) {
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
"vec_sum",
1,
Arc::new(|| Arc::new(VectorSumCreator::default())),
)));
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
"vec_product",
1,
Arc::new(|| Arc::new(VectorProductCreator::default())),
)));
macro_rules! register_aggr_func {
($name :expr, $arg_count :expr, $creator :ty) => {
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
$name,
$arg_count,
Arc::new(|| Arc::new(<$creator>::default())),
)));
};
}
register_aggr_func!("diff", 1, DiffAccumulatorCreator);
register_aggr_func!("mean", 1, MeanAccumulatorCreator);
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
register_aggr_func!("vec_sum", 1, VectorSumCreator);
register_aggr_func!("vec_product", 1, VectorProductCreator);
#[cfg(feature = "geo")]
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
register_aggr_func!(
"json_encode_path",
3,
Arc::new(|| Arc::new(super::geo::encoding::JsonPathEncodeFunctionCreator::default())),
)));
super::geo::encoding::JsonPathEncodeFunctionCreator
);
}
}

View File

@@ -0,0 +1,208 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
// return the index of the max value
#[derive(Debug, Default)]
pub struct Argmax<T> {
max: Option<T>,
n: u64,
}
impl<T> Argmax<T>
where
T: PartialOrd + Copy,
{
fn update(&mut self, value: T, index: u64) {
if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
self.max = Some(value);
self.n = index;
}
}
}
impl<T> Accumulator for Argmax<T>
where
T: WrapperType + PartialOrd,
{
fn state(&self) -> Result<Vec<Value>> {
match self.max {
Some(max) => Ok(vec![max.into(), self.n.into()]),
_ => Ok(vec![Value::Null, self.n.into()]),
}
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
let column = &values[0];
let column: &<T as Scalar>::VectorType = if column.is_const() {
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
for (i, v) in column.iter_data().enumerate() {
if let Some(value) = v {
self.update(value, i as u64);
}
}
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let max = &states[0];
let index = &states[1];
let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
index
.iter_data()
.flatten()
.zip(max.iter_data().flatten())
.for_each(|(i, max)| self.update(max, i));
Ok(())
}
fn evaluate(&self) -> Result<Value> {
match self.max {
Some(_) => Ok(self.n.into()),
_ => Ok(Value::Null),
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ArgmaxAccumulatorCreator {}
impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"ARGMAX\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
input_types.into_iter().next().unwrap(),
ConcreteDataType::uint64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut argmax = Argmax::<i32>::default();
argmax.update_batch(&[]).unwrap();
assert_eq!(Value::Null, argmax.evaluate().unwrap());
// test update one not-null value
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
// test update one null value
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::Null, argmax.evaluate().unwrap());
// test update no null-value batch
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(3),
]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(2_u64), argmax.evaluate().unwrap());
// test update null-value batch
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(4),
]))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(2_u64), argmax.evaluate().unwrap());
// test update with constant vector
let mut argmax = Argmax::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
argmax.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
}
}

View File

@@ -0,0 +1,216 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::Ordering;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;
// // https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
#[derive(Debug, Default)]
pub struct Argmin<T> {
min: Option<T>,
n: u32,
}
impl<T> Argmin<T>
where
T: Copy + PartialOrd,
{
fn update(&mut self, value: T, index: u32) {
match self.min {
Some(min) => {
if let Some(Ordering::Greater) = min.partial_cmp(&value) {
self.min = Some(value);
self.n = index;
}
}
None => {
self.min = Some(value);
self.n = index;
}
}
}
}
impl<T> Accumulator for Argmin<T>
where
T: WrapperType + PartialOrd,
{
fn state(&self) -> Result<Vec<Value>> {
match self.min {
Some(min) => Ok(vec![min.into(), self.n.into()]),
_ => Ok(vec![Value::Null, self.n.into()]),
}
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 1, InvalidInputStateSnafu);
let column = &values[0];
let column: &<T as Scalar>::VectorType = if column.is_const() {
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
for (i, v) in column.iter_data().enumerate() {
if let Some(value) = v {
self.update(value, i as u32);
}
}
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let min = &states[0];
let index = &states[1];
let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
index
.iter_data()
.flatten()
.zip(min.iter_data().flatten())
.for_each(|(i, min)| self.update(min, i));
Ok(())
}
fn evaluate(&self) -> Result<Value> {
match self.min {
Some(_) => Ok(self.n.into()),
_ => Ok(Value::Null),
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ArgminAccumulatorCreator {}
impl AggregateFunctionCreator for ArgminAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"ARGMIN\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::uint32_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
input_types.into_iter().next().unwrap(),
ConcreteDataType::uint32_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut argmin = Argmin::<i32>::default();
argmin.update_batch(&[]).unwrap();
assert_eq!(Value::Null, argmin.evaluate().unwrap());
// test update one not-null value
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
// test update one null value
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::Null, argmin.evaluate().unwrap());
// test update no null-value batch
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(3),
]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
// test update null-value batch
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(4),
]))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
// test update with constant vector
let mut argmin = Argmin::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
argmin.update_batch(&v).unwrap();
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
}
}

View File

@@ -0,0 +1,252 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
// https://numpy.org/doc/stable/reference/generated/numpy.diff.html
// I is the input type, O is the output type.
#[derive(Debug, Default)]
pub struct Diff<I, O> {
values: Vec<I>,
_phantom: PhantomData<O>,
}
impl<I, O> Diff<I, O> {
fn push(&mut self, value: I) {
self.values.push(value);
}
}
impl<I, O> Accumulator for Diff<I, O>
where
I: WrapperType,
O: WrapperType,
I::Native: AsPrimitive<O::Native>,
O::Native: std::ops::Sub<Output = O::Native>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![Value::List(ListValue::new(
nums,
I::LogicalType::build_data_type(),
))])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 1, InvalidInputStateSnafu);
let column = &values[0];
let mut len = 1;
let column: &<I as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
let states = &states[0];
let states = states
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
states.vector_type_name()
),
})?;
for state in states.values_iter() {
if let Some(state) = state.context(FromScalarValueSnafu)? {
self.update_batch(&[state])?;
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
if self.values.is_empty() || self.values.len() == 1 {
return Ok(Value::Null);
}
let diff = self
.values
.windows(2)
.map(|x| {
let native = x[1].into_native().as_() - x[0].into_native().as_();
O::from_native(native).into()
})
.collect::<Vec<Value>>();
let diff = Value::List(ListValue::new(diff, O::LogicalType::build_data_type()));
Ok(diff)
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct DiffAccumulatorCreator {}
impl AggregateFunctionCreator for DiffAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"DIFF\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(ConcreteDataType::list_datatype($S::default().into()))
},
{
unreachable!()
}
)
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
with_match_primitive_type_id!(
input_types[0].logical_type_id(),
|$S| {
Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
},
{
unreachable!()
}
)
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut diff = Diff::<i32, i64>::default();
diff.update_batch(&[]).unwrap();
assert!(diff.values.is_empty());
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update one not-null value
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
diff.update_batch(&v).unwrap();
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update one null value
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
diff.update_batch(&v).unwrap();
assert_eq!(Value::Null, diff.evaluate().unwrap());
// test update no null-value batch
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(2),
]))];
let values = vec![Value::from(2_i64), Value::from(1_i64)];
diff.update_batch(&v).unwrap();
assert_eq!(
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
diff.evaluate().unwrap()
);
// test update null-value batch
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
]))];
let values = vec![Value::from(5_i64), Value::from(1_i64)];
diff.update_batch(&v).unwrap();
assert_eq!(
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
diff.evaluate().unwrap()
);
// test update with constant vector
let mut diff = Diff::<i32, i64>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
4,
))];
let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
diff.update_batch(&v).unwrap();
assert_eq!(
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
diff.evaluate().unwrap()
);
}
}

View File

@@ -0,0 +1,238 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::WrapperType;
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt};
#[derive(Debug, Default)]
pub struct Mean<T> {
sum: f64,
n: u64,
_phantom: PhantomData<T>,
}
impl<T> Mean<T>
where
T: WrapperType,
T::Native: AsPrimitive<f64>,
{
#[inline(always)]
fn push(&mut self, value: T) {
self.sum += value.into_native().as_();
self.n += 1;
}
#[inline(always)]
fn update(&mut self, sum: f64, n: u64) {
self.sum += sum;
self.n += n;
}
}
impl<T> Accumulator for Mean<T>
where
T: WrapperType,
T::Native: AsPrimitive<f64>,
{
fn state(&self) -> Result<Vec<Value>> {
Ok(vec![self.sum.into(), self.n.into()])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 1, InvalidInputStateSnafu);
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let sum = &states[0];
let n = &states[1];
let sum = sum
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Float64Vector, got vector type {}",
sum.vector_type_name()
),
})?;
let n = n
.as_any()
.downcast_ref::<UInt64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect UInt64Vector, got vector type {}",
sum.vector_type_name()
),
})?;
sum.iter_data().zip(n.iter_data()).for_each(|(sum, n)| {
if let (Some(sum), Some(n)) = (sum, n) {
self.update(sum, n);
}
});
Ok(())
}
fn evaluate(&self) -> Result<Value> {
if self.n == 0 {
return Ok(Value::Null);
}
let values = self.sum / self.n as f64;
Ok(values.into())
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct MeanAccumulatorCreator {}
impl AggregateFunctionCreator for MeanAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
},
{
let err_msg = format!(
"\"MEAN\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::float64_datatype(),
ConcreteDataType::uint64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut mean = Mean::<i32>::default();
mean.update_batch(&[]).unwrap();
assert_eq!(Value::Null, mean.evaluate().unwrap());
// test update one not-null value
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());
// test update one null value
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::Null, mean.evaluate().unwrap());
// test update no null-value batch
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-1i32),
Some(1),
Some(2),
]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(0.6666666666666666), mean.evaluate().unwrap());
// test update null-value batch
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
]))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(1.6666666666666667), mean.evaluate().unwrap());
// test update with constant vector
let mut mean = Mean::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
10,
))];
mean.update_batch(&v).unwrap();
assert_eq!(Value::from(4.0), mean.evaluate().unwrap());
}
}

View File

@@ -0,0 +1,329 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::marker::PhantomData;
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
// https://numpy.org/doc/stable/reference/generated/numpy.polyval.html
#[derive(Debug, Default)]
pub struct Polyval<T, PolyT>
where
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
values: Vec<T>,
// DataFusion casts constant in into i64 type.
x: Option<i64>,
_phantom: PhantomData<PolyT>,
}
impl<T, PolyT> Polyval<T, PolyT>
where
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T, PolyT> Accumulator for Polyval<T, PolyT>
where
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
i64: AsPrimitive<<PolyT as WrapperType>::Native>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.x.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
// This is a unary accumulator, so only one column is provided.
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
let x = &values[1];
let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
for i in 1..x.len() {
ensure!(first == x.get(i), InvalidInputColSnafu);
}
let first = match first {
Value::Int64(v) => v,
// unreachable because we have checked `first` is not null and is i64 above
_ => unreachable!(),
};
if let Some(x) = self.x {
ensure!(x == first, InvalidInputColSnafu);
} else {
self.x = Some(first);
};
Ok(())
}
// DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
// merge states from other accumulators (returned by `state()` method).
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let x = &states[1];
let x = x
.as_any()
.downcast_ref::<Int64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Int64Vector, got vector type {}",
x.vector_type_name()
),
})?;
let x = x.get(0);
if x.is_null() {
return Ok(());
}
let x = match x {
Value::Int64(x) => x,
_ => unreachable!(),
};
self.x = Some(x);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
// DataFusion expects this function to return the final value of this aggregator.
fn evaluate(&self) -> Result<Value> {
if self.values.is_empty() {
return Ok(Value::Null);
}
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let len = self.values.len();
let polyval: PolyT = self
.values
.iter()
.enumerate()
.map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
.sum();
Ok(polyval.into())
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct PolyvalAccumulatorCreator {}
impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"POLYVAL\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
let input_type = self.input_types()?[0].logical_type_id();
with_match_primitive_type_id!(
input_type,
|$S| {
Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
},
{
unreachable!()
}
)
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
ConcreteDataType::int64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::Int32Vector;
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut polyval = Polyval::<i32, i64>::default();
polyval.update_batch(&[]).unwrap();
assert!(polyval.values.is_empty());
assert_eq!(Value::Null, polyval.evaluate().unwrap());
// test update one not-null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(3)])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());
// test update one null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Null, polyval.evaluate().unwrap());
// test update no null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(13), polyval.evaluate().unwrap());
// test update null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
Some(2_i64),
])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(13), polyval.evaluate().unwrap());
// test update with constant vector
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
];
polyval.update_batch(&v).unwrap();
assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());
}
}

View File

@@ -0,0 +1,270 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
use statrs::distribution::{ContinuousCDF, Normal};
use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
#[derive(Debug, Default)]
pub struct ScipyStatsNormCdf<T> {
values: Vec<T>,
x: Option<f64>,
}
impl<T> ScipyStatsNormCdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T> Accumulator for ScipyStatsNormCdf<T>
where
T: WrapperType + std::iter::Sum<T>,
T::Native: AsPrimitive<f64>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&x| x.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.x.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
let first = match first {
Value::Float64(OrderedFloat(v)) => v,
// unreachable because we have checked `first` is not null and is i64 above
_ => unreachable!(),
};
if let Some(x) = self.x {
ensure!(x == first, InvalidInputColSnafu);
} else {
self.x = Some(first);
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let x = &states[1];
let x = x
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Float64Vector, got vector type {}",
x.vector_type_name()
),
})?;
let x = x.get(0);
if x.is_null() {
return Ok(());
}
let x = match x {
Value::Float64(OrderedFloat(x)) => x,
_ => unreachable!(),
};
self.x = Some(x);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
Ok(n.cdf(x).into())
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ScipyStatsNormCdfAccumulatorCreator {}
impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"SCIPYSTATSNORMCDF\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types[0].clone()),
ConcreteDataType::float64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
scipy_stats_norm_cdf.update_batch(&[]).unwrap();
assert!(scipy_stats_norm_cdf.values.is_empty());
assert_eq!(Value::Null, scipy_stats_norm_cdf.evaluate().unwrap());
// test update no null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_cdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.8086334555398362),
scipy_stats_norm_cdf.evaluate().unwrap()
);
// test update null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_cdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.5412943699039795),
scipy_stats_norm_cdf.evaluate().unwrap()
);
}
}

View File

@@ -0,0 +1,271 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
use common_query::error::{
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
Result,
};
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
use statrs::distribution::{Continuous, Normal};
use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
#[derive(Debug, Default)]
pub struct ScipyStatsNormPdf<T> {
values: Vec<T>,
x: Option<f64>,
}
impl<T> ScipyStatsNormPdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
}
impl<T> Accumulator for ScipyStatsNormPdf<T>
where
T: WrapperType,
T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.values
.iter()
.map(|&x| x.into())
.collect::<Vec<Value>>();
Ok(vec![
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
self.x.into(),
])
}
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}
ensure!(values.len() == 2, InvalidInputStateSnafu);
ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);
if values[0].len() == 0 {
return Ok(());
}
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { Helper::static_cast(column) }
};
let x = &values[1];
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
ensure!(!first.is_null(), InvalidInputColSnafu);
let first = match first {
Value::Float64(OrderedFloat(v)) => v,
// unreachable because we have checked `first` is not null and is i64 above
_ => unreachable!(),
};
if let Some(x) = self.x {
ensure!(x == first, InvalidInputColSnafu);
} else {
self.x = Some(first);
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}
ensure!(
states.len() == 2,
BadAccumulatorImplSnafu {
err_msg: "expect 2 states in `merge_batch`",
}
);
let x = &states[1];
let x = x
.as_any()
.downcast_ref::<Float64Vector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect Float64Vector, got vector type {}",
x.vector_type_name()
),
})?;
let x = x.get(0);
if x.is_null() {
return Ok(());
}
let x = match x {
Value::Float64(OrderedFloat(x)) => x,
_ => unreachable!(),
};
self.x = Some(x);
let values = &states[0];
let values = values
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
values.vector_type_name()
),
})?;
for value in values.values_iter() {
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}
fn evaluate(&self) -> Result<Value> {
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
let x = if let Some(x) = self.x {
x
} else {
return Ok(Value::Null);
};
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
Ok(n.pdf(x).into())
}
}
}
#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct ScipyStatsNormPdfAccumulatorCreator {}
impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
"\"SCIPYSTATSNORMpdf\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}
fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(ConcreteDataType::float64_datatype())
}
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
Ok(vec![
ConcreteDataType::list_datatype(input_types[0].clone()),
ConcreteDataType::float64_datatype(),
])
}
}
#[cfg(test)]
mod test {
use datatypes::vectors::{Float64Vector, Int32Vector};
use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
scipy_stats_norm_pdf.update_batch(&[]).unwrap();
assert!(scipy_stats_norm_pdf.values.is_empty());
assert_eq!(Value::Null, scipy_stats_norm_pdf.evaluate().unwrap());
// test update no null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_pdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.17843340219081558),
scipy_stats_norm_pdf.evaluate().unwrap()
);
// test update null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),
Some(2.0_f64),
])),
];
scipy_stats_norm_pdf.update_batch(&v).unwrap();
assert_eq!(
Value::from(0.12343972049858312),
scipy_stats_norm_pdf.evaluate().unwrap()
);
}
}

View File

@@ -58,7 +58,7 @@ impl Function for DateAddFunction {
)
}
fn eval(&self, _func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
@@ -146,7 +146,7 @@ mod tests {
let time_vector = TimestampSecondVector::from(times.clone());
let interval_vector = IntervalDayTimeVector::from_vec(intervals);
let args: Vec<VectorRef> = vec![Arc::new(time_vector), Arc::new(interval_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
@@ -178,7 +178,7 @@ mod tests {
let date_vector = DateVector::from(dates.clone());
let interval_vector = IntervalYearMonthVector::from_vec(intervals);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {

View File

@@ -43,6 +43,7 @@ impl Function for DateFormatFunction {
helper::one_of_sigs2(
vec![
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
@@ -52,7 +53,7 @@ impl Function for DateFormatFunction {
)
}
fn eval(&self, func_ctx: &FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
fn eval(&self, func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
ensure!(
columns.len() == 2,
InvalidFuncArgsSnafu {
@@ -104,6 +105,22 @@ impl Function for DateFormatFunction {
results.push(result.as_deref());
}
}
ConcreteDataType::DateTime(_) => {
for i in 0..size {
let datetime = left.get(i).as_datetime();
let format = formats.get(i).as_string();
let result = match (datetime, format) {
(Some(datetime), Some(fmt)) => datetime
.as_formatted_string(&fmt, Some(&func_ctx.query_ctx.timezone()))
.map_err(BoxedError::new)
.context(error::ExecuteSnafu)?,
_ => None,
};
results.push(result.as_deref());
}
}
_ => {
return UnsupportedInputDataTypeSnafu {
function: NAME,
@@ -130,7 +147,7 @@ mod tests {
use common_query::prelude::{TypeSignature, Volatility};
use datatypes::prelude::{ConcreteDataType, ScalarVector};
use datatypes::value::Value;
use datatypes::vectors::{DateVector, StringVector, TimestampSecondVector};
use datatypes::vectors::{DateTimeVector, DateVector, StringVector, TimestampSecondVector};
use super::{DateFormatFunction, *};
@@ -152,11 +169,16 @@ mod tests {
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::date_datatype()]).unwrap()
);
assert_eq!(
ConcreteDataType::string_datatype(),
f.return_type(&[ConcreteDataType::datetime_datatype()])
.unwrap()
);
assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::OneOf(sigs),
volatility: Volatility::Immutable
} if sigs.len() == 5));
} if sigs.len() == 6));
}
#[test]
@@ -180,7 +202,7 @@ mod tests {
let time_vector = TimestampSecondVector::from(times.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(time_vector), Arc::new(interval_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
@@ -221,7 +243,48 @@ mod tests {
let date_vector = DateVector::from(dates.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(&FunctionContext::default(), &args).unwrap();
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {
let v = vector.get(i);
let result = results.get(i).unwrap();
if result.is_none() {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::String(s) => {
assert_eq!(s.as_utf8(), result.unwrap());
}
_ => unreachable!(),
}
}
}
#[test]
fn test_datetime_date_format() {
let f = DateFormatFunction;
let dates = vec![Some(123), None, Some(42), None];
let formats = vec![
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
"%Y-%m-%d %T.%3f",
];
let results = [
Some("1970-01-01 00:00:00.123"),
None,
Some("1970-01-01 00:00:00.042"),
None,
];
let date_vector = DateTimeVector::from(dates.clone());
let interval_vector = StringVector::from_vec(formats);
let args: Vec<VectorRef> = vec![Arc::new(date_vector), Arc::new(interval_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in dates.iter().enumerate() {

Some files were not shown because too many files have changed in this diff Show More