Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-25 23:49:58 +00:00

Compare commits: 13 commits (recording_... vs. test/dev-b)
| SHA1 |
|---|
| 96187618c4 |
| 57695ea21f |
| 3b7ff55b7c |
| 6b6cbe852a |
| 61c3842db5 |
| 79dfc2f9ea |
| f4ec1cf201 |
| f91a183e83 |
| f1bd2d51fe |
| 312c174d89 |
| 9b3157b27d |
| 7f48184e35 |
| 6456d4bdb5 |
@@ -41,6 +41,13 @@ runs:
username: ${{ inputs.dockerhub-image-registry-username }}
password: ${{ inputs.dockerhub-image-registry-token }}

- name: Set up qemu for multi-platform builds
uses: docker/setup-qemu-action@v3
with:
platforms: linux/amd64,linux/arm64
# The latest version will lead to segmentation fault.
image: tonistiigi/binfmt:qemu-v7.0.0-28

- name: Build and push dev-builder-ubuntu image
shell: bash
if: ${{ inputs.build-dev-builder-ubuntu == 'true' }}

@@ -69,8 +76,8 @@ runs:
run: |
make dev-builder \
BASE_IMAGE=android \
BUILDX_MULTI_PLATFORM_BUILD=amd64 \
IMAGE_REGISTRY=${{ inputs.dockerhub-image-registry }} \
IMAGE_NAMESPACE=${{ inputs.dockerhub-image-namespace }} \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }} && \
DEV_BUILDER_IMAGE_TAG=${{ inputs.version }}

docker push ${{ inputs.dockerhub-image-registry }}/${{ inputs.dockerhub-image-namespace }}/dev-builder-android:${{ inputs.version }}

.github/actions/start-runner/action.yml (vendored, 2 changes)

@@ -56,7 +56,7 @@ runs:

- name: Start EC2 runner
if: startsWith(inputs.runner, 'ec2')
uses: machulav/ec2-github-runner@v2.3.8
uses: machulav/ec2-github-runner@v2
id: start-linux-arm64-ec2-runner
with:
mode: start

.github/actions/stop-runner/action.yml (vendored, 2 changes)

@@ -33,7 +33,7 @@ runs:

- name: Stop EC2 runner
if: ${{ inputs.label && inputs.ec2-instance-id }}
uses: machulav/ec2-github-runner@v2.3.8
uses: machulav/ec2-github-runner@v2
with:
mode: stop
label: ${{ inputs.label }}

.github/workflows/apidoc.yml (vendored, 2 changes)

@@ -14,7 +14,7 @@ name: Build API docs

jobs:
apidoc:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:

.github/workflows/dev-build.yml (vendored, 22 changes)

@@ -16,11 +16,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G

@@ -83,7 +83,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}

@@ -218,7 +218,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
build-result: ${{ steps.set-build-result.outputs.build-result }}
steps:

@@ -251,7 +251,7 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
continue-on-error: true
steps:
- uses: actions/checkout@v4

@@ -283,7 +283,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,

@@ -309,7 +309,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,

@@ -337,7 +337,7 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
permissions:
issues: write

.github/workflows/develop.yml (vendored, 22 changes)

@@ -23,7 +23,7 @@ concurrency:
jobs:
check-typos-and-docs:
name: Check typos and docs
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:

@@ -36,7 +36,7 @@ jobs:
|| (echo "'config/config.md' is not up-to-date, please run 'make config-docs'." && exit 1)

license-header-check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
name: Check License Header
steps:
- uses: actions/checkout@v4

@@ -49,7 +49,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4

@@ -72,7 +72,7 @@ jobs:

toml:
name: Toml Check
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4

@@ -89,7 +89,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4

@@ -248,7 +248,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4

@@ -568,7 +568,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
mode:
- name: "Basic"
opts: ""

@@ -607,7 +607,7 @@ jobs:

fmt:
name: Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4

@@ -624,7 +624,7 @@ jobs:

clippy:
name: Clippy
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 60
steps:
- uses: actions/checkout@v4

@@ -710,7 +710,7 @@ jobs:

coverage:
if: github.event_name == 'merge_group'
runs-on: ubuntu-22.04-8-cores
runs-on: ubuntu-20.04-8-cores
timeout-minutes: 60
steps:
- uses: actions/checkout@v4

@@ -770,7 +770,7 @@ jobs:
# compat:
# name: Compatibility Test
# needs: build
# runs-on: ubuntu-22.04
# runs-on: ubuntu-20.04
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v4

.github/workflows/docbot.yml (vendored, 2 changes)

@@ -9,7 +9,7 @@ concurrency:

jobs:
docbot:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
permissions:
pull-requests: write
contents: read

.github/workflows/docs.yml (vendored, 16 changes)

@@ -31,7 +31,7 @@ name: CI
jobs:
typos:
name: Spell Check with Typos
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:

@@ -39,7 +39,7 @@ jobs:
- uses: crate-ci/typos@master

license-header-check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
name: Check License Header
steps:
- uses: actions/checkout@v4

@@ -49,29 +49,29 @@ jobs:

check:
name: Check
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'

fmt:
name: Rustfmt
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'

clippy:
name: Clippy
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'

coverage:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'

test:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- run: 'echo "No action required"'

@@ -80,7 +80,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest ]
os: [ ubuntu-20.04 ]
mode:
- name: "Basic"
- name: "Remote WAL"

.github/workflows/nightly-build.yml (vendored, 22 changes)

@@ -14,11 +14,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G

@@ -70,7 +70,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}

@@ -182,7 +182,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
nightly-build-result: ${{ steps.set-nightly-build-result.outputs.nightly-build-result }}
steps:

@@ -214,7 +214,7 @@ jobs:
allocate-runners,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.

@@ -249,7 +249,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,

@@ -275,7 +275,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,

@@ -303,7 +303,7 @@ jobs:
needs: [
release-images-to-dockerhub
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
permissions:
issues: write
env:

.github/workflows/nightly-ci.yml (vendored, 6 changes)

@@ -13,7 +13,7 @@ jobs:
sqlness-test:
name: Run sqlness test
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4

@@ -133,7 +133,7 @@ jobs:
name: Check status
needs: [sqlness-test, sqlness-windows, test-on-windows]
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
check-result: ${{ steps.set-check-result.outputs.check-result }}
steps:

@@ -146,7 +146,7 @@ jobs:
if: ${{ github.repository == 'GreptimeTeam/greptimedb' && always() }} # Not requiring successful dependent jobs, always run.
name: Send notification to Greptime team
needs: [check-status]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_DEVELOP_CHANNEL }}
steps:

@@ -29,7 +29,7 @@ jobs:
release-dev-builder-images:
name: Release dev builder images
if: ${{ inputs.release_dev_builder_ubuntu_image || inputs.release_dev_builder_centos_image || inputs.release_dev_builder_android_image }} # Only manually trigger this job.
runs-on: ubuntu-latest
runs-on: ubuntu-22.04-16-cores
outputs:
version: ${{ steps.set-version.outputs.version }}
steps:

@@ -63,7 +63,7 @@ jobs:

release-dev-builder-images-ecr:
name: Release dev builder images to AWS ECR
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
needs: [
release-dev-builder-images
]

@@ -148,7 +148,7 @@ jobs:

release-dev-builder-images-cn: # Note: Be careful issue: https://github.com/containers/skopeo/issues/1874 and we decide to use the latest stable skopeo container.
name: Release dev builder images to CN region
runs-on: ubuntu-latest
runs-on: ubuntu-22.04
needs: [
release-dev-builder-images
]

.github/workflows/release.yml (vendored, 26 changes)

@@ -18,11 +18,11 @@ on:
description: The runner uses to build linux-amd64 artifacts
default: ec2-c6i.4xlarge-amd64
options:
- ubuntu-22.04
- ubuntu-22.04-8-cores
- ubuntu-22.04-16-cores
- ubuntu-22.04-32-cores
- ubuntu-22.04-64-cores
- ubuntu-20.04
- ubuntu-20.04-8-cores
- ubuntu-20.04-16-cores
- ubuntu-20.04-32-cores
- ubuntu-20.04-64-cores
- ec2-c6i.xlarge-amd64 # 4C8G
- ec2-c6i.2xlarge-amd64 # 8C16G
- ec2-c6i.4xlarge-amd64 # 16C32G

@@ -97,7 +97,7 @@ jobs:
allocate-runners:
name: Allocate runners
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
outputs:
linux-amd64-runner: ${{ steps.start-linux-amd64-runner.outputs.label }}
linux-arm64-runner: ${{ steps.start-linux-arm64-runner.outputs.label }}

@@ -299,7 +299,7 @@ jobs:
build-linux-amd64-artifacts,
build-linux-arm64-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-2004-16-cores
outputs:
build-image-result: ${{ steps.set-build-image-result.outputs.build-image-result }}
steps:

@@ -335,7 +335,7 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# When we push to ACR, it's easy to fail due to some unknown network issues.
# However, we don't want to fail the whole workflow because of this.
# The ACR have daily sync with DockerHub, so don't worry about the image not being updated.

@@ -377,7 +377,7 @@ jobs:
build-windows-artifacts,
release-images-to-dockerhub,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
with:

@@ -396,7 +396,7 @@ jobs:
name: Stop linux-amd64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-amd64-artifacts,

@@ -422,7 +422,7 @@ jobs:
name: Stop linux-arm64 runner
# Only run this job when the runner is allocated.
if: ${{ always() }}
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
needs: [
allocate-runners,
build-linux-arm64-artifacts,

@@ -448,7 +448,7 @@ jobs:
name: Bump doc version
if: ${{ github.event_name == 'push' || github.event_name == 'schedule' }}
needs: [allocate-runners]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.

@@ -475,7 +475,7 @@ jobs:
build-macos-artifacts,
build-windows-artifacts,
]
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs
permissions:
issues: write # Allows the action to create issues for cyborg.

.github/workflows/semantic-pull-request.yml (vendored, 2 changes)

@@ -13,7 +13,7 @@ concurrency:

jobs:
check:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- uses: actions/checkout@v4

Cargo.lock (generated, 1 change)

@@ -4167,7 +4167,6 @@ dependencies = [
"bytes",
"cache",
"catalog",
"chrono",
"client",
"common-base",
"common-catalog",

Makefile (2 changes)

@@ -60,6 +60,8 @@ ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), all)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64,linux/arm64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), amd64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/amd64 --push
else ifeq ($(BUILDX_MULTI_PLATFORM_BUILD), arm64)
BUILDX_MULTI_PLATFORM_BUILD_OPTS := --platform linux/arm64 --push
else
BUILDX_MULTI_PLATFORM_BUILD_OPTS := -o type=docker
endif

@@ -1,4 +1,4 @@
FROM ubuntu:22.04 as builder
FROM ubuntu:20.04 as builder

ARG CARGO_PROFILE
ARG FEATURES

@@ -1,4 +1,4 @@
FROM ubuntu:latest
FROM ubuntu:22.04

# The binary name of GreptimeDB executable.
# Defaults to "greptime", but sometimes in other projects it might be different.

@@ -41,7 +41,7 @@ RUN mv protoc3/include/* /usr/local/include/
# and the repositories are pulled from trusted sources (still us, of course). Doing so does not violate the intention
# of the Git's addition to the "safe.directory" at the first place (see the commit message here:
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9).
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# There's also another solution to this, that we add the desired submodules to the safe directory, instead of using
# wildcard here. However, that requires the git's config files and the submodules all owned by the very same user.
# It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker,
# it can be a different user that have prepared the submodules.

@@ -38,7 +38,6 @@ use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef};
use session::context::{Channel, QueryContext};
use snafu::prelude::*;
use table::dist_table::DistTable;
use table::metadata::TableId;
use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME};
use table::table_name::TableName;
use table::TableRef;

@@ -287,28 +286,6 @@ impl CatalogManager for KvBackendCatalogManager {
return Ok(None);
}

async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>> {
let table_info_values = self
.table_metadata_manager
.table_info_manager()
.batch_get(table_ids)
.await
.context(TableMetadataManagerSnafu)?;

let tables = table_info_values
.into_values()
.filter(|t| t.table_info.catalog_name == catalog && t.table_info.schema_name == schema)
.map(build_table)
.collect::<Result<Vec<_>>>()?;

Ok(tables)
}

fn tables<'a>(
&'a self,
catalog: &'a str,

@@ -87,14 +87,6 @@ pub trait CatalogManager: Send + Sync {
query_ctx: Option<&QueryContext>,
) -> Result<Option<TableRef>>;

/// Returns the tables by table ids.
async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>>;

/// Returns all tables with a stream by catalog and schema.
fn tables<'a>(
&'a self,

@@ -14,7 +14,7 @@

use std::any::Any;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::sync::{Arc, RwLock, Weak};

use async_stream::{stream, try_stream};

@@ -28,7 +28,6 @@ use common_meta::kv_backend::memory::MemoryKvBackend;
use futures_util::stream::BoxStream;
use session::context::QueryContext;
use snafu::OptionExt;
use table::metadata::TableId;
use table::TableRef;

use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};

@@ -144,33 +143,6 @@ impl CatalogManager for MemoryCatalogManager {
Ok(result)
}

async fn tables_by_ids(
&self,
catalog: &str,
schema: &str,
table_ids: &[TableId],
) -> Result<Vec<TableRef>> {
let catalogs = self.catalogs.read().unwrap();

let schemas = catalogs.get(catalog).context(CatalogNotFoundSnafu {
catalog_name: catalog,
})?;

let tables = schemas
.get(schema)
.context(SchemaNotFoundSnafu { catalog, schema })?;

let filter_ids: HashSet<_> = table_ids.iter().collect();
// It is very inefficient, but we do not need to optimize it since it will not be called in `MemoryCatalogManager`.
let tables = tables
.values()
.filter(|t| filter_ids.contains(&t.table_info().table_id()))
.cloned()
.collect::<Vec<_>>();

Ok(tables)
}

fn tables<'a>(
&'a self,
catalog: &'a str,

@@ -16,6 +16,7 @@

mod client;
pub mod client_manager;
#[cfg(feature = "testing")]
mod database;
pub mod error;
pub mod flow;

@@ -33,6 +34,7 @@ pub use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use snafu::OptionExt;

pub use self::client::Client;
#[cfg(feature = "testing")]
pub use self::database::Database;
pub use self::error::{Error, Result};
use crate::error::{IllegalDatabaseResponseSnafu, ServerSnafu};

@@ -32,7 +32,7 @@ use common_meta::key::TableMetadataManager;
use common_telemetry::info;
use common_telemetry::logging::TracingOptions;
use common_version::{short_version, version};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendClient, FrontendInvoker};
use flow::{FlownodeBuilder, FlownodeInstance, FrontendInvoker};
use meta_client::{MetaClientOptions, MetaClientType};
use servers::Mode;
use snafu::{OptionExt, ResultExt};

@@ -317,8 +317,6 @@ impl StartCommand {
Arc::new(executor),
);

let frontend_client = FrontendClient::from_meta_client(meta_client.clone());

let flow_metadata_manager = Arc::new(FlowMetadataManager::new(cached_meta_backend.clone()));
let flownode_builder = FlownodeBuilder::new(
opts,

@@ -326,7 +324,6 @@ impl StartCommand {
table_metadata_manager,
catalog_manager.clone(),
flow_metadata_manager,
Arc::new(frontend_client),
)
.with_heartbeat_task(heartbeat_task);

@@ -54,10 +54,7 @@ use datanode::config::{DatanodeOptions, ProcedureConfig, RegionEngineConfig, Sto
use datanode::datanode::{Datanode, DatanodeBuilder};
use datanode::region_server::RegionServer;
use file_engine::config::EngineConfig as FileEngineConfig;
use flow::{
FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendClient,
FrontendInvoker,
};
use flow::{FlowConfig, FlowWorkerManager, FlownodeBuilder, FlownodeOptions, FrontendInvoker};
use frontend::frontend::FrontendOptions;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance as FeInstance, StandaloneDatanodeManager};

@@ -536,16 +533,12 @@ impl StartCommand {
flow: opts.flow.clone(),
..Default::default()
};

let fe_server_addr = fe_opts.grpc.bind_addr.clone();
let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
let flow_builder = FlownodeBuilder::new(
flownode_options,
plugins.clone(),
table_metadata_manager.clone(),
catalog_manager.clone(),
flow_metadata_manager.clone(),
Arc::new(frontend_client),
);
let flownode = Arc::new(
flow_builder

@@ -130,10 +130,3 @@ pub const SEMANTIC_TYPE_TIME_INDEX: &str = "TIMESTAMP";
pub fn is_readonly_schema(schema: &str) -> bool {
matches!(schema, INFORMATION_SCHEMA_NAME)
}

// ---- special table and fields ----
pub const TRACE_ID_COLUMN: &str = "trace_id";
pub const SPAN_ID_COLUMN: &str = "span_id";
pub const SPAN_NAME_COLUMN: &str = "span_name";
pub const PARENT_SPAN_ID_COLUMN: &str = "parent_span_id";
// ---- End of special table and fields ----

@@ -12,16 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! Two UDAFs are implemented for HyperLogLog:
//!
//! - `hll`: Accepts a string column and aggregates the values into a
//! HyperLogLog state.
//! - `hll_merge`: Accepts a binary column of states generated by `hll`
//! and merges them into a single state.
//!
//! The states can be then used to estimate the cardinality of the
//! values in the column by `hll_count` UDF.

use std::sync::Arc;

use common_query::prelude::*;

@@ -12,12 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! Implementation of the `uddsketch_state` UDAF that generate the state of
//! UDDSketch for a given set of values.
//!
//! The generated state can be used to compute approximate quantiles using
//! `uddsketch_calc` UDF.

use std::sync::Arc;

use common_query::prelude::*;

@@ -12,16 +12,24 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! # Deprecate Warning:
//!
//! This module is deprecated and will be removed in the future.
//! All UDAF implementation here are not maintained and should
//! not be used before they are refactored into the `src/aggr`
//! version.
mod argmax;
mod argmin;
mod diff;
mod mean;
mod polyval;
mod scipy_stats_norm_cdf;
mod scipy_stats_norm_pdf;

use std::sync::Arc;

pub use argmax::ArgmaxAccumulatorCreator;
pub use argmin::ArgminAccumulatorCreator;
use common_query::logical_plan::AggregateFunctionCreatorRef;
pub use diff::DiffAccumulatorCreator;
pub use mean::MeanAccumulatorCreator;
pub use polyval::PolyvalAccumulatorCreator;
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
pub use scipy_stats_norm_pdf::ScipyStatsNormPdfAccumulatorCreator;

use crate::function_registry::FunctionRegistry;
use crate::scalars::vector::product::VectorProductCreator;

@@ -68,22 +76,31 @@ pub(crate) struct AggregateFunctions;

impl AggregateFunctions {
pub fn register(registry: &FunctionRegistry) {
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
"vec_sum",
1,
Arc::new(|| Arc::new(VectorSumCreator::default())),
)));
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
"vec_product",
1,
Arc::new(|| Arc::new(VectorProductCreator::default())),
)));
macro_rules! register_aggr_func {
($name :expr, $arg_count :expr, $creator :ty) => {
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
$name,
$arg_count,
Arc::new(|| Arc::new(<$creator>::default())),
)));
};
}

register_aggr_func!("diff", 1, DiffAccumulatorCreator);
register_aggr_func!("mean", 1, MeanAccumulatorCreator);
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
register_aggr_func!("argmax", 1, ArgmaxAccumulatorCreator);
register_aggr_func!("argmin", 1, ArgminAccumulatorCreator);
register_aggr_func!("scipystatsnormcdf", 2, ScipyStatsNormCdfAccumulatorCreator);
register_aggr_func!("scipystatsnormpdf", 2, ScipyStatsNormPdfAccumulatorCreator);
register_aggr_func!("vec_sum", 1, VectorSumCreator);
register_aggr_func!("vec_product", 1, VectorProductCreator);

#[cfg(feature = "geo")]
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
register_aggr_func!(
"json_encode_path",
3,
Arc::new(|| Arc::new(super::geo::encoding::JsonPathEncodeFunctionCreator::default())),
)));
super::geo::encoding::JsonPathEncodeFunctionCreator
);
}
}

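The `register_aggr_func!` macro introduced in the hunk above only removes boilerplate: each invocation expands to the same `registry.register_aggregate_function(...)` call that the explicit `vec_sum`/`vec_product` registrations previously spelled out. A minimal hand-written sketch of one expansion, reusing the names already present in this hunk (`registry`, `AggregateFunctionMeta`, `MeanAccumulatorCreator`); this is illustrative only and not part of the diff:

```rust
// Illustrative expansion (written out by hand, not emitted by the diff):
// `register_aggr_func!("mean", 1, MeanAccumulatorCreator);`
// unfolds, per the macro body shown above, into:
registry.register_aggregate_function(Arc::new(AggregateFunctionMeta::new(
    "mean",                                                    // $name: SQL-visible function name
    1,                                                         // $arg_count: number of input columns
    Arc::new(|| Arc::new(MeanAccumulatorCreator::default())),  // $creator, constructed lazily per query
)));
```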
src/common/function/src/scalars/aggregate/argmax.rs (new file, 208 lines)
@@ -0,0 +1,208 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::{LogicalPrimitiveType, WrapperType};
|
||||
use datatypes::vectors::{ConstantVector, Helper};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use snafu::ensure;
|
||||
|
||||
// https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
|
||||
// return the index of the max value
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Argmax<T> {
|
||||
max: Option<T>,
|
||||
n: u64,
|
||||
}
|
||||
|
||||
impl<T> Argmax<T>
|
||||
where
|
||||
T: PartialOrd + Copy,
|
||||
{
|
||||
fn update(&mut self, value: T, index: u64) {
|
||||
if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
|
||||
self.max = Some(value);
|
||||
self.n = index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Accumulator for Argmax<T>
|
||||
where
|
||||
T: WrapperType + PartialOrd,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
match self.max {
|
||||
Some(max) => Ok(vec![max.into(), self.n.into()]),
|
||||
_ => Ok(vec![Value::Null, self.n.into()]),
|
||||
}
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let column = &values[0];
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
for (i, v) in column.iter_data().enumerate() {
|
||||
if let Some(value) = v {
|
||||
self.update(value, i as u64);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
states.len() == 2,
|
||||
BadAccumulatorImplSnafu {
|
||||
err_msg: "expect 2 states in `merge_batch`",
|
||||
}
|
||||
);
|
||||
|
||||
let max = &states[0];
|
||||
let index = &states[1];
|
||||
let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
|
||||
let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
|
||||
index
|
||||
.iter_data()
|
||||
.flatten()
|
||||
.zip(max.iter_data().flatten())
|
||||
.for_each(|(i, max)| self.update(max, i));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
match self.max {
|
||||
Some(_) => Ok(self.n.into()),
|
||||
_ => Ok(Value::Null),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct ArgmaxAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"ARGMAX\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint64_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
|
||||
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
|
||||
|
||||
Ok(vec![
|
||||
input_types.into_iter().next().unwrap(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
argmax.update_batch(&[]).unwrap();
|
||||
assert_eq!(Value::Null, argmax.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
argmax.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
argmax.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, argmax.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(3),
|
||||
]))];
|
||||
argmax.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(2_u64), argmax.evaluate().unwrap());
|
||||
|
||||
// test update null-value batch
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(4),
|
||||
]))];
|
||||
argmax.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(2_u64), argmax.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
10,
|
||||
))];
|
||||
argmax.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
|
||||
}
|
||||
}
|
||||
src/common/function/src/scalars/aggregate/argmin.rs (new file, 216 lines)
@@ -0,0 +1,216 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, InvalidInputStateSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::{ConstantVector, Helper};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use snafu::ensure;
|
||||
|
||||
// // https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Argmin<T> {
|
||||
min: Option<T>,
|
||||
n: u32,
|
||||
}
|
||||
|
||||
impl<T> Argmin<T>
|
||||
where
|
||||
T: Copy + PartialOrd,
|
||||
{
|
||||
fn update(&mut self, value: T, index: u32) {
|
||||
match self.min {
|
||||
Some(min) => {
|
||||
if let Some(Ordering::Greater) = min.partial_cmp(&value) {
|
||||
self.min = Some(value);
|
||||
self.n = index;
|
||||
}
|
||||
}
|
||||
None => {
|
||||
self.min = Some(value);
|
||||
self.n = index;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Accumulator for Argmin<T>
|
||||
where
|
||||
T: WrapperType + PartialOrd,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
match self.min {
|
||||
Some(min) => Ok(vec![min.into(), self.n.into()]),
|
||||
_ => Ok(vec![Value::Null, self.n.into()]),
|
||||
}
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(values.len() == 1, InvalidInputStateSnafu);
|
||||
|
||||
let column = &values[0];
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
for (i, v) in column.iter_data().enumerate() {
|
||||
if let Some(value) = v {
|
||||
self.update(value, i as u32);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
states.len() == 2,
|
||||
BadAccumulatorImplSnafu {
|
||||
err_msg: "expect 2 states in `merge_batch`",
|
||||
}
|
||||
);
|
||||
|
||||
let min = &states[0];
|
||||
let index = &states[1];
|
||||
let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
|
||||
let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
|
||||
index
|
||||
.iter_data()
|
||||
.flatten()
|
||||
.zip(min.iter_data().flatten())
|
||||
.for_each(|(i, min)| self.update(min, i));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
match self.min {
|
||||
Some(_) => Ok(self.n.into()),
|
||||
_ => Ok(Value::Null),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct ArgminAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for ArgminAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"ARGMIN\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::uint32_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
|
||||
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
|
||||
|
||||
Ok(vec![
|
||||
input_types.into_iter().next().unwrap(),
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
argmin.update_batch(&[]).unwrap();
|
||||
assert_eq!(Value::Null, argmin.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
argmin.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
argmin.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, argmin.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(3),
|
||||
]))];
|
||||
argmin.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
|
||||
|
||||
// test update null-value batch
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(4),
|
||||
]))];
|
||||
argmin.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
10,
|
||||
))];
|
||||
argmin.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
|
||||
}
|
||||
}
|
||||
src/common/function/src/scalars/aggregate/diff.rs (new file, 252 lines)
@@ -0,0 +1,252 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{ConstantVector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
// https://numpy.org/doc/stable/reference/generated/numpy.diff.html
|
||||
// I is the input type, O is the output type.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Diff<I, O> {
|
||||
values: Vec<I>,
|
||||
_phantom: PhantomData<O>,
|
||||
}
|
||||
|
||||
impl<I, O> Diff<I, O> {
|
||||
fn push(&mut self, value: I) {
|
||||
self.values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
impl<I, O> Accumulator for Diff<I, O>
|
||||
where
|
||||
I: WrapperType,
|
||||
O: WrapperType,
|
||||
I::Native: AsPrimitive<O::Native>,
|
||||
O::Native: std::ops::Sub<Output = O::Native>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![Value::List(ListValue::new(
|
||||
nums,
|
||||
I::LogicalType::build_data_type(),
|
||||
))])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(values.len() == 1, InvalidInputStateSnafu);
|
||||
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<I as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let states = &states[0];
|
||||
let states = states
|
||||
.as_any()
|
||||
.downcast_ref::<ListVector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect ListVector, got vector type {}",
|
||||
states.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
for state in states.values_iter() {
|
||||
if let Some(state) = state.context(FromScalarValueSnafu)? {
|
||||
self.update_batch(&[state])?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
if self.values.is_empty() || self.values.len() == 1 {
|
||||
return Ok(Value::Null);
|
||||
}
|
||||
let diff = self
|
||||
.values
|
||||
.windows(2)
|
||||
.map(|x| {
|
||||
let native = x[1].into_native().as_() - x[0].into_native().as_();
|
||||
O::from_native(native).into()
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
let diff = Value::List(ListValue::new(diff, O::LogicalType::build_data_type()));
|
||||
Ok(diff)
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct DiffAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for DiffAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"DIFF\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
|
||||
with_match_primitive_type_id!(
|
||||
input_types[0].logical_type_id(),
|
||||
|$S| {
|
||||
Ok(ConcreteDataType::list_datatype($S::default().into()))
|
||||
},
|
||||
{
|
||||
unreachable!()
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
|
||||
with_match_primitive_type_id!(
|
||||
input_types[0].logical_type_id(),
|
||||
|$S| {
|
||||
Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
|
||||
},
|
||||
{
|
||||
unreachable!()
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
diff.update_batch(&[]).unwrap();
|
||||
assert!(diff.values.is_empty());
|
||||
assert_eq!(Value::Null, diff.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, diff.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, diff.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
]))];
|
||||
let values = vec![Value::from(2_i64), Value::from(1_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update null-value batch
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
Some(4),
|
||||
]))];
|
||||
let values = vec![Value::from(5_i64), Value::from(1_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update with constant vector
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
4,
|
||||
))];
|
||||
let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
|
||||
diff.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(values, ConcreteDataType::int64_datatype())),
|
||||
diff.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
src/common/function/src/scalars/aggregate/mean.rs (new file, 238 lines)
@@ -0,0 +1,238 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::WrapperType;
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Mean<T> {
|
||||
sum: f64,
|
||||
n: u64,
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T> Mean<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<f64>,
|
||||
{
|
||||
#[inline(always)]
|
||||
fn push(&mut self, value: T) {
|
||||
self.sum += value.into_native().as_();
|
||||
self.n += 1;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn update(&mut self, sum: f64, n: u64) {
|
||||
self.sum += sum;
|
||||
self.n += n;
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Accumulator for Mean<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<f64>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
Ok(vec![self.sum.into(), self.n.into()])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(values.len() == 1, InvalidInputStateSnafu);
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
states.len() == 2,
|
||||
BadAccumulatorImplSnafu {
|
||||
err_msg: "expect 2 states in `merge_batch`",
|
||||
}
|
||||
);
|
||||
|
||||
let sum = &states[0];
|
||||
let n = &states[1];
|
||||
|
||||
let sum = sum
|
||||
.as_any()
|
||||
.downcast_ref::<Float64Vector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect Float64Vector, got vector type {}",
|
||||
sum.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
|
||||
let n = n
|
||||
.as_any()
|
||||
.downcast_ref::<UInt64Vector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect UInt64Vector, got vector type {}",
|
||||
n.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
|
||||
sum.iter_data().zip(n.iter_data()).for_each(|(sum, n)| {
|
||||
if let (Some(sum), Some(n)) = (sum, n) {
|
||||
self.update(sum, n);
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
if self.n == 0 {
|
||||
return Ok(Value::Null);
|
||||
}
|
||||
let values = self.sum / self.n as f64;
|
||||
Ok(values.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct MeanAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for MeanAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"MEAN\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
|
||||
Ok(vec![
|
||||
ConcreteDataType::float64_datatype(),
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut mean = Mean::<i32>::default();
|
||||
mean.update_batch(&[]).unwrap();
|
||||
assert_eq!(Value::Null, mean.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
mean.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
mean.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, mean.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
]))];
|
||||
mean.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(0.6666666666666666), mean.evaluate().unwrap());
|
||||
|
||||
// test update null-value batch
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
Some(4),
|
||||
]))];
|
||||
mean.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(1.6666666666666667), mean.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
10,
|
||||
))];
|
||||
mean.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::from(4.0), mean.evaluate().unwrap());
|
||||
}
|
||||
}
|
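// Illustrative sketch (not part of the original diff): how the (sum, n) state pairs returned
// by `Mean::state()` combine across DataFusion partitions in `merge_batch`. The helper name
// `merge_mean_states` is hypothetical and exists only for this sketch.
fn merge_mean_states(states: &[(f64, u64)]) -> Option<f64> {
    // Add up the partial sums and counts, then divide once at the end.
    let (sum, n) = states
        .iter()
        .fold((0.0_f64, 0_u64), |(s, c), &(ps, pn)| (s + ps, c + pn));
    if n == 0 {
        None // mirrors `evaluate()` returning Value::Null when nothing was accumulated
    } else {
        Some(sum / n as f64)
    }
}

#[test]
fn merged_partitions_give_the_global_mean() {
    // Two partitions: [1, 2] -> (3.0, 2) and [3, 4, 5] -> (12.0, 3); the merged mean is 3.0.
    assert_eq!(Some(3.0), merge_mean_states(&[(3.0, 2), (12.0, 3)]));
}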
||||
329
src/common/function/src/scalars/aggregate/polyval.rs
Normal file
@@ -0,0 +1,329 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
|
||||
FromScalarValueSnafu, InvalidInputColSnafu, InvalidInputStateSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::{LogicalPrimitiveType, WrapperType};
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
// https://numpy.org/doc/stable/reference/generated/numpy.polyval.html
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Polyval<T, PolyT>
|
||||
where
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<PolyT::Native>,
|
||||
PolyT: WrapperType,
|
||||
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
|
||||
{
|
||||
values: Vec<T>,
|
||||
// DataFusion casts the constant argument into the i64 type.
|
||||
x: Option<i64>,
|
||||
_phantom: PhantomData<PolyT>,
|
||||
}
|
||||
|
||||
impl<T, PolyT> Polyval<T, PolyT>
|
||||
where
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<PolyT::Native>,
|
||||
PolyT: WrapperType,
|
||||
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
self.values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, PolyT> Accumulator for Polyval<T, PolyT>
|
||||
where
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<PolyT::Native>,
|
||||
PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
|
||||
PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
|
||||
i64: AsPrimitive<<PolyT as WrapperType>::Native>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(values.len() == 2, InvalidInputStateSnafu);
|
||||
ensure!(values[0].len() == values[1].len(), InvalidInputStateSnafu);
|
||||
if values[0].len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
// The first column holds the values to accumulate; the second holds the constant `x` argument.
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
});
|
||||
|
||||
let x = &values[1];
|
||||
let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
|
||||
})?;
|
||||
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
|
||||
let first = x.get(0);
|
||||
ensure!(!first.is_null(), InvalidInputColSnafu);
|
||||
|
||||
for i in 1..x.len() {
|
||||
ensure!(first == x.get(i), InvalidInputColSnafu);
|
||||
}
|
||||
|
||||
let first = match first {
|
||||
Value::Int64(v) => v,
|
||||
// unreachable because we have checked `first` is not null and is i64 above
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if let Some(x) = self.x {
|
||||
ensure!(x == first, InvalidInputColSnafu);
|
||||
} else {
|
||||
self.x = Some(first);
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
|
||||
// merge states from other accumulators (returned by `state()` method).
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
states.len() == 2,
|
||||
BadAccumulatorImplSnafu {
|
||||
err_msg: "expect 2 states in `merge_batch`",
|
||||
}
|
||||
);
|
||||
|
||||
let x = &states[1];
|
||||
let x = x
|
||||
.as_any()
|
||||
.downcast_ref::<Int64Vector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect Int64Vector, got vector type {}",
|
||||
x.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
let x = x.get(0);
|
||||
if x.is_null() {
|
||||
return Ok(());
|
||||
}
|
||||
let x = match x {
|
||||
Value::Int64(x) => x,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
self.x = Some(x);
|
||||
|
||||
let values = &states[0];
|
||||
let values = values
|
||||
.as_any()
|
||||
.downcast_ref::<ListVector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect ListVector, got vector type {}",
|
||||
values.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// DataFusion expects this function to return the final value of this aggregator.
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
if self.values.is_empty() {
|
||||
return Ok(Value::Null);
|
||||
}
|
||||
let x = if let Some(x) = self.x {
|
||||
x
|
||||
} else {
|
||||
return Ok(Value::Null);
|
||||
};
|
||||
let len = self.values.len();
|
||||
let polyval: PolyT = self
|
||||
.values
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
|
||||
.sum();
|
||||
Ok(polyval.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct PolyvalAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"POLYVAL\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
let input_type = self.input_types()?[0].logical_type_id();
|
||||
with_match_primitive_type_id!(
|
||||
input_type,
|
||||
|$S| {
|
||||
Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
|
||||
},
|
||||
{
|
||||
unreachable!()
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
Ok(vec![
|
||||
ConcreteDataType::list_datatype(input_types.into_iter().next().unwrap()),
|
||||
ConcreteDataType::int64_datatype(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
polyval.update_batch(&[]).unwrap();
|
||||
assert!(polyval.values.is_empty());
|
||||
assert_eq!(Value::Null, polyval.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(3)])),
|
||||
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
|
||||
];
|
||||
polyval.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
|
||||
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
|
||||
];
|
||||
polyval.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Null, polyval.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
|
||||
Arc::new(Int64Vector::from(vec![
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
])),
|
||||
];
|
||||
polyval.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Int64(13), polyval.evaluate().unwrap());
|
||||
|
||||
// test update null-value batch
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
|
||||
Arc::new(Int64Vector::from(vec![
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
])),
|
||||
];
|
||||
polyval.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Int64(13), polyval.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
2,
|
||||
)),
|
||||
Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
|
||||
];
|
||||
polyval.update_batch(&v).unwrap();
|
||||
assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());
|
||||
}
|
||||
}
|
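// Illustrative sketch (not part of the original diff): the polynomial evaluation that
// `Polyval::evaluate()` performs, following numpy.polyval's convention that the first
// coefficient carries the highest degree. The helper name `polyval_i64` is hypothetical.
fn polyval_i64(coeffs: &[i64], x: i64) -> i64 {
    let len = coeffs.len();
    coeffs
        .iter()
        .enumerate()
        .map(|(i, &c)| c * x.pow((len - 1 - i) as u32))
        .sum()
}

#[test]
fn polyval_matches_the_batch_test_above() {
    // Same data as the "no null-value batch" case: 3 * 2^2 + 0 * 2 + 1 = 13.
    assert_eq!(13, polyval_i64(&[3, 0, 1], 2));
}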
||||
@@ -0,0 +1,270 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
|
||||
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use statrs::distribution::{ContinuousCDF, Normal};
|
||||
use statrs::statistics::Statistics;
|
||||
|
||||
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ScipyStatsNormCdf<T> {
|
||||
values: Vec<T>,
|
||||
x: Option<f64>,
|
||||
}
|
||||
|
||||
impl<T> ScipyStatsNormCdf<T> {
|
||||
fn push(&mut self, value: T) {
|
||||
self.values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Accumulator for ScipyStatsNormCdf<T>
|
||||
where
|
||||
T: WrapperType + std::iter::Sum<T>,
|
||||
T::Native: AsPrimitive<f64>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|&x| x.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(values.len() == 2, InvalidInputStateSnafu);
|
||||
ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);
|
||||
|
||||
if values[0].len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
|
||||
let x = &values[1];
|
||||
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a positive integer",
|
||||
})?;
|
||||
let first = x.get(0);
|
||||
ensure!(!first.is_null(), InvalidInputColSnafu);
|
||||
let first = match first {
|
||||
Value::Float64(OrderedFloat(v)) => v,
|
||||
// unreachable because we have checked `first` is not null and is f64 above
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if let Some(x) = self.x {
|
||||
ensure!(x == first, InvalidInputColSnafu);
|
||||
} else {
|
||||
self.x = Some(first);
|
||||
};
|
||||
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
states.len() == 2,
|
||||
BadAccumulatorImplSnafu {
|
||||
err_msg: "expect 2 states in `merge_batch`",
|
||||
}
|
||||
);
|
||||
|
||||
let x = &states[1];
|
||||
let x = x
|
||||
.as_any()
|
||||
.downcast_ref::<Float64Vector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect Float64Vector, got vector type {}",
|
||||
x.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
let x = x.get(0);
|
||||
if x.is_null() {
|
||||
return Ok(());
|
||||
}
|
||||
let x = match x {
|
||||
Value::Float64(OrderedFloat(x)) => x,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
self.x = Some(x);
|
||||
|
||||
let values = &states[0];
|
||||
let values = values
|
||||
.as_any()
|
||||
.downcast_ref::<ListVector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect ListVector, got vector type {}",
|
||||
values.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
|
||||
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
|
||||
if mean.is_nan() || std_dev.is_nan() {
|
||||
Ok(Value::Null)
|
||||
} else {
|
||||
let x = if let Some(x) = self.x {
|
||||
x
|
||||
} else {
|
||||
return Ok(Value::Null);
|
||||
};
|
||||
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
|
||||
Ok(n.cdf(x).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct ScipyStatsNormCdfAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"SCIPYSTATSNORMCDF\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
Ok(vec![
|
||||
ConcreteDataType::list_datatype(input_types[0].clone()),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::{Float64Vector, Int32Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
|
||||
scipy_stats_norm_cdf.update_batch(&[]).unwrap();
|
||||
assert!(scipy_stats_norm_cdf.values.is_empty());
|
||||
assert_eq!(Value::Null, scipy_stats_norm_cdf.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
])),
|
||||
];
|
||||
scipy_stats_norm_cdf.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(0.8086334555398362),
|
||||
scipy_stats_norm_cdf.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update null-value batch
|
||||
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
None,
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
])),
|
||||
];
|
||||
scipy_stats_norm_cdf.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(0.5412943699039795),
|
||||
scipy_stats_norm_cdf.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
}
|
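// Illustrative sketch (not part of the original diff): the evaluation step of
// ScipyStatsNormCdf in isolation, fitting Normal(mean, std_dev) to the accumulated values and
// taking its CDF at the constant `x`. It assumes the `statrs` crate that the diff already
// imports; the helper name `norm_cdf_of` is hypothetical.
use statrs::distribution::{ContinuousCDF, Normal};
use statrs::statistics::Statistics;

fn norm_cdf_of(values: &[f64], x: f64) -> Option<f64> {
    let mean = values.iter().copied().mean();
    let std_dev = values.iter().copied().std_dev(); // sample standard deviation
    if mean.is_nan() || std_dev.is_nan() {
        return None; // mirrors `evaluate()` returning Value::Null for too little data
    }
    Normal::new(mean, std_dev).ok().map(|n| n.cdf(x))
}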
||||
@@ -0,0 +1,271 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
self, BadAccumulatorImplSnafu, CreateAccumulatorSnafu, DowncastVectorSnafu,
|
||||
FromScalarValueSnafu, GenerateFunctionSnafu, InvalidInputColSnafu, InvalidInputStateSnafu,
|
||||
Result,
|
||||
};
|
||||
use common_query::logical_plan::accumulator::AggrFuncTypeStore;
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use statrs::distribution::{Continuous, Normal};
|
||||
use statrs::statistics::Statistics;
|
||||
|
||||
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ScipyStatsNormPdf<T> {
|
||||
values: Vec<T>,
|
||||
x: Option<f64>,
|
||||
}
|
||||
|
||||
impl<T> ScipyStatsNormPdf<T> {
|
||||
fn push(&mut self, value: T) {
|
||||
self.values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Accumulator for ScipyStatsNormPdf<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
.values
|
||||
.iter()
|
||||
.map(|&x| x.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(nums, T::LogicalType::build_data_type())),
|
||||
self.x.into(),
|
||||
])
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(values.len() == 2, InvalidInputStateSnafu);
|
||||
ensure!(values[1].len() == values[0].len(), InvalidInputStateSnafu);
|
||||
|
||||
if values[0].len() == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
|
||||
let x = &values[1];
|
||||
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a positive integer",
|
||||
})?;
|
||||
let first = x.get(0);
|
||||
ensure!(!first.is_null(), InvalidInputColSnafu);
|
||||
let first = match first {
|
||||
Value::Float64(OrderedFloat(v)) => v,
|
||||
// unreachable because we have checked `first` is not null and is f64 above
|
||||
_ => unreachable!(),
|
||||
};
|
||||
if let Some(x) = self.x {
|
||||
ensure!(x == first, InvalidInputColSnafu);
|
||||
} else {
|
||||
self.x = Some(first);
|
||||
};
|
||||
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
states.len() == 2,
|
||||
BadAccumulatorImplSnafu {
|
||||
err_msg: "expect 2 states in `merge_batch`",
|
||||
}
|
||||
);
|
||||
|
||||
let x = &states[1];
|
||||
let x = x
|
||||
.as_any()
|
||||
.downcast_ref::<Float64Vector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect Float64Vector, got vector type {}",
|
||||
x.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
let x = x.get(0);
|
||||
if x.is_null() {
|
||||
return Ok(());
|
||||
}
|
||||
let x = match x {
|
||||
Value::Float64(OrderedFloat(x)) => x,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
self.x = Some(x);
|
||||
|
||||
let values = &states[0];
|
||||
let values = values
|
||||
.as_any()
|
||||
.downcast_ref::<ListVector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect ListVector, got vector type {}",
|
||||
values.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
|
||||
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
|
||||
|
||||
if mean.is_nan() || std_dev.is_nan() {
|
||||
Ok(Value::Null)
|
||||
} else {
|
||||
let x = if let Some(x) = self.x {
|
||||
x
|
||||
} else {
|
||||
return Ok(Value::Null);
|
||||
};
|
||||
let n = Normal::new(mean, std_dev).context(GenerateFunctionSnafu)?;
|
||||
Ok(n.pdf(x).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct ScipyStatsNormPdfAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"SCIPYSTATSNORMpdf\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 2, InvalidInputStateSnafu);
|
||||
Ok(vec![
|
||||
ConcreteDataType::list_datatype(input_types[0].clone()),
|
||||
ConcreteDataType::float64_datatype(),
|
||||
])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::{Float64Vector, Int32Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
|
||||
scipy_stats_norm_pdf.update_batch(&[]).unwrap();
|
||||
assert!(scipy_stats_norm_pdf.values.is_empty());
|
||||
assert_eq!(Value::Null, scipy_stats_norm_pdf.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
])),
|
||||
];
|
||||
scipy_stats_norm_pdf.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(0.17843340219081558),
|
||||
scipy_stats_norm_pdf.evaluate().unwrap()
|
||||
);
|
||||
|
||||
// test update null-value batch
|
||||
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
None,
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
])),
|
||||
];
|
||||
scipy_stats_norm_pdf.update_batch(&v).unwrap();
|
||||
assert_eq!(
|
||||
Value::from(0.12343972049858312),
|
||||
scipy_stats_norm_pdf.evaluate().unwrap()
|
||||
);
|
||||
}
|
||||
}
|
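// Illustrative sketch (not part of the original diff): ScipyStatsNormPdf differs from the CDF
// variant only in the final step, evaluating the fitted normal's density instead of its
// cumulative distribution. Assumes the `statrs` crate; `norm_pdf_of` is a hypothetical helper.
use statrs::distribution::{Continuous, Normal};
use statrs::statistics::Statistics;

fn norm_pdf_of(values: &[f64], x: f64) -> Option<f64> {
    let mean = values.iter().copied().mean();
    let std_dev = values.iter().copied().std_dev();
    if mean.is_nan() || std_dev.is_nan() {
        return None; // mirrors `evaluate()` returning Value::Null
    }
    Normal::new(mean, std_dev).ok().map(|n| n.pdf(x)) // pdf(x) instead of cdf(x)
}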
||||
@@ -343,7 +343,6 @@ pub enum FlowType {
|
||||
impl FlowType {
|
||||
pub const RECORDING_RULE: &str = "recording_rule";
|
||||
pub const STREAMING: &str = "streaming";
|
||||
pub const FLOW_TYPE_KEY: &str = "flow_type";
|
||||
}
|
||||
|
||||
impl Default for FlowType {
|
||||
@@ -399,8 +398,7 @@ impl From<&CreateFlowData> for CreateRequest {
|
||||
};
|
||||
|
||||
let flow_type = value.flow_type.unwrap_or_default().to_string();
|
||||
req.flow_options
|
||||
.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
|
||||
req.flow_options.insert("flow_type".to_string(), flow_type);
|
||||
req
|
||||
}
|
||||
}
|
||||
@@ -432,7 +430,7 @@ impl From<&CreateFlowData> for (FlowInfoValue, Vec<(FlowPartitionId, FlowRouteVa
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let flow_type = value.flow_type.unwrap_or_default().to_string();
|
||||
options.insert(FlowType::FLOW_TYPE_KEY.to_string(), flow_type);
|
||||
options.insert("flow_type".to_string(), flow_type);
|
||||
|
||||
let flow_info = FlowInfoValue {
|
||||
source_table_ids: value.source_table_ids.clone(),
|
||||
|
||||
@@ -24,6 +24,7 @@ use datatypes::arrow::datatypes::DataType as ArrowDatatype;
|
||||
use datatypes::error::Error as DataTypeError;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use snafu::{Location, Snafu};
|
||||
use statrs::StatsError;
|
||||
|
||||
#[derive(Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
@@ -37,6 +38,14 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to generate function"))]
|
||||
GenerateFunction {
|
||||
#[snafu(source)]
|
||||
error: StatsError,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to cast scalar value into vector"))]
|
||||
FromScalarValue {
|
||||
#[snafu(implicit)]
|
||||
@@ -88,6 +97,12 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("unexpected: not constant column"))]
|
||||
InvalidInputCol {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("General DataFusion error"))]
|
||||
GeneralDataFusion {
|
||||
#[snafu(source)]
|
||||
@@ -233,6 +248,8 @@ impl ErrorExt for Error {
|
||||
Error::CreateAccumulator { .. }
|
||||
| Error::DowncastVector { .. }
|
||||
| Error::InvalidInputState { .. }
|
||||
| Error::InvalidInputCol { .. }
|
||||
| Error::GenerateFunction { .. }
|
||||
| Error::BadAccumulatorImpl { .. }
|
||||
| Error::ToScalarValue { .. }
|
||||
| Error::GetScalarVector { .. }
|
||||
|
||||
@@ -597,7 +597,7 @@ impl fmt::Display for FulltextAnalyzer {
|
||||
}
|
||||
|
||||
/// Skipping options for a column.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct SkippingIndexOptions {
|
||||
/// The granularity of the skip index.
|
||||
@@ -607,15 +607,6 @@ pub struct SkippingIndexOptions {
|
||||
pub index_type: SkippingIndexType,
|
||||
}
|
||||
|
||||
impl Default for SkippingIndexOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
granularity: DEFAULT_GRANULARITY,
|
||||
index_type: SkippingIndexType::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SkippingIndexOptions {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "granularity={}", self.granularity)?;
|
||||
|
||||
@@ -16,7 +16,6 @@ async-trait.workspace = true
|
||||
bytes.workspace = true
|
||||
cache.workspace = true
|
||||
catalog.workspace = true
|
||||
chrono.workspace = true
|
||||
client.workspace = true
|
||||
common-base.workspace = true
|
||||
common-config.workspace = true
|
||||
|
||||
@@ -49,13 +49,12 @@ pub(crate) use crate::adapter::node_context::FlownodeContext;
|
||||
use crate::adapter::refill::RefillTask;
|
||||
use crate::adapter::table_source::ManagedTableSource;
|
||||
use crate::adapter::util::relation_desc_to_column_schemas_with_fallback;
|
||||
pub(crate) use crate::adapter::worker::{create_worker, WorkerHandle};
|
||||
pub(crate) use crate::adapter::worker::{create_worker, Worker, WorkerHandle};
|
||||
use crate::compute::ErrCollector;
|
||||
use crate::df_optimizer::sql_to_flow_plan;
|
||||
use crate::error::{EvalSnafu, ExternalSnafu, InternalSnafu, InvalidQuerySnafu, UnexpectedSnafu};
|
||||
use crate::expr::Batch;
|
||||
use crate::metrics::{METRIC_FLOW_INSERT_ELAPSED, METRIC_FLOW_ROWS, METRIC_FLOW_RUN_INTERVAL_MS};
|
||||
use crate::recording_rules::RecordingRuleEngine;
|
||||
use crate::repr::{self, DiffRow, RelationDesc, Row, BATCH_SIZE};
|
||||
|
||||
mod flownode_impl;
|
||||
@@ -64,7 +63,7 @@ pub(crate) mod refill;
|
||||
mod stat;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
pub(crate) mod util;
|
||||
mod util;
|
||||
mod worker;
|
||||
|
||||
pub(crate) mod node_context;
|
||||
@@ -172,8 +171,6 @@ pub struct FlowWorkerManager {
|
||||
flush_lock: RwLock<()>,
|
||||
/// receive a oneshot sender to send state size report
|
||||
state_report_handler: RwLock<Option<StateReportHandler>>,
|
||||
/// engine for recording rule
|
||||
rule_engine: RecordingRuleEngine,
|
||||
}
|
||||
|
||||
/// Building FlownodeManager
|
||||
@@ -188,7 +185,6 @@ impl FlowWorkerManager {
|
||||
node_id: Option<u32>,
|
||||
query_engine: Arc<dyn QueryEngine>,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
rule_engine: RecordingRuleEngine,
|
||||
) -> Self {
|
||||
let srv_map = ManagedTableSource::new(
|
||||
table_meta.table_info_manager().clone(),
|
||||
@@ -211,7 +207,6 @@ impl FlowWorkerManager {
|
||||
node_id,
|
||||
flush_lock: RwLock::new(()),
|
||||
state_report_handler: RwLock::new(None),
|
||||
rule_engine,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -220,6 +215,25 @@ impl FlowWorkerManager {
|
||||
self
|
||||
}
|
||||
|
||||
/// Create a flownode manager together with `num_workers` workers
|
||||
pub fn new_with_workers<'s>(
|
||||
node_id: Option<u32>,
|
||||
query_engine: Arc<dyn QueryEngine>,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
num_workers: usize,
|
||||
) -> (Self, Vec<Worker<'s>>) {
|
||||
let mut zelf = Self::new(node_id, query_engine, table_meta);
|
||||
|
||||
let workers: Vec<_> = (0..num_workers)
|
||||
.map(|_| {
|
||||
let (handle, worker) = create_worker();
|
||||
zelf.add_worker_handle(handle);
|
||||
worker
|
||||
})
|
||||
.collect();
|
||||
(zelf, workers)
|
||||
}
|
||||
|
||||
/// add a worker handle to the manager, meaning the corresponding worker is under its management
|
||||
pub fn add_worker_handle(&mut self, handle: WorkerHandle) {
|
||||
self.worker_handles.push(handle);
|
||||
@@ -737,11 +751,7 @@ pub struct CreateFlowArgs {
|
||||
/// Create&Remove flow
|
||||
impl FlowWorkerManager {
|
||||
/// remove a flow by its id
|
||||
#[allow(unreachable_code)]
|
||||
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
|
||||
// TODO(discord9): reroute some back to streaming engine later
|
||||
return self.rule_engine.remove_flow(flow_id).await;
|
||||
|
||||
for handle in self.worker_handles.iter() {
|
||||
if handle.contains_flow(flow_id).await? {
|
||||
handle.remove_flow(flow_id).await?;
|
||||
@@ -757,10 +767,8 @@ impl FlowWorkerManager {
|
||||
/// steps to create task:
|
||||
/// 1. parse the query into a typed plan (and optionally parse the expire_after expr)
|
||||
/// 2. render the source/sink with the output table id and the input table ids used
|
||||
#[allow(clippy::too_many_arguments, unreachable_code)]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
|
||||
// TODO(discord9): reroute some back to streaming engine later
|
||||
return self.rule_engine.create_flow(args).await;
|
||||
let CreateFlowArgs {
|
||||
flow_id,
|
||||
sink_table_name,
|
||||
|
||||
@@ -153,13 +153,7 @@ impl Flownode for FlowWorkerManager {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unreachable_code, unused)]
|
||||
async fn handle_inserts(&self, request: InsertRequests) -> Result<FlowResponse> {
|
||||
return self
|
||||
.rule_engine
|
||||
.handle_inserts(request)
|
||||
.await
|
||||
.map_err(to_meta_err(snafu::location!()));
|
||||
// using try_read to ensure two things:
|
||||
// 1. a flush won't happen until inserts issued before it have been applied
|
||||
// 2. inserts happening concurrently with a flush won't be blocked by the flush
|
||||
@@ -212,15 +206,15 @@ impl Flownode for FlowWorkerManager {
|
||||
.collect_vec();
|
||||
let table_col_names = table_schema.relation_desc.names;
|
||||
let table_col_names = table_col_names
|
||||
.iter().enumerate()
|
||||
.map(|(idx,name)| match name {
|
||||
Some(name) => Ok(name.clone()),
|
||||
None => InternalSnafu {
|
||||
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
|
||||
}
|
||||
.fail().map_err(BoxedError::new).context(ExternalSnafu),
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
.iter().enumerate()
|
||||
.map(|(idx,name)| match name {
|
||||
Some(name) => Ok(name.clone()),
|
||||
None => InternalSnafu {
|
||||
reason: format!("Expect column {idx} of table id={table_id} to have name in table schema, found None"),
|
||||
}
|
||||
.fail().map_err(BoxedError::new).context(ExternalSnafu),
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let name_to_col = HashMap::<_, _>::from_iter(
|
||||
insert_schema
|
||||
.iter()
|
||||
|
||||
@@ -12,8 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Some utility functions
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use arrow_schema::ArrowError;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_error::{define_into_tonic_status, from_err_code_msg_to_header};
|
||||
use common_macro::stack_trace_debug;
|
||||
@@ -54,13 +53,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Time error"))]
|
||||
Time {
|
||||
source: common_time::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("External error"))]
|
||||
External {
|
||||
source: BoxedError,
|
||||
@@ -164,15 +156,6 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Arrow error: {raw:?} in context: {context}"))]
|
||||
Arrow {
|
||||
#[snafu(source)]
|
||||
raw: ArrowError,
|
||||
context: String,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Datafusion error: {raw:?} in context: {context}"))]
|
||||
Datafusion {
|
||||
#[snafu(source)]
|
||||
@@ -247,7 +230,6 @@ impl ErrorExt for Error {
|
||||
match self {
|
||||
Self::Eval { .. }
|
||||
| Self::JoinTask { .. }
|
||||
| Self::Arrow { .. }
|
||||
| Self::Datafusion { .. }
|
||||
| Self::InsertIntoFlow { .. } => StatusCode::Internal,
|
||||
Self::FlowAlreadyExist { .. } => StatusCode::TableAlreadyExists,
|
||||
@@ -256,9 +238,7 @@ impl ErrorExt for Error {
|
||||
| Self::FlowNotFound { .. }
|
||||
| Self::ListFlows { .. } => StatusCode::TableNotFound,
|
||||
Self::Plan { .. } | Self::Datatypes { .. } => StatusCode::PlanQuery,
|
||||
Self::InvalidQuery { .. } | Self::CreateFlow { .. } | Self::Time { .. } => {
|
||||
StatusCode::EngineExecuteQuery
|
||||
}
|
||||
Self::InvalidQuery { .. } | Self::CreateFlow { .. } => StatusCode::EngineExecuteQuery,
|
||||
Self::Unexpected { .. } => StatusCode::Unexpected,
|
||||
Self::NotImplemented { .. } | Self::UnsupportedTemporalFilter { .. } => {
|
||||
StatusCode::Unsupported
|
||||
|
||||
@@ -238,7 +238,6 @@ mod test {
|
||||
|
||||
for (sql, current, expected) in &testcases {
|
||||
let plan = sql_to_substrait(engine.clone(), sql).await;
|
||||
|
||||
let mut ctx = create_test_ctx();
|
||||
let flow_plan = TypedPlan::from_substrait_plan(&mut ctx, &plan)
|
||||
.await
|
||||
|
||||
@@ -130,6 +130,13 @@ impl HeartbeatTask {
|
||||
|
||||
pub fn shutdown(&self) {
|
||||
info!("Close heartbeat task for flownode");
|
||||
if self
|
||||
.running
|
||||
.compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
|
||||
.is_err()
|
||||
{
|
||||
warn!("Call close heartbeat task multiple times");
|
||||
}
|
||||
}
|
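// Illustrative sketch (not part of the original diff): the idempotent-shutdown pattern used by
// `shutdown()` above. `compare_exchange` flips `running` from true to false exactly once; a
// second call fails the exchange, which is reported as a warning rather than treated as an
// error. `shutdown_once` is a hypothetical helper for this sketch.
fn shutdown_once(running: &std::sync::atomic::AtomicBool) -> bool {
    use std::sync::atomic::Ordering;
    running
        .compare_exchange(true, false, Ordering::AcqRel, Ordering::Acquire)
        .is_ok()
}

#[test]
fn second_shutdown_is_detected() {
    let running = std::sync::atomic::AtomicBool::new(true);
    assert!(shutdown_once(&running)); // first call actually stops the task
    assert!(!shutdown_once(&running)); // second call observes it is already stopped
}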
||||
|
||||
fn new_heartbeat_request(
|
||||
|
||||
@@ -33,7 +33,6 @@ mod expr;
|
||||
pub mod heartbeat;
|
||||
mod metrics;
|
||||
mod plan;
|
||||
mod recording_rules;
|
||||
mod repr;
|
||||
mod server;
|
||||
mod transform;
|
||||
@@ -44,5 +43,4 @@ mod test_utils;
|
||||
|
||||
pub use adapter::{FlowConfig, FlowWorkerManager, FlowWorkerManagerRef, FlownodeOptions};
|
||||
pub use error::{Error, Result};
|
||||
pub use recording_rules::FrontendClient;
|
||||
pub use server::{FlownodeBuilder, FlownodeInstance, FlownodeServer, FrontendInvoker};
|
||||
|
||||
@@ -28,32 +28,6 @@ lazy_static! {
|
||||
&["table_id"]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_FLOW_RULE_ENGINE_QUERY_TIME: HistogramVec = register_histogram_vec!(
|
||||
"greptime_flow_rule_engine_query_time",
|
||||
"flow rule engine query time",
|
||||
&["flow_id"],
|
||||
vec![
|
||||
0.0,
|
||||
1.,
|
||||
3.,
|
||||
5.,
|
||||
10.,
|
||||
20.,
|
||||
30.,
|
||||
60.,
|
||||
2. * 60.,
|
||||
5. * 60.,
|
||||
10. * 60.
|
||||
]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_FLOW_RULE_ENGINE_SLOW_QUERY: HistogramVec = register_histogram_vec!(
|
||||
"greptime_flow_rule_engine_slow_query",
|
||||
"flow rule engine slow query",
|
||||
&["flow_id", "sql", "peer"],
|
||||
vec![60., 2. * 60., 3. * 60., 5. * 60., 10. * 60.]
|
||||
)
|
||||
.unwrap();
|
||||
pub static ref METRIC_FLOW_RUN_INTERVAL_MS: IntGauge =
|
||||
register_int_gauge!("greptime_flow_run_interval_ms", "flow run interval in ms").unwrap();
|
||||
pub static ref METRIC_FLOW_ROWS: IntCounterVec = register_int_counter_vec!(
|
||||
|
||||
@@ -1,882 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Run a flow as a recording rule: a time-window-aware ordinary query that is re-run on every tick configured by the user
|
||||
|
||||
mod engine;
|
||||
mod frontend_client;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::pb_value_to_value_ref;
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::DfRecordBatch;
|
||||
use common_telemetry::warn;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion::logical_expr::Expr;
|
||||
use datafusion::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion::sql::unparser::Unparser;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion, TreeNodeRewriter};
|
||||
use datafusion_common::{DFSchema, TableReference};
|
||||
use datafusion_expr::{ColumnarValue, LogicalPlan};
|
||||
use datafusion_physical_expr::PhysicalExprRef;
|
||||
use datatypes::prelude::{ConcreteDataType, DataType};
|
||||
use datatypes::scalars::ScalarVector;
|
||||
use datatypes::schema::TIME_INDEX_KEY;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{
|
||||
TimestampMicrosecondVector, TimestampMillisecondVector, TimestampNanosecondVector,
|
||||
TimestampSecondVector, Vector,
|
||||
};
|
||||
pub use engine::RecordingRuleEngine;
|
||||
pub use frontend_client::FrontendClient;
|
||||
use itertools::Itertools;
|
||||
use query::parser::QueryLanguageParser;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::adapter::util::from_proto_to_data_type;
|
||||
use crate::df_optimizer::apply_df_optimizer;
|
||||
use crate::error::{ArrowSnafu, DatafusionSnafu, DatatypesSnafu, ExternalSnafu, UnexpectedSnafu};
|
||||
use crate::expr::error::DataTypeSnafu;
|
||||
use crate::Error;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TimeWindowExpr {
|
||||
expr: PhysicalExprRef,
|
||||
column_name: String,
|
||||
}
|
||||
|
||||
impl TimeWindowExpr {
|
||||
pub fn new(expr: PhysicalExprRef, column_name: String) -> Self {
|
||||
Self { expr, column_name }
|
||||
}
|
||||
|
||||
pub fn from_expr(expr: &Expr, column_name: &str, df_schema: &DFSchema) -> Result<Self, Error> {
|
||||
let phy_planner = DefaultPhysicalPlanner::default();
|
||||
|
||||
let phy_expr: PhysicalExprRef = phy_planner
|
||||
.create_physical_expr(expr, df_schema, &SessionContext::new().state())
|
||||
.with_context(|_e| DatafusionSnafu {
|
||||
context: format!(
|
||||
"Failed to create physical expression from {expr:?} using {df_schema:?}"
|
||||
),
|
||||
})?;
|
||||
Ok(Self {
|
||||
expr: phy_expr,
|
||||
column_name: column_name.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Find timestamps from rows using time window expr
|
||||
pub async fn handle_rows(
|
||||
&self,
|
||||
rows_list: Vec<api::v1::Rows>,
|
||||
) -> Result<BTreeSet<Timestamp>, Error> {
|
||||
let mut time_windows = BTreeSet::new();
|
||||
|
||||
for rows in rows_list {
|
||||
// pick the time index column and use it to evaluate `self.expr`
|
||||
let ts_col_index = rows
|
||||
.schema
|
||||
.iter()
|
||||
.map(|col| col.column_name.clone())
|
||||
.position(|name| name == self.column_name);
|
||||
let Some(ts_col_index) = ts_col_index else {
|
||||
warn!("can't found time index column in schema: {:?}", rows.schema);
|
||||
continue;
|
||||
};
|
||||
let col_schema = &rows.schema[ts_col_index];
|
||||
let cdt = from_proto_to_data_type(col_schema)?;
|
||||
|
||||
let column_values = rows
|
||||
.rows
|
||||
.iter()
|
||||
.map(|row| &row.values[ts_col_index])
|
||||
.collect_vec();
|
||||
|
||||
let mut vector = cdt.create_mutable_vector(column_values.len());
|
||||
for value in column_values {
|
||||
let value = pb_value_to_value_ref(value, &None);
|
||||
vector.try_push_value_ref(value).context(DataTypeSnafu {
|
||||
msg: "Failed to convert rows to columns",
|
||||
})?;
|
||||
}
|
||||
let vector = vector.to_vector();
|
||||
|
||||
let df_schema = create_df_schema_for_ts_column(&self.column_name, cdt)?;
|
||||
|
||||
let rb =
|
||||
DfRecordBatch::try_new(df_schema.inner().clone(), vec![vector.to_arrow_array()])
|
||||
.with_context(|_e| ArrowSnafu {
|
||||
context: format!(
|
||||
"Failed to create record batch from {df_schema:?} and {vector:?}"
|
||||
),
|
||||
})?;
|
||||
|
||||
let eval_res = self.expr.evaluate(&rb).with_context(|_| DatafusionSnafu {
|
||||
context: format!(
|
||||
"Failed to evaluate physical expression {:?} on {rb:?}",
|
||||
self.expr
|
||||
),
|
||||
})?;
|
||||
|
||||
let res = columnar_to_ts_vector(&eval_res)?;
|
||||
|
||||
for ts in res.into_iter().flatten() {
|
||||
time_windows.insert(ts);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(time_windows)
|
||||
}
|
||||
}
|
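// Illustrative sketch (not part of the original diff): what `handle_rows` computes
// conceptually. A time window expression such as date_bin('5 minutes', ts) maps each row's
// timestamp to the start of its window; collecting the distinct starts tells the recording rule
// which windows are dirty and must be re-queried. `window_starts` is a hypothetical helper that
// works directly on millisecond timestamps instead of a PhysicalExprRef.
fn window_starts(ts_millis: &[i64], window_size_ms: i64) -> std::collections::BTreeSet<i64> {
    ts_millis
        .iter()
        .map(|&ts| ts - ts.rem_euclid(window_size_ms)) // align down to the window boundary
        .collect()
}

#[test]
fn rows_in_the_same_window_collapse_to_one_start() {
    // Three rows at 0s, 120s and 360s with 5-minute windows -> window starts at 0s and 300s.
    let starts = window_starts(&[0, 120_000, 360_000], 300_000);
    assert_eq!(starts, std::collections::BTreeSet::from([0, 300_000]));
}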
||||
|
||||
fn create_df_schema_for_ts_column(name: &str, cdt: ConcreteDataType) -> Result<DFSchema, Error> {
|
||||
let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
|
||||
name,
|
||||
cdt.as_arrow_type(),
|
||||
false,
|
||||
)]));
|
||||
|
||||
let df_schema = DFSchema::from_field_specific_qualified_schema(
|
||||
vec![Some(TableReference::bare("TimeIndexOnlyTable"))],
|
||||
&arrow_schema,
|
||||
)
|
||||
.with_context(|_e| DatafusionSnafu {
|
||||
context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
|
||||
})?;
|
||||
|
||||
Ok(df_schema)
|
||||
}
|
||||
|
||||
/// Convert `ColumnarValue` to `Vec<Option<Timestamp>>`
|
||||
fn columnar_to_ts_vector(columnar: &ColumnarValue) -> Result<Vec<Option<Timestamp>>, Error> {
|
||||
let val = match columnar {
|
||||
datafusion_expr::ColumnarValue::Array(array) => {
|
||||
let ty = array.data_type();
|
||||
let ty = ConcreteDataType::from_arrow_type(ty);
|
||||
let time_unit = if let ConcreteDataType::Timestamp(ty) = ty {
|
||||
ty.unit()
|
||||
} else {
|
||||
return UnexpectedSnafu {
|
||||
reason: format!("Non-timestamp type: {ty:?}"),
|
||||
}
|
||||
.fail();
|
||||
};
|
||||
|
||||
match time_unit {
|
||||
TimeUnit::Second => TimestampSecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec(),
|
||||
TimeUnit::Millisecond => {
|
||||
TimestampMillisecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec()
|
||||
}
|
||||
TimeUnit::Microsecond => {
|
||||
TimestampMicrosecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec()
|
||||
}
|
||||
TimeUnit::Nanosecond => {
|
||||
TimestampNanosecondVector::try_from_arrow_array(array.clone())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to create vector from arrow array {array:?}"),
|
||||
})?
|
||||
.iter_data()
|
||||
.map(|d| d.map(|d| d.0))
|
||||
.collect_vec()
|
||||
}
|
||||
}
|
||||
}
|
||||
datafusion_expr::ColumnarValue::Scalar(scalar) => {
|
||||
let value = Value::try_from(scalar.clone()).with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to convert scalar {scalar:?} to value"),
|
||||
})?;
|
||||
let ts = value.as_timestamp().context(UnexpectedSnafu {
|
||||
reason: format!("Expect Timestamp, found {:?}", value),
|
||||
})?;
|
||||
vec![Some(ts)]
|
||||
}
|
||||
};
|
||||
Ok(val)
|
||||
}
|
||||
|
||||
/// Convert a SQL query to a DataFusion logical plan
pub async fn sql_to_df_plan(
    query_ctx: QueryContextRef,
    engine: QueryEngineRef,
    sql: &str,
    optimize: bool,
) -> Result<LogicalPlan, Error> {
    let stmt = QueryLanguageParser::parse_sql(sql, &query_ctx)
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?;
    let plan = engine
        .planner()
        .plan(&stmt, query_ctx)
        .await
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?;
    let plan = if optimize {
        apply_df_optimizer(plan).await?
    } else {
        plan
    };
    Ok(plan)
}

/// Return, in order: the column name of the time index column, the time window expr,
/// the expected time unit of the time index column, and the expr's schema for
/// evaluating the time window.
async fn find_time_window_expr(
    plan: &LogicalPlan,
    catalog_man: CatalogManagerRef,
    query_ctx: QueryContextRef,
) -> Result<(String, Option<datafusion_expr::Expr>, TimeUnit, DFSchema), Error> {
    // TODO(discord9): find the expr that does the time window

    let mut table_name = None;

    // first find the table source in the logical plan
    plan.apply(|plan| {
        let LogicalPlan::TableScan(table_scan) = plan else {
            return Ok(TreeNodeRecursion::Continue);
        };
        table_name = Some(table_scan.table_name.clone());
        Ok(TreeNodeRecursion::Stop)
    })
    .with_context(|_| DatafusionSnafu {
        context: format!("Can't find table source in plan {plan:?}"),
    })?;
    let Some(table_name) = table_name else {
        UnexpectedSnafu {
            reason: format!("Can't find table source in plan {plan:?}"),
        }
        .fail()?
    };

    let current_schema = query_ctx.current_schema();

    let catalog_name = table_name.catalog().unwrap_or(query_ctx.current_catalog());
    let schema_name = table_name.schema().unwrap_or(&current_schema);
    let table_name = table_name.table();

    let Some(table_ref) = catalog_man
        .table(catalog_name, schema_name, table_name, Some(&query_ctx))
        .await
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?
    else {
        UnexpectedSnafu {
            reason: format!(
                "Can't find table {table_name:?} in catalog {catalog_name:?}/{schema_name:?}"
            ),
        }
        .fail()?
    };

    let schema = &table_ref.table_info().meta.schema;

    let ts_index = schema.timestamp_column().context(UnexpectedSnafu {
        reason: format!("Can't find timestamp column in table {table_name:?}"),
    })?;

    let ts_col_name = ts_index.name.clone();

    let expected_time_unit = ts_index.data_type.as_timestamp().with_context(|| UnexpectedSnafu {
        reason: format!(
            "Expected timestamp column {ts_col_name:?} in table {table_name:?} to be timestamp, but got {ts_index:?}"
        ),
    })?.unit();

    let arrow_schema = Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
        ts_col_name.clone(),
        ts_index.data_type.as_arrow_type(),
        false,
    )]));

    let df_schema = DFSchema::from_field_specific_qualified_schema(
        vec![Some(TableReference::bare(table_name))],
        &arrow_schema,
    )
    .with_context(|_e| DatafusionSnafu {
        context: format!("Failed to create DFSchema from arrow schema {arrow_schema:?}"),
    })?;

    // find the time window expr which refers to the time index column
    let mut aggr_expr = None;
    let mut time_window_expr: Option<Expr> = None;

    let find_inner_aggr_expr = |plan: &LogicalPlan| {
        if let LogicalPlan::Aggregate(aggregate) = plan {
            aggr_expr = Some(aggregate.clone());
        };

        Ok(TreeNodeRecursion::Continue)
    };
    plan.apply(find_inner_aggr_expr)
        .with_context(|_| DatafusionSnafu {
            context: format!("Can't find aggr expr in plan {plan:?}"),
        })?;

    if let Some(aggregate) = aggr_expr {
        for group_expr in &aggregate.group_expr {
            let refs = group_expr.column_refs();
            if refs.len() != 1 {
                continue;
            }
            let ref_col = refs.iter().next().unwrap();

            let index = aggregate.input.schema().maybe_index_of_column(ref_col);
            let Some(index) = index else {
                continue;
            };
            let field = aggregate.input.schema().field(index);

            let is_time_index = field.metadata().get(TIME_INDEX_KEY) == Some(&"true".to_string());

            if is_time_index {
                let rewrite_column = group_expr.clone();
                let rewritten = rewrite_column
                    .rewrite(&mut RewriteColumn {
                        table_name: table_name.to_string(),
                    })
                    .with_context(|_| DatafusionSnafu {
                        context: format!("Rewrite expr failed, expr={:?}", group_expr),
                    })?
                    .data;
                struct RewriteColumn {
                    table_name: String,
                }

                impl TreeNodeRewriter for RewriteColumn {
                    type Node = Expr;
                    fn f_down(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
                        let Expr::Column(mut column) = node else {
                            return Ok(Transformed::no(node));
                        };

                        column.relation = Some(TableReference::bare(self.table_name.clone()));

                        Ok(Transformed::yes(Expr::Column(column)))
                    }
                }

                time_window_expr = Some(rewritten);
                break;
            }
        }
        Ok((ts_col_name, time_window_expr, expected_time_unit, df_schema))
    } else {
        // can't find a time window expr, return None for it
        Ok((ts_col_name, None, expected_time_unit, df_schema))
    }
}

/// Find the nearest lower bound for time `current` in the given `plan` for the time window expr.
/// i.e. for a time window expr of `date_bin(INTERVAL '5 minutes', ts) as time_window` and `current="2021-07-01 00:01:01.000"`,
/// return `Some("2021-07-01 00:00:00.000")`.
/// If `plan` doesn't contain a `TIME INDEX` column, return `None`.
///
/// A time window expr is an expr that:
/// 1. refers only to a time index column
/// 2. is monotonically increasing
/// 3. shows up in the GROUP BY clause
///
/// Note that this plan should contain only one TableScan.
pub async fn find_plan_time_window_bound(
    plan: &LogicalPlan,
    current: Timestamp,
    query_ctx: QueryContextRef,
    engine: QueryEngineRef,
) -> Result<(String, Option<Timestamp>, Option<Timestamp>), Error> {
    // TODO(discord9): find the expr that does the time window
    let catalog_man = engine.engine_state().catalog_manager();

    let (ts_col_name, time_window_expr, expected_time_unit, df_schema) =
        find_time_window_expr(plan, catalog_man.clone(), query_ctx).await?;
    // cast current to the ts_index's type
    let new_current = current
        .convert_to(expected_time_unit)
        .with_context(|| UnexpectedSnafu {
            reason: format!("Failed to cast current timestamp {current:?} to {expected_time_unit}"),
        })?;

    // if no time_window_expr is found, return None for the bounds
    if let Some(time_window_expr) = time_window_expr {
        let lower_bound =
            find_expr_time_window_lower_bound(&time_window_expr, &df_schema, new_current)?;
        let upper_bound =
            find_expr_time_window_upper_bound(&time_window_expr, &df_schema, new_current)?;
        Ok((ts_col_name, lower_bound, upper_bound))
    } else {
        Ok((ts_col_name, None, None))
    }
}

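// Illustration only (not part of the engine): a std-only sketch of the bound
// arithmetic described above, assuming a fixed-size window such as
// `date_bin(INTERVAL '5 minutes', ts)`. The hypothetical helper below rounds the
// current timestamp down to the window start and adds one window size for the
// upper bound; the real code derives both bounds by evaluating the actual time
// window expression, so treat this purely as a sketch of the expected result.
#[allow(dead_code)]
fn window_bounds_ms_sketch(current_ms: i64, window_ms: i64) -> (i64, i64) {
    // round down to the window start; one window later is the (exclusive) upper bound
    let lower = current_ms.div_euclid(window_ms) * window_ms;
    (lower, lower + window_ms)
}

// e.g. for current = 2021-07-01 00:01:01.000 (1_625_097_661_000 ms) and a 5 minute
// window, this yields (1_625_097_600_000, 1_625_097_900_000), i.e.
// 2021-07-01 00:00:00.000 and 2021-07-01 00:05:00.000.
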
/// Find the lower bound of the time window given `expr` and the `current` timestamp.
///
/// i.e. for `current="2021-07-01 00:01:01.000"` and `expr=date_bin(INTERVAL '5 minutes', ts) as time_window` and `ts_col=ts`,
/// return `Some("2021-07-01 00:00:00.000")` since it's the lower bound
/// of the current time window given the current timestamp.
///
/// If it returns `None`, this time window has no lower bound.
fn find_expr_time_window_lower_bound(
    expr: &Expr,
    df_schema: &DFSchema,
    current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
    let phy_planner = DefaultPhysicalPlanner::default();

    let phy_expr: PhysicalExprRef = phy_planner
        .create_physical_expr(expr, df_schema, &SessionContext::new().state())
        .with_context(|_e| DatafusionSnafu {
            context: format!(
                "Failed to create physical expression from {expr:?} using {df_schema:?}"
            ),
        })?;

    let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;
    let input_time_unit = cur_time_window.unit();
    Ok(cur_time_window.convert_to(input_time_unit))
}

/// Find the upper bound of the time window for the given time window expression
fn find_expr_time_window_upper_bound(
    expr: &Expr,
    df_schema: &DFSchema,
    current: Timestamp,
) -> Result<Option<Timestamp>, Error> {
    use std::cmp::Ordering;

    let phy_planner = DefaultPhysicalPlanner::default();

    let phy_expr: PhysicalExprRef = phy_planner
        .create_physical_expr(expr, df_schema, &SessionContext::new().state())
        .with_context(|_e| DatafusionSnafu {
            context: format!(
                "Failed to create physical expression from {expr:?} using {df_schema:?}"
            ),
        })?;

    let cur_time_window = eval_ts_to_ts(&phy_expr, df_schema, current)?;

    // search for the upper bound of the current time window
    let mut offset: i64 = 1;
    let mut lower_bound = Some(current);
    let upper_bound;
    // first, an exponential probe to find a range for the binary search
    loop {
        let Some(next_val) = current.value().checked_add(offset) else {
            // no upper bound if overflow
            return Ok(None);
        };

        let next_time_probe = common_time::Timestamp::new(next_val, current.unit());

        let next_time_window = eval_ts_to_ts(&phy_expr, df_schema, next_time_probe)?;

        match next_time_window.cmp(&cur_time_window) {
            Ordering::Less => {
                UnexpectedSnafu {
                    reason: format!(
                        "Unsupported time window expression, expect monotonic increasing for time window expression {expr:?}"
                    ),
                }
                .fail()?
            }
            Ordering::Equal => {
                lower_bound = Some(next_time_probe);
            }
            Ordering::Greater => {
                upper_bound = Some(next_time_probe);
                break;
            }
        }

        let Some(new_offset) = offset.checked_mul(2) else {
            // no upper bound if overflow
            return Ok(None);
        };
        offset = new_offset;
    }

    // binary search for the exact upper bound

    ensure!(
        lower_bound.map(|v| v.unit()) == upper_bound.map(|v| v.unit()),
        UnexpectedSnafu {
            reason: format!(
                "Unit mismatch for time window expression {expr:?}, found {lower_bound:?} and {upper_bound:?}"
            ),
        }
    );

    let output_unit = upper_bound
        .context(UnexpectedSnafu {
            reason: "should have upper bound",
        })?
        .unit();

    let mut low = lower_bound
        .context(UnexpectedSnafu {
            reason: "should have lower bound",
        })?
        .value();
    let mut high = upper_bound
        .context(UnexpectedSnafu {
            reason: "should have upper bound",
        })?
        .value();
    while low < high {
        let mid = (low + high) / 2;
        let mid_probe = common_time::Timestamp::new(mid, output_unit);
        let mid_time_window = eval_ts_to_ts(&phy_expr, df_schema, mid_probe)?;

        match mid_time_window.cmp(&cur_time_window) {
            Ordering::Less => UnexpectedSnafu {
                reason: format!("Binary search failed for time window expression {expr:?}"),
            }
            .fail()?,
            Ordering::Equal => low = mid + 1,
            Ordering::Greater => high = mid,
        }
    }

    let final_upper_bound_for_time_window = common_time::Timestamp::new(high, output_unit);

    Ok(Some(final_upper_bound_for_time_window))
}

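// Illustration only: a std-only sketch of the search strategy used above, assuming
// the time window expression behaves like a monotonically non-decreasing step
// function `f`. Exponentially growing offsets are probed until `f` jumps past the
// current window, then a binary search inside that bracket finds the smallest
// input whose window differs from the current one (the exclusive upper bound).
// All names here are hypothetical.
#[allow(dead_code)]
fn step_upper_bound_sketch(f: impl Fn(i64) -> i64, current: i64) -> Option<i64> {
    let cur = f(current);
    let mut offset: i64 = 1;
    let mut low = current;
    let mut high;
    // exponential probe to bracket the boundary; give up (no bound) on overflow
    loop {
        let probe = current.checked_add(offset)?;
        if f(probe) > cur {
            high = probe;
            break;
        }
        low = probe;
        offset = offset.checked_mul(2)?;
    }
    // binary search for the smallest `x` with `f(x) > cur`
    while low < high {
        let mid = low + (high - low) / 2;
        if f(mid) > cur {
            high = mid;
        } else {
            low = mid + 1;
        }
    }
    Some(high)
}

// e.g. with 5 minute buckets over seconds, f(x) = x / 300 * 300:
// step_upper_bound_sketch(|x| x / 300 * 300, 61) == Some(300)
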
fn eval_ts_to_ts(
    phy: &PhysicalExprRef,
    df_schema: &DFSchema,
    input_value: Timestamp,
) -> Result<Timestamp, Error> {
    let ts_vector = match input_value.unit() {
        TimeUnit::Second => {
            TimestampSecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
        TimeUnit::Millisecond => {
            TimestampMillisecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
        TimeUnit::Microsecond => {
            TimestampMicrosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
        TimeUnit::Nanosecond => {
            TimestampNanosecondVector::from_vec(vec![input_value.value()]).to_arrow_array()
        }
    };

    let rb = DfRecordBatch::try_new(df_schema.inner().clone(), vec![ts_vector.clone()])
        .with_context(|_| ArrowSnafu {
            context: format!("Failed to create record batch from {df_schema:?} and {ts_vector:?}"),
        })?;

    let eval_res = phy.evaluate(&rb).with_context(|_| DatafusionSnafu {
        context: format!("Failed to evaluate physical expression {phy:?} on {rb:?}"),
    })?;

    if let Some(Some(ts)) = columnar_to_ts_vector(&eval_res)?.first() {
        Ok(*ts)
    } else {
        UnexpectedSnafu {
            reason: format!(
                "Expected timestamp in expression {phy:?} but got {:?}",
                eval_res
            ),
        }
        .fail()?
    }
}

// TODO(discord9): a method to find out the precise time window

/// Find the `Filter` node corresponding to the outermost `WHERE` and add a new filter expr to it
#[derive(Debug)]
pub struct AddFilterRewriter {
    extra_filter: Expr,
    is_rewritten: bool,
}

impl AddFilterRewriter {
    fn new(filter: Expr) -> Self {
        Self {
            extra_filter: filter,
            is_rewritten: false,
        }
    }
}

impl TreeNodeRewriter for AddFilterRewriter {
    type Node = LogicalPlan;
    fn f_up(&mut self, node: Self::Node) -> DfResult<Transformed<Self::Node>> {
        if self.is_rewritten {
            return Ok(Transformed::no(node));
        }
        match node {
            LogicalPlan::Filter(mut filter) if !filter.having => {
                filter.predicate = filter.predicate.and(self.extra_filter.clone());
                self.is_rewritten = true;
                Ok(Transformed::yes(LogicalPlan::Filter(filter)))
            }
            LogicalPlan::TableScan(_) => {
                // add a new filter
                let filter =
                    datafusion_expr::Filter::try_new(self.extra_filter.clone(), Arc::new(node))?;
                self.is_rewritten = true;
                Ok(Transformed::yes(LogicalPlan::Filter(filter)))
            }
            _ => Ok(Transformed::no(node)),
        }
    }
}

fn df_plan_to_sql(plan: &LogicalPlan) -> Result<String, Error> {
    let unparser = Unparser::default();
    let sql = unparser
        .plan_to_sql(plan)
        .with_context(|_e| DatafusionSnafu {
            context: format!("Failed to unparse logical plan {plan:?}"),
        })?;
    Ok(sql.to_string())
}

#[cfg(test)]
|
||||
mod test {
|
||||
use datafusion_common::tree_node::TreeNode;
|
||||
use pretty_assertions::assert_eq;
|
||||
use session::context::QueryContext;
|
||||
|
||||
use super::{sql_to_df_plan, *};
|
||||
use crate::recording_rules::{df_plan_to_sql, AddFilterRewriter};
|
||||
use crate::test_utils::create_test_query_engine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_add_filter() {
|
||||
let testcases = vec![
|
||||
(
|
||||
"SELECT number FROM numbers_with_ts GROUP BY number","SELECT numbers_with_ts.number FROM numbers_with_ts WHERE (number > 4) GROUP BY numbers_with_ts.number"
|
||||
),
|
||||
(
|
||||
"SELECT number FROM numbers_with_ts WHERE number < 2 OR number >10",
|
||||
"SELECT numbers_with_ts.number FROM numbers_with_ts WHERE ((numbers_with_ts.number < 2) OR (numbers_with_ts.number > 10)) AND (number > 4)"
|
||||
),
|
||||
(
|
||||
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window",
|
||||
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE (number > 4) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
|
||||
)
|
||||
];
|
||||
use datafusion_expr::{col, lit};
|
||||
let query_engine = create_test_query_engine();
|
||||
let ctx = QueryContext::arc();
|
||||
|
||||
for (before, after) in testcases {
|
||||
let sql = before;
|
||||
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut add_filter = AddFilterRewriter::new(col("number").gt(lit(4u32)));
|
||||
let plan = plan.rewrite(&mut add_filter).unwrap().data;
|
||||
let new_sql = df_plan_to_sql(&plan).unwrap();
|
||||
assert_eq!(after, new_sql);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_plan_time_window_lower_bound() {
|
||||
use datafusion_expr::{col, lit};
|
||||
let query_engine = create_test_query_engine();
|
||||
let ctx = QueryContext::arc();
|
||||
|
||||
let testcases = [
|
||||
// same alias is not same column
|
||||
(
|
||||
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts GROUP BY ts;",
|
||||
Timestamp::new(1740394109, TimeUnit::Second),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(1740394109000, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(1740394109001, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS ts FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:29' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:48:29.001' AS TIMESTAMP))) GROUP BY numbers_with_ts.ts"
|
||||
),
|
||||
// complex time window index
|
||||
(
|
||||
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts GROUP BY time_window;",
|
||||
Timestamp::new(1740394109, TimeUnit::Second),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(1740394080, TimeUnit::Second)),
|
||||
Some(Timestamp::new(1740394140, TimeUnit::Second)),
|
||||
),
|
||||
"SELECT arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)') AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('2025-02-24 10:48:00' AS TIMESTAMP)) AND (ts <= CAST('2025-02-24 10:49:00' AS TIMESTAMP))) GROUP BY arrow_cast(date_bin(INTERVAL '1 MINS', numbers_with_ts.ts), 'Timestamp(Second, None)')"
|
||||
),
|
||||
// no time index
|
||||
(
|
||||
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;",
|
||||
Timestamp::new(23, TimeUnit::Millisecond),
|
||||
("ts".to_string(), None, None),
|
||||
"SELECT date_bin('5 minutes', ts) FROM numbers_with_ts;"
|
||||
),
|
||||
// time index
|
||||
(
|
||||
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
|
||||
Timestamp::new(23, TimeUnit::Nanosecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
|
||||
),
|
||||
// on spot
|
||||
(
|
||||
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
|
||||
Timestamp::new(0, TimeUnit::Nanosecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
|
||||
),
|
||||
// different time unit
|
||||
(
|
||||
"SELECT date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
|
||||
Timestamp::new(23_000_000, TimeUnit::Nanosecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
|
||||
),
|
||||
// time index with other fields
|
||||
(
|
||||
"SELECT sum(number) as sum_up, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window;",
|
||||
Timestamp::new(23, TimeUnit::Millisecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT sum(numbers_with_ts.number) AS sum_up, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts)"
|
||||
),
|
||||
// time index with other pks
|
||||
(
|
||||
"SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number;",
|
||||
Timestamp::new(23, TimeUnit::Millisecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number"
|
||||
),
|
||||
// subquery
|
||||
(
|
||||
"SELECT number, time_window FROM (SELECT number, date_bin('5 minutes', ts) as time_window FROM numbers_with_ts GROUP BY time_window, number);",
|
||||
Timestamp::new(23, TimeUnit::Millisecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT numbers_with_ts.number, time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number)"
|
||||
),
|
||||
// cte
|
||||
(
|
||||
"with cte as (select number, date_bin('5 minutes', ts) as time_window from numbers_with_ts GROUP BY time_window, number) select number, time_window from cte;",
|
||||
Timestamp::new(23, TimeUnit::Millisecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT cte.number, cte.time_window FROM (SELECT numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP))) GROUP BY date_bin('5 minutes', numbers_with_ts.ts), numbers_with_ts.number) AS cte"
|
||||
),
|
||||
// complex subquery without alias
|
||||
(
|
||||
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) GROUP BY number, time_window, bucket_name;",
|
||||
Timestamp::new(23, TimeUnit::Millisecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT sum(numbers_with_ts.number), numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts) AS time_window, bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) GROUP BY numbers_with_ts.number, date_bin('5 minutes', numbers_with_ts.ts), bucket_name"
|
||||
),
|
||||
// complex subquery alias
|
||||
(
|
||||
"SELECT sum(number), number, date_bin('5 minutes', ts) as time_window, bucket_name FROM (SELECT number, ts, case when number < 5 THEN 'bucket_0_5' when number >= 5 THEN 'bucket_5_inf' END as bucket_name FROM numbers_with_ts) as cte GROUP BY number, time_window, bucket_name;",
|
||||
Timestamp::new(23, TimeUnit::Millisecond),
|
||||
(
|
||||
"ts".to_string(),
|
||||
Some(Timestamp::new(0, TimeUnit::Millisecond)),
|
||||
Some(Timestamp::new(300000, TimeUnit::Millisecond)),
|
||||
),
|
||||
"SELECT sum(cte.number), cte.number, date_bin('5 minutes', cte.ts) AS time_window, cte.bucket_name FROM (SELECT numbers_with_ts.number, numbers_with_ts.ts, CASE WHEN (numbers_with_ts.number < 5) THEN 'bucket_0_5' WHEN (numbers_with_ts.number >= 5) THEN 'bucket_5_inf' END AS bucket_name FROM numbers_with_ts WHERE ((ts >= CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AND (ts <= CAST('1970-01-01 00:05:00' AS TIMESTAMP)))) AS cte GROUP BY cte.number, date_bin('5 minutes', cte.ts), cte.bucket_name"
|
||||
),
|
||||
];
|
||||
|
||||
for (sql, current, expected, expected_unparsed) in testcases {
|
||||
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, true)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let real =
|
||||
find_plan_time_window_bound(&plan, current, ctx.clone(), query_engine.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(expected, real);
|
||||
|
||||
let plan = sql_to_df_plan(ctx.clone(), query_engine.clone(), sql, false)
|
||||
.await
|
||||
.unwrap();
|
||||
let (col_name, lower, upper) = real;
|
||||
let new_sql = if lower.is_some() {
|
||||
let to_df_literal = |value| {
|
||||
let value = Value::from(value);
|
||||
|
||||
value.try_to_scalar_value(&value.data_type()).unwrap()
|
||||
};
|
||||
let lower = to_df_literal(lower.unwrap());
|
||||
let upper = to_df_literal(upper.unwrap());
|
||||
let expr = col(&col_name)
|
||||
.gt_eq(lit(lower))
|
||||
.and(col(&col_name).lt_eq(lit(upper)));
|
||||
let mut add_filter = AddFilterRewriter::new(expr);
|
||||
let plan = plan.rewrite(&mut add_filter).unwrap().data;
|
||||
df_plan_to_sql(&plan).unwrap()
|
||||
} else {
|
||||
sql.to_string()
|
||||
};
|
||||
assert_eq!(expected_unparsed, new_sql);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,776 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use api::v1::flow::FlowResponse;
|
||||
use common_error::ext::BoxedError;
|
||||
use common_meta::ddl::create_flow::FlowType;
|
||||
use common_meta::key::flow::FlowMetadataManagerRef;
|
||||
use common_meta::key::table_info::TableInfoManager;
|
||||
use common_meta::key::TableMetadataManagerRef;
|
||||
use common_telemetry::tracing::warn;
|
||||
use common_telemetry::{debug, info};
|
||||
use common_time::Timestamp;
|
||||
use datafusion::sql::unparser::expr_to_sql;
|
||||
use datafusion_common::tree_node::TreeNode;
|
||||
use datatypes::value::Value;
|
||||
use query::QueryEngineRef;
|
||||
use session::context::QueryContextRef;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use store_api::storage::RegionId;
|
||||
use table::metadata::TableId;
|
||||
use tokio::sync::oneshot::error::TryRecvError;
|
||||
use tokio::sync::{oneshot, RwLock};
|
||||
use tokio::time::Instant;
|
||||
|
||||
use super::frontend_client::FrontendClient;
|
||||
use super::{df_plan_to_sql, AddFilterRewriter, TimeWindowExpr};
|
||||
use crate::adapter::{CreateFlowArgs, FlowId, TableName};
|
||||
use crate::error::{
|
||||
DatafusionSnafu, DatatypesSnafu, ExternalSnafu, FlowAlreadyExistSnafu, InternalSnafu,
|
||||
TimeSnafu, UnexpectedSnafu,
|
||||
};
|
||||
use crate::metrics::{METRIC_FLOW_RULE_ENGINE_QUERY_TIME, METRIC_FLOW_RULE_ENGINE_SLOW_QUERY};
|
||||
use crate::recording_rules::{find_plan_time_window_bound, find_time_window_expr, sql_to_df_plan};
|
||||
use crate::Error;
|
||||
|
||||
/// TODO(discord9): make those constants configurable
|
||||
/// The default rule engine query timeout is 10 minutes
|
||||
pub const DEFAULT_RULE_ENGINE_QUERY_TIMEOUT: Duration = Duration::from_secs(10 * 60);
|
||||
|
||||
/// Will output a warn log for any query that runs for more than 1 minute, and also every 1 minute while that query is still running
|
||||
pub const SLOW_QUERY_THRESHOLD: Duration = Duration::from_secs(60);
|
||||
|
||||
/// TODO(discord9): determine how to configure refresh rate
|
||||
pub struct RecordingRuleEngine {
|
||||
tasks: RwLock<BTreeMap<FlowId, RecordingRuleTask>>,
|
||||
shutdown_txs: RwLock<BTreeMap<FlowId, oneshot::Sender<()>>>,
|
||||
frontend_client: Arc<FrontendClient>,
|
||||
flow_metadata_manager: FlowMetadataManagerRef,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
engine: QueryEngineRef,
|
||||
}
|
||||
|
||||
impl RecordingRuleEngine {
|
||||
pub fn new(
|
||||
frontend_client: Arc<FrontendClient>,
|
||||
engine: QueryEngineRef,
|
||||
flow_metadata_manager: FlowMetadataManagerRef,
|
||||
table_meta: TableMetadataManagerRef,
|
||||
) -> Self {
|
||||
Self {
|
||||
tasks: Default::default(),
|
||||
shutdown_txs: Default::default(),
|
||||
frontend_client,
|
||||
flow_metadata_manager,
|
||||
table_meta,
|
||||
engine,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn handle_inserts(
|
||||
&self,
|
||||
request: api::v1::region::InsertRequests,
|
||||
) -> Result<FlowResponse, Error> {
|
||||
let table_info_mgr = self.table_meta.table_info_manager();
|
||||
let mut group_by_table_name: HashMap<TableName, Vec<api::v1::Rows>> = HashMap::new();
|
||||
for r in request.requests {
|
||||
let tid = RegionId::from(r.region_id).table_id();
|
||||
let name = get_table_name(table_info_mgr, &tid).await?;
|
||||
let entry = group_by_table_name.entry(name).or_default();
|
||||
if let Some(rows) = r.rows {
|
||||
entry.push(rows);
|
||||
}
|
||||
}
|
||||
|
||||
for (_flow_id, task) in self.tasks.read().await.iter() {
|
||||
let src_table_names = &task.source_table_names;
|
||||
|
||||
for src_table_name in src_table_names {
|
||||
if let Some(entry) = group_by_table_name.get(src_table_name) {
|
||||
let Some(expr) = &task.time_window_expr else {
|
||||
continue;
|
||||
};
|
||||
let involved_time_windows = expr.handle_rows(entry.clone()).await?;
|
||||
let mut state = task.state.write().await;
|
||||
state
|
||||
.dirty_time_windows
|
||||
.add_lower_bounds(involved_time_windows.into_iter());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Default::default())
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_table_name(zelf: &TableInfoManager, table_id: &TableId) -> Result<TableName, Error> {
|
||||
zelf.get(*table_id)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
reason: format!("Table id = {:?}, couldn't found table name", table_id),
|
||||
})
|
||||
.map(|name| name.table_name())
|
||||
.map(|name| [name.catalog_name, name.schema_name, name.table_name])
|
||||
}
|
||||
|
||||
const MIN_REFRESH_DURATION: Duration = Duration::new(5, 0);
|
||||
|
||||
impl RecordingRuleEngine {
|
||||
pub async fn create_flow(&self, args: CreateFlowArgs) -> Result<Option<FlowId>, Error> {
|
||||
let CreateFlowArgs {
|
||||
flow_id,
|
||||
sink_table_name,
|
||||
source_table_ids,
|
||||
create_if_not_exists,
|
||||
or_replace,
|
||||
expire_after,
|
||||
comment: _,
|
||||
sql,
|
||||
flow_options,
|
||||
query_ctx,
|
||||
} = args;
|
||||
|
||||
// or replace logic
|
||||
{
|
||||
let is_exist = self.tasks.read().await.contains_key(&flow_id);
|
||||
match (create_if_not_exists, or_replace, is_exist) {
|
||||
// if replace, ignore that old flow exists
|
||||
(_, true, true) => {
|
||||
info!("Replacing flow with id={}", flow_id);
|
||||
}
|
||||
(false, false, true) => FlowAlreadyExistSnafu { id: flow_id }.fail()?,
|
||||
// already exists, and not replace, return None
|
||||
(true, false, true) => {
|
||||
info!("Flow with id={} already exists, do nothing", flow_id);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// continue as normal
|
||||
(_, _, false) => (),
|
||||
}
|
||||
}
|
||||
|
||||
let flow_type = flow_options.get(FlowType::FLOW_TYPE_KEY);
|
||||
|
||||
ensure!(
|
||||
flow_type == Some(&FlowType::RecordingRule.to_string()) || flow_type.is_none(),
|
||||
UnexpectedSnafu {
|
||||
reason: format!("Flow type is not RecordingRule nor None, got {flow_type:?}")
|
||||
}
|
||||
);
|
||||
|
||||
let Some(query_ctx) = query_ctx else {
|
||||
UnexpectedSnafu {
|
||||
reason: "Query context is None".to_string(),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
let query_ctx = Arc::new(query_ctx);
|
||||
let mut source_table_names = Vec::new();
|
||||
for src_id in source_table_ids {
|
||||
let table_name = self
|
||||
.table_meta
|
||||
.table_info_manager()
|
||||
.get(src_id)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?
|
||||
.with_context(|| UnexpectedSnafu {
|
||||
reason: format!("Table id = {:?}, couldn't found table name", src_id),
|
||||
})
|
||||
.map(|name| name.table_name())
|
||||
.map(|name| [name.catalog_name, name.schema_name, name.table_name])?;
|
||||
source_table_names.push(table_name);
|
||||
}
|
||||
|
||||
let (tx, rx) = oneshot::channel();
|
||||
|
||||
let plan = sql_to_df_plan(query_ctx.clone(), self.engine.clone(), &sql, true).await?;
|
||||
let (column_name, time_window_expr, _, df_schema) = find_time_window_expr(
|
||||
&plan,
|
||||
self.engine.engine_state().catalog_manager().clone(),
|
||||
query_ctx.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let phy_expr = time_window_expr
|
||||
.map(|expr| TimeWindowExpr::from_expr(&expr, &column_name, &df_schema))
|
||||
.transpose()?;
|
||||
|
||||
let task = RecordingRuleTask::new(
|
||||
flow_id,
|
||||
&sql,
|
||||
phy_expr,
|
||||
expire_after,
|
||||
sink_table_name,
|
||||
source_table_names,
|
||||
query_ctx,
|
||||
rx,
|
||||
);
|
||||
|
||||
let task_inner = task.clone();
|
||||
let engine = self.engine.clone();
|
||||
let frontend = self.frontend_client.clone();
|
||||
|
||||
// TODO(discord9): also save the handle & use a time wheel or similar for better scheduling
|
||||
let _handle = common_runtime::spawn_global(async move {
|
||||
match task_inner.start_executing(engine, frontend).await {
|
||||
Ok(()) => info!("Flow {} shutdown", task_inner.flow_id),
|
||||
Err(err) => common_telemetry::error!(
|
||||
"Flow {} encounter unrecoverable error: {err:?}",
|
||||
task_inner.flow_id
|
||||
),
|
||||
}
|
||||
});
|
||||
|
||||
// TODO(discord9): deal with replace logic
|
||||
let replaced_old_task_opt = self.tasks.write().await.insert(flow_id, task);
|
||||
drop(replaced_old_task_opt);
|
||||
|
||||
self.shutdown_txs.write().await.insert(flow_id, tx);
|
||||
|
||||
Ok(Some(flow_id))
|
||||
}
|
||||
|
||||
pub async fn remove_flow(&self, flow_id: FlowId) -> Result<(), Error> {
|
||||
if self.tasks.write().await.remove(&flow_id).is_none() {
|
||||
warn!("Flow {flow_id} not found in tasks")
|
||||
}
|
||||
let Some(tx) = self.shutdown_txs.write().await.remove(&flow_id) else {
|
||||
UnexpectedSnafu {
|
||||
reason: format!("Can't found shutdown tx for flow {flow_id}"),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
if tx.send(()).is_err() {
|
||||
warn!("Fail to shutdown flow {flow_id} due to receiver already dropped, maybe flow {flow_id} is already dropped?")
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RecordingRuleTask {
|
||||
flow_id: FlowId,
|
||||
query: String,
|
||||
time_window_expr: Option<TimeWindowExpr>,
|
||||
/// in seconds
|
||||
expire_after: Option<i64>,
|
||||
sink_table_name: [String; 3],
|
||||
source_table_names: HashSet<[String; 3]>,
|
||||
state: Arc<RwLock<RecordingRuleState>>,
|
||||
}
|
||||
|
||||
impl RecordingRuleTask {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
flow_id: FlowId,
|
||||
query: &str,
|
||||
time_window_expr: Option<TimeWindowExpr>,
|
||||
expire_after: Option<i64>,
|
||||
sink_table_name: [String; 3],
|
||||
source_table_names: Vec<[String; 3]>,
|
||||
query_ctx: QueryContextRef,
|
||||
shutdown_rx: oneshot::Receiver<()>,
|
||||
) -> Self {
|
||||
Self {
|
||||
flow_id,
|
||||
query: query.to_string(),
|
||||
time_window_expr,
|
||||
expire_after,
|
||||
sink_table_name,
|
||||
source_table_names: source_table_names.into_iter().collect(),
|
||||
state: Arc::new(RwLock::new(RecordingRuleState::new(query_ctx, shutdown_rx))),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl RecordingRuleTask {
|
||||
/// This should be called in a new tokio task
|
||||
pub async fn start_executing(
|
||||
&self,
|
||||
engine: QueryEngineRef,
|
||||
frontend_client: Arc<FrontendClient>,
|
||||
) -> Result<(), Error> {
|
||||
// only the first query doesn't need an upper bound
|
||||
let mut is_first = true;
|
||||
|
||||
loop {
|
||||
// FIXME(discord9): test if need upper bound also works
|
||||
let new_query = self.gen_query_with_time_window(engine.clone()).await?;
|
||||
|
||||
let insert_into = if let Some(new_query) = new_query {
|
||||
format!(
|
||||
"INSERT INTO {}.{}.{} {}",
|
||||
self.sink_table_name[0],
|
||||
self.sink_table_name[1],
|
||||
self.sink_table_name[2],
|
||||
new_query
|
||||
)
|
||||
} else {
|
||||
tokio::time::sleep(MIN_REFRESH_DURATION).await;
|
||||
continue;
|
||||
};
|
||||
|
||||
if is_first {
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
let instant = Instant::now();
|
||||
let flow_id = self.flow_id;
|
||||
let db_client = frontend_client.get_database_client().await?;
|
||||
let peer_addr = db_client.peer.addr;
|
||||
debug!(
|
||||
"Executing flow {flow_id}(expire_after={:?} secs) on {:?} with query {}",
|
||||
self.expire_after, peer_addr, &insert_into
|
||||
);
|
||||
|
||||
let timer = METRIC_FLOW_RULE_ENGINE_QUERY_TIME
|
||||
.with_label_values(&[flow_id.to_string().as_str()])
|
||||
.start_timer();
|
||||
|
||||
let res = db_client.database.sql(&insert_into).await;
|
||||
drop(timer);
|
||||
|
||||
let elapsed = instant.elapsed();
|
||||
if let Ok(res1) = &res {
|
||||
debug!(
|
||||
"Flow {flow_id} executed, result: {res1:?}, elapsed: {:?}",
|
||||
elapsed
|
||||
);
|
||||
} else if let Err(res) = &res {
|
||||
warn!(
|
||||
"Failed to execute Flow {flow_id} on frontend {}, result: {res:?}, elapsed: {:?} with query: {}",
|
||||
peer_addr, elapsed, &insert_into
|
||||
);
|
||||
}
|
||||
|
||||
// record slow query
|
||||
if elapsed >= SLOW_QUERY_THRESHOLD {
|
||||
warn!(
|
||||
"Flow {flow_id} on frontend {} executed for {:?} before complete, query: {}",
|
||||
peer_addr, elapsed, &insert_into
|
||||
);
|
||||
METRIC_FLOW_RULE_ENGINE_SLOW_QUERY
|
||||
.with_label_values(&[flow_id.to_string().as_str(), &insert_into, &peer_addr])
|
||||
.observe(elapsed.as_secs_f64());
|
||||
}
|
||||
|
||||
self.state
|
||||
.write()
|
||||
.await
|
||||
.after_query_exec(elapsed, res.is_ok());
|
||||
|
||||
let sleep_until = {
|
||||
let mut state = self.state.write().await;
|
||||
match state.shutdown_rx.try_recv() {
|
||||
Ok(()) => break Ok(()),
|
||||
Err(TryRecvError::Closed) => {
|
||||
warn!("Unexpected shutdown flow {flow_id}, shutdown anyway");
|
||||
break Ok(());
|
||||
}
|
||||
Err(TryRecvError::Empty) => (),
|
||||
}
|
||||
state.get_next_start_query_time(None)
|
||||
};
|
||||
tokio::time::sleep_until(sleep_until).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Will merge and use at most the first `MAX_FILTER_NUM` time windows in the query
|
||||
async fn gen_query_with_time_window(
|
||||
&self,
|
||||
engine: QueryEngineRef,
|
||||
) -> Result<Option<String>, Error> {
|
||||
let query_ctx = self.state.read().await.query_ctx.clone();
|
||||
let start = SystemTime::now();
|
||||
let since_the_epoch = start
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards");
|
||||
let low_bound = self
|
||||
.expire_after
|
||||
.map(|e| since_the_epoch.as_secs() - e as u64);
|
||||
|
||||
let Some(low_bound) = low_bound else {
|
||||
return Ok(Some(self.query.clone()));
|
||||
};
|
||||
|
||||
let low_bound = Timestamp::new_second(low_bound as i64);
|
||||
|
||||
let plan = sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, true).await?;
|
||||
|
||||
// TODO(discord9): find more time window!
|
||||
let (col_name, lower, upper) =
|
||||
find_plan_time_window_bound(&plan, low_bound, query_ctx.clone(), engine.clone())
|
||||
.await?;
|
||||
|
||||
let new_sql = {
|
||||
let expr = {
|
||||
match (lower, upper) {
|
||||
(Some(l), Some(u)) => {
|
||||
let window_size = u.sub(&l).with_context(|| UnexpectedSnafu {
|
||||
reason: format!("Can't get window size from {u:?} - {l:?}"),
|
||||
})?;
|
||||
|
||||
self.state
|
||||
.write()
|
||||
.await
|
||||
.dirty_time_windows
|
||||
.gen_filter_exprs(&col_name, lower, window_size)?
|
||||
}
|
||||
_ => {
|
||||
warn!(
|
||||
"Flow id = {:?}, can't get window size: lower={lower:?}, upper={upper:?}, using the same query", self.flow_id
|
||||
);
|
||||
// since no time window lower/upper bound is found, just return the original query
|
||||
return Ok(Some(self.query.clone()));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
debug!(
|
||||
"Flow id={:?}, Generated filter expr: {:?}",
|
||||
self.flow_id,
|
||||
expr.as_ref()
|
||||
.map(|expr| expr_to_sql(expr).with_context(|_| DatafusionSnafu {
|
||||
context: format!("Failed to generate filter expr from {expr:?}"),
|
||||
}))
|
||||
.transpose()?
|
||||
.map(|s| s.to_string())
|
||||
);
|
||||
|
||||
let Some(expr) = expr else {
|
||||
// no new data, hence no need to update
|
||||
debug!("Flow id={:?}, no new data, not update", self.flow_id);
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let mut add_filter = AddFilterRewriter::new(expr);
|
||||
// use an unoptimized plan for clearer unparsing
|
||||
let plan =
|
||||
sql_to_df_plan(query_ctx.clone(), engine.clone(), &self.query, false).await?;
|
||||
let plan = plan
|
||||
.clone()
|
||||
.rewrite(&mut add_filter)
|
||||
.with_context(|_| DatafusionSnafu {
|
||||
context: format!("Failed to rewrite plan {plan:?}"),
|
||||
})?
|
||||
.data;
|
||||
df_plan_to_sql(&plan)?
|
||||
};
|
||||
|
||||
Ok(Some(new_sql))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RecordingRuleState {
|
||||
query_ctx: QueryContextRef,
|
||||
/// last query complete time
|
||||
last_update_time: Instant,
|
||||
/// last time query duration
|
||||
last_query_duration: Duration,
|
||||
/// Dirty Time windows need to be updated
|
||||
/// mapping of `start -> end` and non-overlapping
|
||||
dirty_time_windows: DirtyTimeWindows,
|
||||
exec_state: ExecState,
|
||||
shutdown_rx: oneshot::Receiver<()>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct DirtyTimeWindows {
|
||||
windows: BTreeMap<Timestamp, Option<Timestamp>>,
|
||||
}
|
||||
|
||||
fn to_df_literal(value: Timestamp) -> Result<datafusion_common::ScalarValue, Error> {
|
||||
let value = Value::from(value);
|
||||
let value = value
|
||||
.try_to_scalar_value(&value.data_type())
|
||||
.with_context(|_| DatatypesSnafu {
|
||||
extra: format!("Failed to convert to scalar value: {}", value),
|
||||
})?;
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
impl DirtyTimeWindows {
|
||||
/// Time window merge distance
|
||||
const MERGE_DIST: i32 = 3;
|
||||
|
||||
/// Maximum number of filters allowed in a single query
|
||||
const MAX_FILTER_NUM: usize = 20;
|
||||
|
||||
/// Add lower bounds to the dirty time windows. Upper bounds are ignored.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `lower_bounds` - An iterator of lower bounds to be added.
|
||||
pub fn add_lower_bounds(&mut self, lower_bounds: impl Iterator<Item = Timestamp>) {
|
||||
for lower_bound in lower_bounds {
|
||||
let entry = self.windows.entry(lower_bound);
|
||||
entry.or_insert(None);
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate all filter expressions consuming all time windows
|
||||
pub fn gen_filter_exprs(
|
||||
&mut self,
|
||||
col_name: &str,
|
||||
expire_lower_bound: Option<Timestamp>,
|
||||
window_size: chrono::Duration,
|
||||
) -> Result<Option<datafusion_expr::Expr>, Error> {
|
||||
debug!(
|
||||
"expire_lower_bound: {:?}, window_size: {:?}",
|
||||
expire_lower_bound.map(|t| t.to_iso8601_string()),
|
||||
window_size
|
||||
);
|
||||
self.merge_dirty_time_windows(window_size)?;
|
||||
|
||||
if self.windows.len() > Self::MAX_FILTER_NUM {
|
||||
warn!(
|
||||
"Too many time windows: {}, only the first {} are taken for this query, the group by expression might be wrong",
|
||||
self.windows.len(),
|
||||
Self::MAX_FILTER_NUM
|
||||
);
|
||||
}
|
||||
|
||||
// get the first `MAX_FILTER_NUM` time windows
|
||||
let nth = self
|
||||
.windows
|
||||
.iter()
|
||||
.nth(Self::MAX_FILTER_NUM)
|
||||
.map(|(key, _)| *key);
|
||||
let first_nth = {
|
||||
if let Some(nth) = nth {
|
||||
let mut after = self.windows.split_off(&nth);
|
||||
std::mem::swap(&mut self.windows, &mut after);
|
||||
|
||||
after
|
||||
} else {
|
||||
std::mem::take(&mut self.windows)
|
||||
}
|
||||
};
|
||||
|
||||
let mut expr_lst = vec![];
|
||||
for (start, end) in first_nth.into_iter() {
|
||||
debug!(
|
||||
"Time window start: {:?}, end: {:?}",
|
||||
start.to_iso8601_string(),
|
||||
end.map(|t| t.to_iso8601_string())
|
||||
);
|
||||
let start = if let Some(expire) = expire_lower_bound {
|
||||
start.max(expire)
|
||||
} else {
|
||||
start
|
||||
};
|
||||
|
||||
// filter out expired time window
|
||||
if let Some(end) = end {
|
||||
if end <= start {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
use datafusion_expr::{col, lit};
|
||||
let lower = to_df_literal(start)?;
|
||||
let upper = end.map(to_df_literal).transpose()?;
|
||||
let expr = if let Some(upper) = upper {
|
||||
col(col_name)
|
||||
.gt_eq(lit(lower))
|
||||
.and(col(col_name).lt(lit(upper)))
|
||||
} else {
|
||||
col(col_name).gt_eq(lit(lower))
|
||||
};
|
||||
expr_lst.push(expr);
|
||||
}
|
||||
let expr = expr_lst.into_iter().reduce(|a, b| a.and(b));
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
/// Merge time windows that overlap or get too close
|
||||
pub fn merge_dirty_time_windows(&mut self, window_size: chrono::Duration) -> Result<(), Error> {
|
||||
let mut new_windows = BTreeMap::new();
|
||||
|
||||
let mut prev_tw = None;
|
||||
for (lower_bound, upper_bound) in std::mem::take(&mut self.windows) {
|
||||
let Some(prev_tw) = &mut prev_tw else {
|
||||
prev_tw = Some((lower_bound, upper_bound));
|
||||
continue;
|
||||
};
|
||||
|
||||
let std_window_size = window_size.to_std().map_err(|e| {
|
||||
InternalSnafu {
|
||||
reason: e.to_string(),
|
||||
}
|
||||
.build()
|
||||
})?;
|
||||
|
||||
// if cur.lower - prev.upper <= window_size * 2, merge
|
||||
let prev_upper = prev_tw
|
||||
.1
|
||||
.unwrap_or(prev_tw.0.add_duration(std_window_size).context(TimeSnafu)?);
|
||||
prev_tw.1 = Some(prev_upper);
|
||||
|
||||
let cur_upper = upper_bound.unwrap_or(
|
||||
lower_bound
|
||||
.add_duration(std_window_size)
|
||||
.context(TimeSnafu)?,
|
||||
);
|
||||
|
||||
if lower_bound
|
||||
.sub(&prev_upper)
|
||||
.map(|dist| dist <= window_size * Self::MERGE_DIST)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
prev_tw.1 = Some(cur_upper);
|
||||
} else {
|
||||
new_windows.insert(prev_tw.0, prev_tw.1);
|
||||
*prev_tw = (lower_bound, Some(cur_upper));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(prev_tw) = prev_tw {
|
||||
new_windows.insert(prev_tw.0, prev_tw.1);
|
||||
}
|
||||
|
||||
self.windows = new_windows;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
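
// Illustration only: a std-only sketch of the merging rule implemented above, with
// hypothetical names. Sorted dirty ranges are coalesced when the gap between the
// previous range's upper bound and the next range's lower bound is at most
// `MERGE_DIST` window sizes, so one query can cover several nearby dirty windows.
#[allow(dead_code)]
fn merge_ranges_sketch(mut ranges: Vec<(i64, i64)>, window: i64, merge_dist: i64) -> Vec<(i64, i64)> {
    ranges.sort_by_key(|r| r.0);
    let mut merged: Vec<(i64, i64)> = Vec::new();
    for (lo, hi) in ranges {
        if let Some(prev) = merged.last_mut() {
            if lo - prev.1 <= window * merge_dist {
                // close enough: extend the previous range instead of starting a new one
                prev.1 = prev.1.max(hi);
                continue;
            }
        }
        merged.push((lo, hi));
    }
    merged
}

// e.g. with 5 minute (300 s) windows and MERGE_DIST = 3, the ranges (0, 300) and
// (1200, 1500) are exactly close enough to merge into (0, 1500), matching the
// "just enough to merge" case in the unit test below.
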
|
||||
|
||||
impl RecordingRuleState {
|
||||
pub fn new(query_ctx: QueryContextRef, shutdown_rx: oneshot::Receiver<()>) -> Self {
|
||||
Self {
|
||||
query_ctx,
|
||||
last_update_time: Instant::now(),
|
||||
last_query_duration: Duration::from_secs(0),
|
||||
dirty_time_windows: Default::default(),
|
||||
exec_state: ExecState::Idle,
|
||||
shutdown_rx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Called after the last query is done.
/// `is_succ` indicates whether the last query was successful.
|
||||
pub fn after_query_exec(&mut self, elapsed: Duration, _is_succ: bool) {
|
||||
self.exec_state = ExecState::Idle;
|
||||
self.last_query_duration = elapsed;
|
||||
self.last_update_time = Instant::now();
|
||||
}
|
||||
|
||||
/// wait for at least `last_query_duration`, at most `max_timeout` to start next query
|
||||
pub fn get_next_start_query_time(&self, max_timeout: Option<Duration>) -> Instant {
|
||||
let next_duration = max_timeout
|
||||
.unwrap_or(self.last_query_duration)
|
||||
.min(self.last_query_duration);
|
||||
let next_duration = next_duration.max(MIN_REFRESH_DURATION);
|
||||
|
||||
self.last_update_time + next_duration
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
enum ExecState {
|
||||
Idle,
|
||||
Executing,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_merge_dirty_time_windows() {
|
||||
let mut dirty = DirtyTimeWindows::default();
|
||||
dirty.add_lower_bounds(
|
||||
vec![
|
||||
Timestamp::new_second(0),
|
||||
Timestamp::new_second((1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
|
||||
]
|
||||
.into_iter(),
|
||||
);
|
||||
dirty
|
||||
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60))
|
||||
.unwrap();
|
||||
// just enough to merge
|
||||
assert_eq!(
|
||||
dirty.windows,
|
||||
BTreeMap::from([(
|
||||
Timestamp::new_second(0),
|
||||
Some(Timestamp::new_second(
|
||||
(2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
|
||||
))
|
||||
)])
|
||||
);
|
||||
|
||||
// separate time window
|
||||
let mut dirty = DirtyTimeWindows::default();
|
||||
dirty.add_lower_bounds(
|
||||
vec![
|
||||
Timestamp::new_second(0),
|
||||
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
|
||||
]
|
||||
.into_iter(),
|
||||
);
|
||||
dirty
|
||||
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60))
|
||||
.unwrap();
|
||||
// just enough to merge
|
||||
assert_eq!(
|
||||
BTreeMap::from([
|
||||
(
|
||||
Timestamp::new_second(0),
|
||||
Some(Timestamp::new_second(5 * 60))
|
||||
),
|
||||
(
|
||||
Timestamp::new_second((2 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
|
||||
Some(Timestamp::new_second(
|
||||
(3 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
|
||||
))
|
||||
)
|
||||
]),
|
||||
dirty.windows
|
||||
);
|
||||
|
||||
// overlapping
|
||||
let mut dirty = DirtyTimeWindows::default();
|
||||
dirty.add_lower_bounds(
|
||||
vec![
|
||||
Timestamp::new_second(0),
|
||||
Timestamp::new_second((DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60),
|
||||
]
|
||||
.into_iter(),
|
||||
);
|
||||
dirty
|
||||
.merge_dirty_time_windows(chrono::Duration::seconds(5 * 60))
|
||||
.unwrap();
|
||||
// just enough to merge
|
||||
assert_eq!(
|
||||
BTreeMap::from([(
|
||||
Timestamp::new_second(0),
|
||||
Some(Timestamp::new_second(
|
||||
(1 + DirtyTimeWindows::MERGE_DIST as i64) * 5 * 60
|
||||
))
|
||||
),]),
|
||||
dirty.windows
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,150 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Frontend client to run a flow as a recording rule, i.e. a time-window-aware normal query triggered on every tick set by the user
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use client::{Client, Database, DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_grpc::channel_manager::{ChannelConfig, ChannelManager};
|
||||
use common_meta::cluster::{NodeInfo, NodeInfoKey, Role};
|
||||
use common_meta::peer::Peer;
|
||||
use common_meta::rpc::store::RangeRequest;
|
||||
use meta_client::client::MetaClient;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{ExternalSnafu, UnexpectedSnafu};
|
||||
use crate::recording_rules::engine::DEFAULT_RULE_ENGINE_QUERY_TIMEOUT;
|
||||
use crate::Error;
|
||||
|
||||
fn default_channel_mgr() -> ChannelManager {
|
||||
let cfg = ChannelConfig::new().timeout(DEFAULT_RULE_ENGINE_QUERY_TIMEOUT);
|
||||
ChannelManager::with_config(cfg)
|
||||
}
|
||||
|
||||
fn client_from_urls(addrs: Vec<String>) -> Client {
|
||||
Client::with_manager_and_urls(default_channel_mgr(), addrs)
|
||||
}
|
||||
|
||||
/// A simple frontend client able to execute sql using grpc protocol
|
||||
#[derive(Debug)]
|
||||
pub enum FrontendClient {
|
||||
Distributed {
|
||||
meta_client: Arc<MetaClient>,
|
||||
},
|
||||
Standalone {
|
||||
/// For the sake of simplicity, still use grpc even in standalone mode.
/// Note the clients here should all be lazy, so that they can wait until the frontend is booted before making a connection.
/// TODO(discord9): not use grpc under standalone mode
|
||||
database_client: DatabaseWithPeer,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DatabaseWithPeer {
|
||||
pub database: Database,
|
||||
pub peer: Peer,
|
||||
}
|
||||
|
||||
impl DatabaseWithPeer {
|
||||
fn new(database: Database, peer: Peer) -> Self {
|
||||
Self { database, peer }
|
||||
}
|
||||
}
|
||||
|
||||
impl FrontendClient {
|
||||
pub fn from_meta_client(meta_client: Arc<MetaClient>) -> Self {
|
||||
Self::Distributed { meta_client }
|
||||
}
|
||||
|
||||
pub fn from_static_grpc_addr(addr: String) -> Self {
|
||||
let peer = Peer {
|
||||
id: 0,
|
||||
addr: addr.clone(),
|
||||
};
|
||||
|
||||
let client = client_from_urls(vec![addr]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
Self::Standalone {
|
||||
database_client: DatabaseWithPeer::new(database, peer),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FrontendClient {
|
||||
async fn scan_for_frontend(&self) -> Result<Vec<(NodeInfoKey, NodeInfo)>, Error> {
|
||||
let Self::Distributed { meta_client, .. } = self else {
|
||||
return Ok(vec![]);
|
||||
};
|
||||
let cluster_client = meta_client
|
||||
.cluster_client()
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
let cluster_id = meta_client.id().0;
|
||||
let prefix = NodeInfoKey::key_prefix_with_role(cluster_id, Role::Frontend);
|
||||
let req = RangeRequest::new().with_prefix(prefix);
|
||||
let resp = cluster_client
|
||||
.range(req)
|
||||
.await
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
let mut res = Vec::with_capacity(resp.kvs.len());
|
||||
for kv in resp.kvs {
|
||||
let key = NodeInfoKey::try_from(kv.key)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
|
||||
let val = NodeInfo::try_from(kv.value)
|
||||
.map_err(BoxedError::new)
|
||||
.context(ExternalSnafu)?;
|
||||
res.push((key, val));
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Get the database with max `last_activity_ts`
|
||||
async fn get_last_active_frontend(&self) -> Result<DatabaseWithPeer, Error> {
|
||||
if let Self::Standalone { database_client } = self {
|
||||
return Ok(database_client.clone());
|
||||
}
|
||||
|
||||
let frontends = self.scan_for_frontend().await?;
|
||||
let mut last_activity_ts = i64::MIN;
|
||||
let mut peer = None;
|
||||
for (_key, val) in frontends.iter() {
|
||||
if val.last_activity_ts > last_activity_ts {
|
||||
last_activity_ts = val.last_activity_ts;
|
||||
peer = Some(val.peer.clone());
|
||||
}
|
||||
}
|
||||
let Some(peer) = peer else {
|
||||
UnexpectedSnafu {
|
||||
reason: format!("No frontend available: {:?}", frontends),
|
||||
}
|
||||
.fail()?
|
||||
};
|
||||
let client = client_from_urls(vec![peer.addr.clone()]);
|
||||
let database = Database::new(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, client);
|
||||
Ok(DatabaseWithPeer::new(database, peer))
|
||||
}
|
||||
|
||||
/// Get a database client, and possibly update it before returning.
|
||||
pub async fn get_database_client(&self) -> Result<DatabaseWithPeer, Error> {
|
||||
match self {
|
||||
Self::Standalone { database_client } => Ok(database_client.clone()),
|
||||
Self::Distributed { meta_client: _ } => self.get_last_active_frontend().await,
|
||||
}
|
||||
}
|
||||
}
|
||||
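For context, a recording-rule caller re-resolves the frontend on every query so that a newly active frontend is picked up automatically. The sketch below is illustrative only: it assumes the `Database` handle exposes an async `sql` method (as in the GreptimeDB client crate) and reuses the error wrappers from this module.

```rust
// Illustrative sketch, not part of the diff. Assumes `Database::sql` exists on
// the client handle; errors are wrapped the same way as in scan_for_frontend.
async fn run_on_latest_frontend(client: &FrontendClient, sql: &str) -> Result<(), Error> {
    // Re-resolve so the most recently active frontend handles this query.
    let db = client.get_database_client().await?;
    db.database
        .sql(sql)
        .await
        .map_err(BoxedError::new)
        .context(ExternalSnafu)?;
    Ok(())
}
```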
@@ -57,7 +57,6 @@ use crate::error::{
};
use crate::heartbeat::HeartbeatTask;
use crate::metrics::{METRIC_FLOW_PROCESSING_TIME, METRIC_FLOW_ROWS};
use crate::recording_rules::{FrontendClient, RecordingRuleEngine};
use crate::transform::register_function_to_query_engine;
use crate::utils::{SizeReportSender, StateReportHandler};
use crate::{Error, FlowWorkerManager, FlownodeOptions};
@@ -246,7 +245,6 @@ impl FlownodeInstance {
        self.server.shutdown().await.context(ShutdownServerSnafu)?;

        if let Some(task) = &self.heartbeat_task {
            info!("Close heartbeat task for flownode");
            task.shutdown();
        }

@@ -273,8 +271,6 @@ pub struct FlownodeBuilder {
    heartbeat_task: Option<HeartbeatTask>,
    /// receive a oneshot sender to send state size report
    state_report_handler: Option<StateReportHandler>,
    /// Client to send sql to frontend
    frontend_client: Arc<FrontendClient>,
}

impl FlownodeBuilder {
@@ -285,7 +281,6 @@ impl FlownodeBuilder {
        table_meta: TableMetadataManagerRef,
        catalog_manager: CatalogManagerRef,
        flow_metadata_manager: FlowMetadataManagerRef,
        frontend_client: Arc<FrontendClient>,
    ) -> Self {
        Self {
            opts,
@@ -295,7 +290,6 @@ impl FlownodeBuilder {
            flow_metadata_manager,
            heartbeat_task: None,
            state_report_handler: None,
            frontend_client,
        }
    }

@@ -453,14 +447,7 @@ impl FlownodeBuilder {

        let node_id = self.opts.node_id.map(|id| id as u32);

        let rule_engine = RecordingRuleEngine::new(
            self.frontend_client.clone(),
            query_engine.clone(),
            self.flow_metadata_manager.clone(),
            table_meta.clone(),
        );

        let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta, rule_engine);
        let mut man = FlowWorkerManager::new(node_id, query_engine, table_meta);
        for worker_id in 0..num_workers {
            let (tx, rx) = oneshot::channel();
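The two adjacent `FlowWorkerManager::new` calls above are the two sides of this compare rendered next to each other: one branch threads a `RecordingRuleEngine` through the worker manager, the other does not. A hypothetical standalone wiring sketch is shown below; the builder's full parameter list is reconstructed from the hunks and may be incomplete, and the local gRPC address is only an assumption.

```rust
// Hypothetical sketch: give the flownode builder a lazy frontend client that
// points at the local frontend's gRPC port, which the rule engine later uses
// to run its recording-rule queries. Argument order is an assumption.
let frontend_client = Arc::new(FrontendClient::from_static_grpc_addr(
    "127.0.0.1:4001".to_string(),
));
let builder = FlownodeBuilder::new(
    opts,
    table_meta,
    catalog_manager,
    flow_metadata_manager,
    frontend_client,
);
```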
@@ -86,8 +86,7 @@ pub fn create_test_query_engine() -> Arc<dyn QueryEngine> {

    let schema = vec![
        datatypes::schema::ColumnSchema::new("number", CDT::uint32_datatype(), false),
        datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false)
            .with_time_index(true),
        datatypes::schema::ColumnSchema::new("ts", CDT::timestamp_millisecond_datatype(), false),
    ];
    let mut columns = vec![];
    let numbers = (1..=10).collect_vec();
@@ -36,11 +36,11 @@ use servers::error::{
    TableNotFoundSnafu,
};
use servers::http::jaeger::QueryTraceParams;
use servers::otlp::trace::{
use servers::otlp::trace::v0::{
    DURATION_NANO_COLUMN, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN, SPAN_ID_COLUMN,
    SPAN_KIND_COLUMN, SPAN_KIND_PREFIX, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
    TRACE_TABLE_NAME,
};
use servers::otlp::trace::TRACE_TABLE_NAME;
use servers::query_handler::JaegerQueryHandler;
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
@@ -127,26 +127,4 @@ impl Instance {
            .map_err(BoxedError::new)
            .context(ExecuteGrpcRequestSnafu)
    }

    pub async fn handle_trace_inserts(
        &self,
        rows: RowInsertRequests,
        ctx: QueryContextRef,
    ) -> ServerResult<Output> {
        let _guard = if let Some(limiter) = &self.limiter {
            let result = limiter.limit_row_inserts(&rows);
            if result.is_none() {
                return InFlightWriteBytesExceededSnafu.fail();
            }
            result
        } else {
            None
        };

        self.inserter
            .handle_trace_inserts(rows, ctx, self.statement_executor.as_ref())
            .await
            .map_err(BoxedError::new)
            .context(ExecuteGrpcRequestSnafu)
    }
}

@@ -101,7 +101,17 @@ impl OpenTelemetryProtocolHandler for Instance {

        OTLP_TRACES_ROWS.inc_by(rows as u64);

        self.handle_trace_inserts(requests, ctx)
        let _guard = if let Some(limiter) = &self.limiter {
            let result = limiter.limit_row_inserts(&requests);
            if result.is_none() {
                return InFlightWriteBytesExceededSnafu.fail();
            }
            result
        } else {
            None
        };

        self.handle_log_inserts(requests, ctx)
            .await
            .map_err(BoxedError::new)
            .context(error::ExecuteGrpcQuerySnafu)
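Both hunks above repeat the same guard acquisition: ask the optional limiter for an in-flight write permit and bail out when the budget is exhausted. A hedged refactoring sketch follows; `Limiter` and `LimiterGuard` are placeholder names, and only `limit_row_inserts` returning `None` on exhaustion is taken from the code above.

```rust
// Hedged sketch: factor the repeated guard pattern into one helper.
// `Limiter` and `LimiterGuard` are placeholder type names.
fn acquire_write_guard(
    limiter: Option<&Limiter>,
    requests: &RowInsertRequests,
) -> ServerResult<Option<LimiterGuard>> {
    match limiter {
        Some(limiter) => match limiter.limit_row_inserts(requests) {
            // Keep the guard alive for the duration of the insert.
            Some(guard) => Ok(Some(guard)),
            // Budget exhausted: reject the request up front.
            None => InFlightWriteBytesExceededSnafu.fail(),
        },
        None => Ok(None),
    }
}
```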
@@ -112,7 +112,6 @@ impl MetaClientBuilder {
            .enable_store()
            .enable_heartbeat()
            .enable_procedure()
            .enable_access_cluster_info()
    }

    pub fn enable_heartbeat(self) -> Self {
@@ -799,14 +799,6 @@ pub enum Error {

    #[snafu(display("A cursor named {name} already exists"))]
    CursorExists { name: String },

    #[snafu(display("Column options error"))]
    ColumnOptions {
        #[snafu(source)]
        source: api::error::Error,
        #[snafu(implicit)]
        location: Location,
    },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -958,8 +950,6 @@ impl ErrorExt for Error {

            Error::UpgradeCatalogManagerRef { .. } => StatusCode::Internal,
            Error::StatementTimeout { .. } => StatusCode::Cancelled,

            Error::ColumnOptions { source, .. } => source.status_code(),
        }
    }

@@ -16,7 +16,6 @@ use std::sync::Arc;

use ahash::{HashMap, HashMapExt, HashSet, HashSetExt};
use api::v1::alter_table_expr::Kind;
use api::v1::column_def::options_from_skipping;
use api::v1::region::{
    InsertRequest as RegionInsertRequest, InsertRequests as RegionInsertRequests,
    RegionRequestHeader,
@@ -27,9 +26,7 @@ use api::v1::{
};
use catalog::CatalogManagerRef;
use client::{OutputData, OutputMeta};
use common_catalog::consts::{
    default_engine, PARENT_SPAN_ID_COLUMN, SPAN_NAME_COLUMN, TRACE_ID_COLUMN,
};
use common_catalog::consts::default_engine;
use common_grpc_expr::util::ColumnExpr;
use common_meta::cache::TableFlownodeSetCacheRef;
use common_meta::node_manager::{AffectedRows, NodeManagerRef};
@@ -37,16 +34,13 @@ use common_meta::peer::Peer;
use common_query::prelude::{GREPTIME_TIMESTAMP, GREPTIME_VALUE};
use common_query::Output;
use common_telemetry::tracing_context::TracingContext;
use common_telemetry::{error, info, warn};
use datatypes::schema::SkippingIndexOptions;
use common_telemetry::{error, info};
use futures_util::future;
use meter_macros::write_meter;
use partition::manager::PartitionRuleManagerRef;
use session::context::QueryContextRef;
use snafu::prelude::*;
use snafu::ResultExt;
use sql::partition::partition_rule_for_hexstring;
use sql::statements::create::Partitions;
use sql::statements::insert::Insert;
use store_api::metric_engine_consts::{
    LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME, PHYSICAL_TABLE_METADATA_KEY,
@@ -59,8 +53,8 @@ use table::table_reference::TableReference;
use table::TableRef;

use crate::error::{
    CatalogSnafu, ColumnOptionsSnafu, FindRegionLeaderSnafu, InvalidInsertRequestSnafu,
    JoinTaskSnafu, RequestInsertsSnafu, Result, TableNotFoundSnafu,
    CatalogSnafu, FindRegionLeaderSnafu, InvalidInsertRequestSnafu, JoinTaskSnafu,
    RequestInsertsSnafu, Result, TableNotFoundSnafu,
};
use crate::expr_helper;
use crate::region_req_factory::RegionRequestFactory;
@@ -74,7 +68,6 @@ pub struct Inserter {
    catalog_manager: CatalogManagerRef,
    partition_manager: PartitionRuleManagerRef,
    node_manager: NodeManagerRef,
    #[allow(unused)]
    table_flownode_set_cache: TableFlownodeSetCacheRef,
}

@@ -91,8 +84,6 @@ enum AutoCreateTableType {
    Log,
    /// A table that merges rows by `last_non_null` strategy.
    LastNonNull,
    /// Create table that build index and default partition rules on trace_id
    Trace,
}

impl AutoCreateTableType {
@@ -102,7 +93,6 @@ impl AutoCreateTableType {
            AutoCreateTableType::Physical => "physical",
            AutoCreateTableType::Log => "log",
            AutoCreateTableType::LastNonNull => "last_non_null",
            AutoCreateTableType::Trace => "trace",
        }
    }
}
@@ -181,21 +171,6 @@ impl Inserter {
        .await
    }

    pub async fn handle_trace_inserts(
        &self,
        requests: RowInsertRequests,
        ctx: QueryContextRef,
        statement_executor: &StatementExecutor,
    ) -> Result<Output> {
        self.handle_row_inserts_with_create_type(
            requests,
            ctx,
            statement_executor,
            AutoCreateTableType::Trace,
        )
        .await
    }

    /// Handles row inserts request and creates a table with `last_non_null` merge mode on demand.
    pub async fn handle_last_non_null_inserts(
        &self,
@@ -363,8 +338,6 @@ impl Inserter {
            instant_requests,
        } = requests;

        // TODO(discord9): mirror some

        // Mirror requests for source table to flownode asynchronously
        let flow_mirror_task = FlowMirrorTask::new(
            &self.table_flownode_set_cache,
@@ -555,56 +528,7 @@ impl Inserter {
                // for it's a very unexpected behavior and should be set by user explicitly
                for create_table in create_tables {
                    let table = self
                        .create_physical_table(create_table, None, ctx, statement_executor)
                        .await?;
                    let table_info = table.table_info();
                    if table_info.is_ttl_instant_table() {
                        instant_table_ids.insert(table_info.table_id());
                    }
                    table_infos.insert(table_info.table_id(), table.table_info());
                }
                for alter_expr in alter_tables.into_iter() {
                    statement_executor
                        .alter_table_inner(alter_expr, ctx.clone())
                        .await?;
                }
            }

            AutoCreateTableType::Trace => {
                // note that auto create table shouldn't be ttl instant table
                // for it's a very unexpected behavior and should be set by user explicitly
                for mut create_table in create_tables {
                    // prebuilt partition rules for uuid data: see the function
                    // for more information
                    let partitions = partition_rule_for_hexstring(TRACE_ID_COLUMN);
                    // add skip index to
                    // - trace_id: when searching by trace id
                    // - parent_span_id: when searching root span
                    // - span_name: when searching certain types of span
                    let index_columns = [TRACE_ID_COLUMN, PARENT_SPAN_ID_COLUMN, SPAN_NAME_COLUMN];
                    for index_column in index_columns {
                        if let Some(col) = create_table
                            .column_defs
                            .iter_mut()
                            .find(|c| c.name == index_column)
                        {
                            col.options = options_from_skipping(&SkippingIndexOptions::default())
                                .context(ColumnOptionsSnafu)?;
                        } else {
                            warn!(
                                "Column {} not found when creating index for trace table: {}.",
                                index_column, create_table.table_name
                            );
                        }
                    }

                    let table = self
                        .create_physical_table(
                            create_table,
                            Some(partitions),
                            ctx,
                            statement_executor,
                        )
                        .create_physical_table(create_table, ctx, statement_executor)
                        .await?;
                    let table_info = table.table_info();
                    if table_info.is_ttl_instant_table() {
@@ -734,9 +658,6 @@ impl Inserter {
            AutoCreateTableType::LastNonNull => {
                table_options.push((MERGE_MODE_KEY, "last_non_null"));
            }
            AutoCreateTableType::Trace => {
                table_options.push((APPEND_MODE_KEY, "true"));
            }
        }

        let schema = ctx.current_schema();
@@ -745,7 +666,6 @@ impl Inserter {
        let request_schema = req.rows.as_ref().unwrap().schema.as_slice();
        let mut create_table_expr =
            build_create_table_expr(&table_ref, request_schema, engine_name)?;

        info!("Table `{table_ref}` does not exist, try creating table");
        for (k, v) in table_options {
            create_table_expr
@@ -787,7 +707,6 @@ impl Inserter {
    async fn create_physical_table(
        &self,
        mut create_table_expr: CreateTableExpr,
        partitions: Option<Partitions>,
        ctx: &QueryContextRef,
        statement_executor: &StatementExecutor,
    ) -> Result<TableRef> {
@@ -801,7 +720,7 @@ impl Inserter {
            info!("Table `{table_ref}` does not exist, try creating table");
        }
        let res = statement_executor
            .create_table_inner(&mut create_table_expr, partitions, ctx.clone())
            .create_table_inner(&mut create_table_expr, None, ctx.clone())
            .await;

        let table_ref = TableReference::full(
@@ -898,14 +817,12 @@ struct CreateAlterTableResult {
    table_infos: HashMap<TableId, Arc<TableInfo>>,
}

#[allow(unused)]
struct FlowMirrorTask {
    requests: HashMap<Peer, RegionInsertRequests>,
    num_rows: usize,
}

impl FlowMirrorTask {
    #[allow(unused)]
    async fn new(
        cache: &TableFlownodeSetCacheRef,
        requests: impl Iterator<Item = &RegionInsertRequest>,
@@ -979,7 +896,6 @@ impl FlowMirrorTask {
        })
    }

    #[allow(unused)]
    fn detach(self, node_manager: NodeManagerRef) -> Result<()> {
        crate::metrics::DIST_MIRROR_PENDING_ROW_COUNT.add(self.num_rows as i64);
        for (peer, inserts) in self.requests {
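Condensing the `Trace` branch above: an auto-created trace table differs from a plain log table in three ways, grouped in the sketch below. It is a summary, not the literal control flow; in particular, exposing `table_options` as a string map on `CreateTableExpr` is an assumption, while the constants and helpers are the ones used in the hunks.

```rust
// Summary sketch of the trace-specific setup (assumes CreateTableExpr exposes
// table_options as a string map): append mode, default skipping indexes on the
// common lookup columns, and the prebuilt hex partition rule on trace_id.
fn customize_trace_table(create_table: &mut CreateTableExpr) -> Result<Partitions> {
    for name in [TRACE_ID_COLUMN, PARENT_SPAN_ID_COLUMN, SPAN_NAME_COLUMN] {
        if let Some(col) = create_table.column_defs.iter_mut().find(|c| c.name == name) {
            // Skipping index speeds up point lookups on these columns.
            col.options = options_from_skipping(&SkippingIndexOptions::default())
                .context(ColumnOptionsSnafu)?;
        }
    }
    // Traces are immutable events, so append mode replaces any merge mode.
    create_table
        .table_options
        .insert(APPEND_MODE_KEY.to_string(), "true".to_string());
    // 16-way partitioning over the hex trace_id prefix.
    Ok(partition_rule_for_hexstring(TRACE_ID_COLUMN))
}
```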
@@ -21,8 +21,14 @@ use table::TableRef;
use crate::parser::QueryLanguageParser;
use crate::{QueryEngineFactory, QueryEngineRef};

mod argmax_test;
mod argmin_test;
mod mean_test;
mod my_sum_udaf_example;
mod polyval_test;
mod query_engine_test;
mod scipy_stats_norm_cdf_test;
mod scipy_stats_norm_pdf;
mod time_range_filter_test;

mod function;
src/query/src/tests/argmax_test.rs (new file, 73 lines)
@@ -0,0 +1,73 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::WrapperType;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::tests::{exec_selection, function};
|
||||
use crate::QueryEngine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_argmax_aggregator() -> Result<()> {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let engine = function::create_query_engine();
|
||||
|
||||
macro_rules! test_argmax {
|
||||
([], $( { $T:ty } ),*) => {
|
||||
$(
|
||||
let column_name = format!("{}_number", std::any::type_name::<$T>());
|
||||
test_argmax_success::<$T>(&column_name, "numbers", engine.clone()).await?;
|
||||
)*
|
||||
}
|
||||
}
|
||||
for_all_primitive_types! { test_argmax }
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn test_argmax_success<T>(
|
||||
column_name: &str,
|
||||
table_name: &str,
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: WrapperType + PartialOrd,
|
||||
{
|
||||
let sql = format!("select ARGMAX({column_name}) as argmax from {table_name}");
|
||||
let result = exec_selection(engine.clone(), &sql).await;
|
||||
let value = function::get_value_from_batches("argmax", result);
|
||||
|
||||
let numbers =
|
||||
function::get_numbers_from_table::<T>(column_name, table_name, engine.clone()).await;
|
||||
let expected_value = match numbers.len() {
|
||||
0 => 0_u64,
|
||||
_ => {
|
||||
let mut index = 0;
|
||||
let mut max = numbers[0];
|
||||
for (i, &number) in numbers.iter().enumerate() {
|
||||
if max < number {
|
||||
max = number;
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
index as u64
|
||||
}
|
||||
};
|
||||
let expected_value = Value::from(expected_value);
|
||||
assert_eq!(value, expected_value);
|
||||
Ok(())
|
||||
}
|
||||
src/query/src/tests/argmin_test.rs (new file, 73 lines)
@@ -0,0 +1,73 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::WrapperType;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::tests::{exec_selection, function};
|
||||
use crate::QueryEngine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_argmin_aggregator() -> Result<()> {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let engine = function::create_query_engine();
|
||||
|
||||
macro_rules! test_argmin {
|
||||
([], $( { $T:ty } ),*) => {
|
||||
$(
|
||||
let column_name = format!("{}_number", std::any::type_name::<$T>());
|
||||
test_argmin_success::<$T>(&column_name, "numbers", engine.clone()).await?;
|
||||
)*
|
||||
}
|
||||
}
|
||||
for_all_primitive_types! { test_argmin }
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn test_argmin_success<T>(
|
||||
column_name: &str,
|
||||
table_name: &str,
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: WrapperType + PartialOrd,
|
||||
{
|
||||
let sql = format!("select argmin({column_name}) as argmin from {table_name}");
|
||||
let result = exec_selection(engine.clone(), &sql).await;
|
||||
let value = function::get_value_from_batches("argmin", result);
|
||||
|
||||
let numbers =
|
||||
function::get_numbers_from_table::<T>(column_name, table_name, engine.clone()).await;
|
||||
let expected_value = match numbers.len() {
|
||||
0 => 0_u32,
|
||||
_ => {
|
||||
let mut index = 0;
|
||||
let mut min = numbers[0];
|
||||
for (i, &number) in numbers.iter().enumerate() {
|
||||
if min > number {
|
||||
min = number;
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
index as u32
|
||||
}
|
||||
};
|
||||
let expected_value = Value::from(expected_value);
|
||||
assert_eq!(value, expected_value);
|
||||
Ok(())
|
||||
}
|
||||
@@ -16,14 +16,42 @@ use std::sync::Arc;
|
||||
|
||||
use common_function::scalars::vector::impl_conv::veclit_to_binlit;
|
||||
use common_recordbatch::RecordBatch;
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::vectors::BinaryVector;
|
||||
use datatypes::types::WrapperType;
|
||||
use datatypes::vectors::{BinaryVector, Helper};
|
||||
use rand::Rng;
|
||||
use table::test_util::MemTable;
|
||||
|
||||
use crate::tests::new_query_engine_with_table;
|
||||
use crate::QueryEngineRef;
|
||||
use crate::tests::{exec_selection, new_query_engine_with_table};
|
||||
use crate::{QueryEngine, QueryEngineRef};
|
||||
|
||||
pub fn create_query_engine() -> QueryEngineRef {
|
||||
let mut column_schemas = vec![];
|
||||
let mut columns = vec![];
|
||||
macro_rules! create_number_table {
|
||||
([], $( { $T:ty } ),*) => {
|
||||
$(
|
||||
let mut rng = rand::thread_rng();
|
||||
|
||||
let column_name = format!("{}_number", std::any::type_name::<$T>());
|
||||
let column_schema = ColumnSchema::new(column_name, Value::from(<$T>::default()).data_type(), true);
|
||||
column_schemas.push(column_schema);
|
||||
|
||||
let numbers = (1..=10).map(|_| rng.gen::<$T>()).collect::<Vec<$T>>();
|
||||
let column: VectorRef = Arc::new(<$T as Scalar>::VectorType::from_vec(numbers.to_vec()));
|
||||
columns.push(column);
|
||||
)*
|
||||
}
|
||||
}
|
||||
for_all_primitive_types! { create_number_table }
|
||||
|
||||
let schema = Arc::new(Schema::new(column_schemas.clone()));
|
||||
let recordbatch = RecordBatch::new(schema, columns).unwrap();
|
||||
let number_table = MemTable::table("numbers", recordbatch);
|
||||
new_query_engine_with_table(number_table)
|
||||
}
|
||||
|
||||
pub fn create_query_engine_for_vector10x3() -> QueryEngineRef {
|
||||
let mut column_schemas = vec![];
|
||||
@@ -53,6 +81,22 @@ pub fn create_query_engine_for_vector10x3() -> QueryEngineRef {
|
||||
new_query_engine_with_table(vector_table)
|
||||
}
|
||||
|
||||
pub async fn get_numbers_from_table<'s, T>(
|
||||
column_name: &'s str,
|
||||
table_name: &'s str,
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Vec<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
let sql = format!("SELECT {column_name} FROM {table_name}");
|
||||
let numbers = exec_selection(engine, &sql).await;
|
||||
|
||||
let column = numbers[0].column(0);
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(column) };
|
||||
column.iter_data().flatten().collect::<Vec<T>>()
|
||||
}
|
||||
|
||||
pub fn get_value_from_batches(column_name: &str, batches: Vec<RecordBatch>) -> Value {
|
||||
assert_eq!(1, batches.len());
|
||||
assert_eq!(batches[0].num_columns(), 1);
|
||||
|
||||
src/query/src/tests/mean_test.rs (new file, 68 lines)
@@ -0,0 +1,68 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::WrapperType;
|
||||
use datatypes::value::OrderedFloat;
|
||||
use num_traits::AsPrimitive;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::tests::{exec_selection, function};
|
||||
use crate::QueryEngine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_mean_aggregator() -> Result<()> {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let engine = function::create_query_engine();
|
||||
|
||||
macro_rules! test_mean {
|
||||
([], $( { $T:ty } ),*) => {
|
||||
$(
|
||||
let column_name = format!("{}_number", std::any::type_name::<$T>());
|
||||
test_mean_success::<$T>(&column_name, "numbers", engine.clone()).await?;
|
||||
)*
|
||||
}
|
||||
}
|
||||
for_all_primitive_types! { test_mean }
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn test_mean_success<T>(
|
||||
column_name: &str,
|
||||
table_name: &str,
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: WrapperType + AsPrimitive<f64>,
|
||||
{
|
||||
let sql = format!("select MEAN({column_name}) as mean from {table_name}");
|
||||
let result = exec_selection(engine.clone(), &sql).await;
|
||||
let value = function::get_value_from_batches("mean", result);
|
||||
|
||||
let numbers =
|
||||
function::get_numbers_from_table::<T>(column_name, table_name, engine.clone()).await;
|
||||
let numbers = numbers.iter().map(|&n| n.as_()).collect::<Vec<f64>>();
|
||||
let expected = numbers.iter().sum::<f64>() / (numbers.len() as f64);
|
||||
let Value::Float64(OrderedFloat(value)) = value else {
|
||||
unreachable!()
|
||||
};
|
||||
assert!(
|
||||
(value - expected).abs() < 1e-3,
|
||||
"expected {expected}, actual {value}"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
src/query/src/tests/polyval_test.rs (new file, 70 lines)
@@ -0,0 +1,70 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::WrapperType;
|
||||
use num_traits::AsPrimitive;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::tests::{exec_selection, function};
|
||||
use crate::QueryEngine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_polyval_aggregator() -> Result<()> {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let engine = function::create_query_engine();
|
||||
|
||||
macro_rules! test_polyval {
|
||||
([], $( { $T:ty } ),*) => {
|
||||
$(
|
||||
let column_name = format!("{}_number", std::any::type_name::<$T>());
|
||||
test_polyval_success::<$T, <<<$T as WrapperType>::LogicalType as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>(&column_name, "numbers", engine.clone()).await?;
|
||||
)*
|
||||
}
|
||||
}
|
||||
for_all_primitive_types! { test_polyval }
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn test_polyval_success<T, PolyT>(
|
||||
column_name: &str,
|
||||
table_name: &str,
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: WrapperType,
|
||||
PolyT: WrapperType,
|
||||
T::Native: AsPrimitive<PolyT::Native>,
|
||||
PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum,
|
||||
i64: AsPrimitive<PolyT::Native>,
|
||||
{
|
||||
let sql = format!("select POLYVAL({column_name}, 0) as polyval from {table_name}");
|
||||
let result = exec_selection(engine.clone(), &sql).await;
|
||||
let value = function::get_value_from_batches("polyval", result);
|
||||
|
||||
let numbers =
|
||||
function::get_numbers_from_table::<T>(column_name, table_name, engine.clone()).await;
|
||||
let expected_value = numbers.iter().copied();
|
||||
let x = 0i64;
|
||||
let len = expected_value.len();
|
||||
let expected_native: PolyT::Native = expected_value
|
||||
.enumerate()
|
||||
.map(|(i, v)| v.into_native().as_() * (x.pow((len - 1 - i) as u32)).as_())
|
||||
.sum();
|
||||
assert_eq!(value, PolyT::from_native(expected_native).into());
|
||||
Ok(())
|
||||
}
|
||||
src/query/src/tests/scipy_stats_norm_cdf_test.rs (new file, 69 lines)
@@ -0,0 +1,69 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::types::WrapperType;
|
||||
use num_traits::AsPrimitive;
|
||||
use statrs::distribution::{ContinuousCDF, Normal};
|
||||
use statrs::statistics::Statistics;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::tests::{exec_selection, function};
|
||||
use crate::QueryEngine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_scipy_stats_norm_cdf_aggregator() -> Result<()> {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let engine = function::create_query_engine();
|
||||
|
||||
macro_rules! test_scipy_stats_norm_cdf {
|
||||
([], $( { $T:ty } ),*) => {
|
||||
$(
|
||||
let column_name = format!("{}_number", std::any::type_name::<$T>());
|
||||
test_scipy_stats_norm_cdf_success::<$T>(&column_name, "numbers", engine.clone()).await?;
|
||||
)*
|
||||
}
|
||||
}
|
||||
for_all_primitive_types! { test_scipy_stats_norm_cdf }
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn test_scipy_stats_norm_cdf_success<T>(
|
||||
column_name: &str,
|
||||
table_name: &str,
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: WrapperType + AsPrimitive<f64>,
|
||||
{
|
||||
let sql = format!(
|
||||
"select SCIPYSTATSNORMCDF({column_name},2.0) as scipy_stats_norm_cdf from {table_name}",
|
||||
);
|
||||
let result = exec_selection(engine.clone(), &sql).await;
|
||||
let value = function::get_value_from_batches("scipy_stats_norm_cdf", result);
|
||||
|
||||
let numbers =
|
||||
function::get_numbers_from_table::<T>(column_name, table_name, engine.clone()).await;
|
||||
let expected_value = numbers.iter().map(|&n| n.as_()).collect::<Vec<f64>>();
|
||||
let mean = expected_value.clone().mean();
|
||||
let stddev = expected_value.std_dev();
|
||||
|
||||
let n = Normal::new(mean, stddev).unwrap();
|
||||
let expected_value = n.cdf(2.0);
|
||||
|
||||
assert_eq!(value, expected_value.into());
|
||||
Ok(())
|
||||
}
|
||||
src/query/src/tests/scipy_stats_norm_pdf.rs (new file, 69 lines)
@@ -0,0 +1,69 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::types::WrapperType;
|
||||
use num_traits::AsPrimitive;
|
||||
use statrs::distribution::{Continuous, Normal};
|
||||
use statrs::statistics::Statistics;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::tests::{exec_selection, function};
|
||||
use crate::QueryEngine;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_scipy_stats_norm_pdf_aggregator() -> Result<()> {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let engine = function::create_query_engine();
|
||||
|
||||
macro_rules! test_scipy_stats_norm_pdf {
|
||||
([], $( { $T:ty } ),*) => {
|
||||
$(
|
||||
let column_name = format!("{}_number", std::any::type_name::<$T>());
|
||||
test_scipy_stats_norm_pdf_success::<$T>(&column_name, "numbers", engine.clone()).await?;
|
||||
)*
|
||||
}
|
||||
}
|
||||
for_all_primitive_types! { test_scipy_stats_norm_pdf }
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn test_scipy_stats_norm_pdf_success<T>(
|
||||
column_name: &str,
|
||||
table_name: &str,
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: WrapperType + AsPrimitive<f64>,
|
||||
{
|
||||
let sql = format!(
|
||||
"select SCIPYSTATSNORMPDF({column_name},2.0) as scipy_stats_norm_pdf from {table_name}"
|
||||
);
|
||||
let result = exec_selection(engine.clone(), &sql).await;
|
||||
let value = function::get_value_from_batches("scipy_stats_norm_pdf", result);
|
||||
|
||||
let numbers =
|
||||
function::get_numbers_from_table::<T>(column_name, table_name, engine.clone()).await;
|
||||
let expected_value = numbers.iter().map(|&n| n.as_()).collect::<Vec<f64>>();
|
||||
let mean = expected_value.clone().mean();
|
||||
let stddev = expected_value.std_dev();
|
||||
|
||||
let n = Normal::new(mean, stddev).unwrap();
|
||||
let expected_value = n.pdf(2.0);
|
||||
|
||||
assert_eq!(value, expected_value.into());
|
||||
Ok(())
|
||||
}
|
||||
@@ -34,11 +34,11 @@ use crate::error::{
|
||||
};
|
||||
use crate::http::HttpRecordsOutput;
|
||||
use crate::metrics::METRIC_JAEGER_QUERY_ELAPSED;
|
||||
use crate::otlp::trace::{
|
||||
use crate::otlp::trace::v0::{
|
||||
DURATION_NANO_COLUMN, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN, SPAN_ID_COLUMN,
|
||||
SPAN_KIND_COLUMN, SPAN_KIND_PREFIX, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
|
||||
TRACE_TABLE_NAME,
|
||||
};
|
||||
use crate::otlp::trace::TRACE_TABLE_NAME;
|
||||
use crate::query_handler::JaegerQueryHandlerRef;
|
||||
|
||||
/// JaegerAPIResponse is the response of Jaeger HTTP API.
|
||||
|
||||
@@ -15,12 +15,8 @@
|
||||
pub mod attributes;
|
||||
pub mod span;
|
||||
pub mod v0;
|
||||
pub mod v1;
|
||||
|
||||
use api::v1::RowInsertRequests;
|
||||
pub use common_catalog::consts::{
|
||||
PARENT_SPAN_ID_COLUMN, SPAN_ID_COLUMN, SPAN_NAME_COLUMN, TRACE_ID_COLUMN,
|
||||
};
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use pipeline::{GreptimePipelineParams, PipelineWay};
|
||||
use session::context::QueryContextRef;
|
||||
@@ -30,15 +26,6 @@ use crate::query_handler::PipelineHandlerRef;
|
||||
|
||||
pub const TRACE_TABLE_NAME: &str = "opentelemetry_traces";
|
||||
|
||||
pub const SERVICE_NAME_COLUMN: &str = "service_name";
|
||||
pub const TIMESTAMP_COLUMN: &str = "timestamp";
|
||||
pub const DURATION_NANO_COLUMN: &str = "duration_nano";
|
||||
pub const SPAN_KIND_COLUMN: &str = "span_kind";
|
||||
pub const SPAN_ATTRIBUTES_COLUMN: &str = "span_attributes";
|
||||
/// The span kind prefix in the database.
|
||||
/// If the span kind is `server`, it will be stored as `SPAN_KIND_SERVER` in the database.
|
||||
pub const SPAN_KIND_PREFIX: &str = "SPAN_KIND_";
|
||||
|
||||
/// Convert SpanTraces to GreptimeDB row insert requests.
|
||||
/// Returns `InsertRequests` and total number of rows to ingest
|
||||
pub fn to_grpc_insert_requests(
|
||||
@@ -58,16 +45,8 @@ pub fn to_grpc_insert_requests(
|
||||
query_ctx,
|
||||
pipeline_handler,
|
||||
),
|
||||
PipelineWay::OtlpTraceDirectV1 => v1::v1_to_grpc_insert_requests(
|
||||
request,
|
||||
pipeline,
|
||||
pipeline_params,
|
||||
table_name,
|
||||
query_ctx,
|
||||
pipeline_handler,
|
||||
),
|
||||
_ => NotSupportedSnafu {
|
||||
feat: "Unsupported pipeline for trace",
|
||||
feat: "Unsupported pipeline for logs",
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
|
||||
@@ -128,10 +128,6 @@ impl From<Attributes> for jsonb::Value<'static> {
|
||||
}
|
||||
|
||||
impl Attributes {
|
||||
pub fn take(self) -> Vec<KeyValue> {
|
||||
self.0
|
||||
}
|
||||
|
||||
pub fn get_ref(&self) -> &Vec<KeyValue> {
|
||||
&self.0
|
||||
}
|
||||
|
||||
@@ -16,8 +16,7 @@ use std::fmt::Display;
|
||||
|
||||
use common_time::timestamp::Timestamp;
|
||||
use itertools::Itertools;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::{any_value, InstrumentationScope, KeyValue};
|
||||
use opentelemetry_proto::tonic::common::v1::{InstrumentationScope, KeyValue};
|
||||
use opentelemetry_proto::tonic::trace::v1::span::{Event, Link};
|
||||
use opentelemetry_proto::tonic::trace::v1::{Span, Status};
|
||||
use serde::Serialize;
|
||||
@@ -231,51 +230,6 @@ pub fn status_to_string(status: &Option<Status>) -> (String, String) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert OpenTelemetry traces to SpanTraces
|
||||
///
|
||||
/// See
|
||||
/// <https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/trace/v1/trace.proto>
|
||||
/// for data structure of OTLP traces.
|
||||
pub fn parse(request: ExportTraceServiceRequest) -> TraceSpans {
|
||||
let span_size = request
|
||||
.resource_spans
|
||||
.iter()
|
||||
.flat_map(|res| res.scope_spans.iter())
|
||||
.flat_map(|scope| scope.spans.iter())
|
||||
.count();
|
||||
let mut spans = Vec::with_capacity(span_size);
|
||||
for resource_spans in request.resource_spans {
|
||||
let resource_attrs = resource_spans
|
||||
.resource
|
||||
.map(|r| r.attributes)
|
||||
.unwrap_or_default();
|
||||
let service_name = resource_attrs
|
||||
.iter()
|
||||
.find_or_first(|kv| kv.key == "service.name")
|
||||
.and_then(|kv| kv.value.clone())
|
||||
.and_then(|v| match v.value {
|
||||
Some(any_value::Value::StringValue(s)) => Some(s),
|
||||
Some(any_value::Value::BytesValue(b)) => {
|
||||
Some(String::from_utf8_lossy(&b).to_string())
|
||||
}
|
||||
_ => None,
|
||||
});
|
||||
|
||||
for scope_spans in resource_spans.scope_spans {
|
||||
let scope = scope_spans.scope.unwrap_or_default();
|
||||
for span in scope_spans.spans {
|
||||
spans.push(parse_span(
|
||||
service_name.clone(),
|
||||
&resource_attrs,
|
||||
&scope,
|
||||
span,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
spans
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use opentelemetry_proto::tonic::trace::v1::Status;
|
||||
|
||||
@@ -15,15 +15,13 @@
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, RowInsertRequests};
|
||||
use common_grpc::precision::Precision;
|
||||
use itertools::Itertools;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::any_value;
|
||||
use pipeline::{GreptimePipelineParams, PipelineWay};
|
||||
use session::context::QueryContextRef;
|
||||
|
||||
use super::span::{parse, TraceSpan};
|
||||
use super::{
|
||||
DURATION_NANO_COLUMN, PARENT_SPAN_ID_COLUMN, SERVICE_NAME_COLUMN, SPAN_ATTRIBUTES_COLUMN,
|
||||
SPAN_ID_COLUMN, SPAN_KIND_COLUMN, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
|
||||
};
|
||||
use super::span::{parse_span, TraceSpan, TraceSpans};
|
||||
use crate::error::Result;
|
||||
use crate::otlp::utils::{make_column_data, make_string_column_data};
|
||||
use crate::query_handler::PipelineHandlerRef;
|
||||
@@ -31,6 +29,64 @@ use crate::row_writer::{self, MultiTableData, TableData};
|
||||
|
||||
const APPROXIMATE_COLUMN_COUNT: usize = 24;
|
||||
|
||||
pub const SERVICE_NAME_COLUMN: &str = "service_name";
|
||||
pub const TRACE_ID_COLUMN: &str = "trace_id";
|
||||
pub const TIMESTAMP_COLUMN: &str = "timestamp";
|
||||
pub const DURATION_NANO_COLUMN: &str = "duration_nano";
|
||||
pub const SPAN_ID_COLUMN: &str = "span_id";
|
||||
pub const SPAN_NAME_COLUMN: &str = "span_name";
|
||||
pub const SPAN_KIND_COLUMN: &str = "span_kind";
|
||||
pub const SPAN_ATTRIBUTES_COLUMN: &str = "span_attributes";
|
||||
|
||||
/// The span kind prefix in the database.
|
||||
/// If the span kind is `server`, it will be stored as `SPAN_KIND_SERVER` in the database.
|
||||
pub const SPAN_KIND_PREFIX: &str = "SPAN_KIND_";
|
||||
|
||||
/// Convert OpenTelemetry traces to SpanTraces
|
||||
///
|
||||
/// See
|
||||
/// <https://github.com/open-telemetry/opentelemetry-proto/blob/main/opentelemetry/proto/trace/v1/trace.proto>
|
||||
/// for data structure of OTLP traces.
|
||||
pub fn parse(request: ExportTraceServiceRequest) -> TraceSpans {
|
||||
let span_size = request
|
||||
.resource_spans
|
||||
.iter()
|
||||
.flat_map(|res| res.scope_spans.iter())
|
||||
.flat_map(|scope| scope.spans.iter())
|
||||
.count();
|
||||
let mut spans = Vec::with_capacity(span_size);
|
||||
for resource_spans in request.resource_spans {
|
||||
let resource_attrs = resource_spans
|
||||
.resource
|
||||
.map(|r| r.attributes)
|
||||
.unwrap_or_default();
|
||||
let service_name = resource_attrs
|
||||
.iter()
|
||||
.find_or_first(|kv| kv.key == "service.name")
|
||||
.and_then(|kv| kv.value.clone())
|
||||
.and_then(|v| match v.value {
|
||||
Some(any_value::Value::StringValue(s)) => Some(s),
|
||||
Some(any_value::Value::BytesValue(b)) => {
|
||||
Some(String::from_utf8_lossy(&b).to_string())
|
||||
}
|
||||
_ => None,
|
||||
});
|
||||
|
||||
for scope_spans in resource_spans.scope_spans {
|
||||
let scope = scope_spans.scope.unwrap_or_default();
|
||||
for span in scope_spans.spans {
|
||||
spans.push(parse_span(
|
||||
service_name.clone(),
|
||||
&resource_attrs,
|
||||
&scope,
|
||||
span,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
spans
|
||||
}
|
||||
|
||||
/// Convert SpanTraces to GreptimeDB row insert requests.
|
||||
/// Returns `InsertRequests` and total number of rows to ingest
|
||||
pub fn v0_to_grpc_insert_requests(
|
||||
@@ -62,7 +118,7 @@ pub fn write_span_to_row(writer: &mut TableData, span: TraceSpan) -> Result<()>
|
||||
// write ts
|
||||
row_writer::write_ts_to_nanos(
|
||||
writer,
|
||||
TIMESTAMP_COLUMN,
|
||||
"timestamp",
|
||||
Some(span.start_in_nanosecond as i64),
|
||||
Precision::Nanosecond,
|
||||
&mut row,
|
||||
@@ -75,7 +131,7 @@ pub fn write_span_to_row(writer: &mut TableData, span: TraceSpan) -> Result<()>
|
||||
ValueData::TimestampNanosecondValue(span.end_in_nanosecond as i64),
|
||||
),
|
||||
make_column_data(
|
||||
DURATION_NANO_COLUMN,
|
||||
"duration_nano",
|
||||
ColumnDataType::Uint64,
|
||||
ValueData::U64Value(span.end_in_nanosecond - span.start_in_nanosecond),
|
||||
),
|
||||
@@ -83,14 +139,14 @@ pub fn write_span_to_row(writer: &mut TableData, span: TraceSpan) -> Result<()>
|
||||
row_writer::write_fields(writer, fields.into_iter(), &mut row)?;
|
||||
|
||||
if let Some(service_name) = span.service_name {
|
||||
row_writer::write_tag(writer, SERVICE_NAME_COLUMN, service_name, &mut row)?;
|
||||
row_writer::write_tag(writer, "service_name", service_name, &mut row)?;
|
||||
}
|
||||
|
||||
// tags
|
||||
let iter = vec![
|
||||
(TRACE_ID_COLUMN, span.trace_id),
|
||||
(SPAN_ID_COLUMN, span.span_id),
|
||||
(PARENT_SPAN_ID_COLUMN, span.parent_span_id),
|
||||
("trace_id", span.trace_id),
|
||||
("span_id", span.span_id),
|
||||
("parent_span_id", span.parent_span_id),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(col, val)| (col.to_string(), val));
|
||||
@@ -98,8 +154,8 @@ pub fn write_span_to_row(writer: &mut TableData, span: TraceSpan) -> Result<()>
|
||||
|
||||
// write fields
|
||||
let fields = vec![
|
||||
make_string_column_data(SPAN_KIND_COLUMN, span.span_kind),
|
||||
make_string_column_data(SPAN_NAME_COLUMN, span.span_name),
|
||||
make_string_column_data("span_kind", span.span_kind),
|
||||
make_string_column_data("span_name", span.span_name),
|
||||
make_string_column_data("span_status_code", span.span_status_code),
|
||||
make_string_column_data("span_status_message", span.span_status_message),
|
||||
make_string_column_data("trace_state", span.trace_state),
|
||||
@@ -108,7 +164,7 @@ pub fn write_span_to_row(writer: &mut TableData, span: TraceSpan) -> Result<()>
|
||||
|
||||
row_writer::write_json(
|
||||
writer,
|
||||
SPAN_ATTRIBUTES_COLUMN,
|
||||
"span_attributes",
|
||||
span.span_attributes.into(),
|
||||
&mut row,
|
||||
)?;
|
||||
|
||||
@@ -1,226 +0,0 @@
|
||||
// Copyright 2023 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use api::v1::value::ValueData;
|
||||
use api::v1::{ColumnDataType, RowInsertRequests, Value};
|
||||
use common_grpc::precision::Precision;
|
||||
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
|
||||
use opentelemetry_proto::tonic::common::v1::any_value::Value as OtlpValue;
|
||||
use pipeline::{GreptimePipelineParams, PipelineWay};
|
||||
use session::context::QueryContextRef;
|
||||
|
||||
use super::attributes::Attributes;
|
||||
use super::span::{parse, TraceSpan};
|
||||
use super::{
|
||||
DURATION_NANO_COLUMN, PARENT_SPAN_ID_COLUMN, SERVICE_NAME_COLUMN, SPAN_ID_COLUMN,
|
||||
SPAN_KIND_COLUMN, SPAN_NAME_COLUMN, TIMESTAMP_COLUMN, TRACE_ID_COLUMN,
|
||||
};
|
||||
use crate::error::Result;
|
||||
use crate::otlp::utils::{any_value_to_jsonb, make_column_data, make_string_column_data};
|
||||
use crate::query_handler::PipelineHandlerRef;
|
||||
use crate::row_writer::{self, MultiTableData, TableData};
|
||||
|
||||
const APPROXIMATE_COLUMN_COUNT: usize = 30;
|
||||
|
||||
/// Convert SpanTraces to GreptimeDB row insert requests.
|
||||
/// Returns `InsertRequests` and total number of rows to ingest
|
||||
///
|
||||
/// Compared with v0, this v1 implementation:
|
||||
/// 1. flattens all attribute data into columns.
|
||||
/// 2. treat `span_id` and `parent_trace_id` as fields.
|
||||
/// 3. removed `service_name` column because it's already in
|
||||
/// `resource_attributes.service_name`
|
||||
///
|
||||
/// For other compound data structures like span_links and span_events here we
|
||||
/// are still using `json` data structure.
|
||||
pub fn v1_to_grpc_insert_requests(
|
||||
request: ExportTraceServiceRequest,
|
||||
_pipeline: PipelineWay,
|
||||
_pipeline_params: GreptimePipelineParams,
|
||||
table_name: String,
|
||||
_query_ctx: &QueryContextRef,
|
||||
_pipeline_handler: PipelineHandlerRef,
|
||||
) -> Result<(RowInsertRequests, usize)> {
|
||||
let spans = parse(request);
|
||||
let mut multi_table_writer = MultiTableData::default();
|
||||
|
||||
let one_table_writer = multi_table_writer.get_or_default_table_data(
|
||||
table_name,
|
||||
APPROXIMATE_COLUMN_COUNT,
|
||||
spans.len(),
|
||||
);
|
||||
|
||||
for span in spans {
|
||||
write_span_to_row(one_table_writer, span)?;
|
||||
}
|
||||
|
||||
Ok(multi_table_writer.into_row_insert_requests())
|
||||
}
|
||||
|
||||
pub fn write_span_to_row(writer: &mut TableData, span: TraceSpan) -> Result<()> {
|
||||
let mut row = writer.alloc_one_row();
|
||||
|
||||
// write ts
|
||||
row_writer::write_ts_to_nanos(
|
||||
writer,
|
||||
TIMESTAMP_COLUMN,
|
||||
Some(span.start_in_nanosecond as i64),
|
||||
Precision::Nanosecond,
|
||||
&mut row,
|
||||
)?;
|
||||
// write ts fields
|
||||
let fields = vec![
|
||||
make_column_data(
|
||||
"timestamp_end",
|
||||
ColumnDataType::TimestampNanosecond,
|
||||
ValueData::TimestampNanosecondValue(span.end_in_nanosecond as i64),
|
||||
),
|
||||
make_column_data(
|
||||
DURATION_NANO_COLUMN,
|
||||
ColumnDataType::Uint64,
|
||||
ValueData::U64Value(span.end_in_nanosecond - span.start_in_nanosecond),
|
||||
),
|
||||
];
|
||||
row_writer::write_fields(writer, fields.into_iter(), &mut row)?;
|
||||
|
||||
// tags
|
||||
let tags = vec![
|
||||
(TRACE_ID_COLUMN.to_string(), span.trace_id),
|
||||
(SPAN_ID_COLUMN.to_string(), span.span_id),
|
||||
];
|
||||
row_writer::write_tags(writer, tags.into_iter(), &mut row)?;
|
||||
|
||||
// write fields
|
||||
let fields = vec![
|
||||
make_string_column_data(PARENT_SPAN_ID_COLUMN, span.parent_span_id),
|
||||
make_string_column_data(SPAN_KIND_COLUMN, span.span_kind),
|
||||
make_string_column_data(SPAN_NAME_COLUMN, span.span_name),
|
||||
make_string_column_data("span_status_code", span.span_status_code),
|
||||
make_string_column_data("span_status_message", span.span_status_message),
|
||||
make_string_column_data("trace_state", span.trace_state),
|
||||
make_string_column_data("scope_name", span.scope_name),
|
||||
make_string_column_data("scope_version", span.scope_version),
|
||||
];
|
||||
row_writer::write_fields(writer, fields.into_iter(), &mut row)?;
|
||||
|
||||
if let Some(service_name) = span.service_name {
|
||||
row_writer::write_fields(
|
||||
writer,
|
||||
std::iter::once(make_string_column_data(SERVICE_NAME_COLUMN, service_name)),
|
||||
&mut row,
|
||||
)?;
|
||||
}
|
||||
|
||||
write_attributes(writer, "span_attributes", span.span_attributes, &mut row)?;
|
||||
write_attributes(writer, "scope_attributes", span.scope_attributes, &mut row)?;
|
||||
write_attributes(
|
||||
writer,
|
||||
"resource_attributes",
|
||||
span.resource_attributes,
|
||||
&mut row,
|
||||
)?;
|
||||
|
||||
row_writer::write_json(writer, "span_events", span.span_events.into(), &mut row)?;
|
||||
row_writer::write_json(writer, "span_links", span.span_links.into(), &mut row)?;
|
||||
|
||||
writer.add_row(row);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_attributes(
|
||||
writer: &mut TableData,
|
||||
prefix: &str,
|
||||
attributes: Attributes,
|
||||
row: &mut Vec<Value>,
|
||||
) -> Result<()> {
|
||||
for attr in attributes.take().into_iter() {
|
||||
let key_suffix = attr.key;
|
||||
// skip resource_attributes.service.name because its already copied to
|
||||
// top level as `SERVICE_NAME_COLUMN`
|
||||
if prefix == "resource_attributes" && key_suffix == "service.name" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let key = format!("{}.{}", prefix, key_suffix);
|
||||
match attr.value.and_then(|v| v.value) {
|
||||
Some(OtlpValue::StringValue(v)) => {
|
||||
row_writer::write_fields(
|
||||
writer,
|
||||
std::iter::once(make_string_column_data(&key, v)),
|
||||
row,
|
||||
)?;
|
||||
}
|
||||
Some(OtlpValue::BoolValue(v)) => {
|
||||
row_writer::write_fields(
|
||||
writer,
|
||||
std::iter::once(make_column_data(
|
||||
&key,
|
||||
ColumnDataType::Boolean,
|
||||
ValueData::BoolValue(v),
|
||||
)),
|
||||
row,
|
||||
)?;
|
||||
}
|
||||
Some(OtlpValue::IntValue(v)) => {
|
||||
row_writer::write_fields(
|
||||
writer,
|
||||
std::iter::once(make_column_data(
|
||||
&key,
|
||||
ColumnDataType::Int64,
|
||||
ValueData::I64Value(v),
|
||||
)),
|
||||
row,
|
||||
)?;
|
||||
}
|
||||
Some(OtlpValue::DoubleValue(v)) => {
|
||||
row_writer::write_fields(
|
||||
writer,
|
||||
std::iter::once(make_column_data(
|
||||
&key,
|
||||
ColumnDataType::Float64,
|
||||
ValueData::F64Value(v),
|
||||
)),
|
||||
row,
|
||||
)?;
|
||||
}
|
||||
Some(OtlpValue::ArrayValue(v)) => row_writer::write_json(
|
||||
writer,
|
||||
key,
|
||||
any_value_to_jsonb(OtlpValue::ArrayValue(v)),
|
||||
row,
|
||||
)?,
|
||||
Some(OtlpValue::KvlistValue(v)) => row_writer::write_json(
|
||||
writer,
|
||||
key,
|
||||
any_value_to_jsonb(OtlpValue::KvlistValue(v)),
|
||||
row,
|
||||
)?,
|
||||
Some(OtlpValue::BytesValue(v)) => {
|
||||
row_writer::write_fields(
|
||||
writer,
|
||||
std::iter::once(make_column_data(
|
||||
&key,
|
||||
ColumnDataType::Binary,
|
||||
ValueData::BinaryValue(v),
|
||||
)),
|
||||
row,
|
||||
)?;
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -22,7 +22,6 @@ pub mod dialect;
pub mod error;
pub mod parser;
pub mod parsers;
pub mod partition;
pub mod statements;
pub mod util;

@@ -2475,13 +2475,4 @@ CREATE TABLE log (
        assert!(extensions.fulltext_index_options.is_some());
    }
}

    #[test]
    fn test_parse_interval_cast() {
        let s = "select '10s'::INTERVAL";
        let stmts =
            ParserContext::create_with_dialect(s, &GreptimeDbDialect {}, ParseOptions::default())
                .unwrap();
        assert_eq!("SELECT '10 seconds'::INTERVAL", &stmts[0].to_string());
    }
}

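The test above pins the observable behavior: the shorthand `'10s'` is expanded to `'10 seconds'` before the cast is printed back. The sketch below is a hypothetical, much narrower stand-in for that expansion; the real `normalize_interval_name` handles more units and compound forms.

```rust
// Hypothetical, simplified version of the shorthand expansion the test checks.
fn expand_shorthand(input: &str) -> Option<String> {
    let s = input.trim();
    // Split the leading digits from the unit suffix.
    let split = s.find(|c: char| !c.is_ascii_digit())?;
    let (count, unit) = s.split_at(split);
    let long = match unit {
        "s" => "seconds",
        "m" => "minutes",
        "h" => "hours",
        "d" => "days",
        _ => return None,
    };
    Some(format!("{count} {long}"))
}

#[test]
fn expands_seconds_shorthand() {
    assert_eq!(expand_shorthand("10s").as_deref(), Some("10 seconds"));
}
```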
@@ -1,165 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use sqlparser::ast::{BinaryOperator, Expr, Ident, Value};

use crate::statements::create::Partitions;

macro_rules! between_string {
    ($col: expr, $left_incl: expr, $right_excl: expr) => {
        Expr::BinaryOp {
            op: BinaryOperator::And,
            left: Box::new(Expr::BinaryOp {
                op: BinaryOperator::GtEq,
                left: Box::new($col.clone()),
                right: Box::new(Expr::Value(Value::SingleQuotedString(
                    $left_incl.to_string(),
                ))),
            }),
            right: Box::new(Expr::BinaryOp {
                op: BinaryOperator::Lt,
                left: Box::new($col.clone()),
                right: Box::new(Expr::Value(Value::SingleQuotedString(
                    $right_excl.to_string(),
                ))),
            }),
        }
    };
}

macro_rules! or {
    ($left: expr, $right: expr) => {
        Expr::BinaryOp {
            op: BinaryOperator::Or,
            left: Box::new($left),
            right: Box::new($right),
        }
    };
}

pub fn partition_rule_for_hexstring(ident: &str) -> Partitions {
    let ident = Ident::new(ident);
    let ident_expr = Expr::Identifier(ident.clone());

    // rules are like:
    //
    // "trace_id < '1'",
    // "trace_id >= '1' AND trace_id < '2'",
    // "trace_id >= '2' AND trace_id < '3'",
    // "trace_id >= '3' AND trace_id < '4'",
    // "trace_id >= '4' AND trace_id < '5'",
    // "trace_id >= '5' AND trace_id < '6'",
    // "trace_id >= '6' AND trace_id < '7'",
    // "trace_id >= '7' AND trace_id < '8'",
    // "trace_id >= '8' AND trace_id < '9'",
    // "trace_id >= '9' AND trace_id < 'A'",
    // "trace_id >= 'A' AND trace_id < 'B' OR trace_id >= 'a' AND trace_id < 'b'",
    // "trace_id >= 'B' AND trace_id < 'C' OR trace_id >= 'b' AND trace_id < 'c'",
    // "trace_id >= 'C' AND trace_id < 'D' OR trace_id >= 'c' AND trace_id < 'd'",
    // "trace_id >= 'D' AND trace_id < 'E' OR trace_id >= 'd' AND trace_id < 'e'",
    // "trace_id >= 'E' AND trace_id < 'F' OR trace_id >= 'e' AND trace_id < 'f'",
    // "trace_id >= 'F' AND trace_id < 'a' OR trace_id >= 'f'",
    let rules = vec![
        Expr::BinaryOp {
            left: Box::new(ident_expr.clone()),
            op: BinaryOperator::Lt,
            right: Box::new(Expr::Value(Value::SingleQuotedString("1".to_string()))),
        },
        // [left, right)
        between_string!(ident_expr, "1", "2"),
        between_string!(ident_expr, "2", "3"),
        between_string!(ident_expr, "3", "4"),
        between_string!(ident_expr, "4", "5"),
        between_string!(ident_expr, "5", "6"),
        between_string!(ident_expr, "6", "7"),
        between_string!(ident_expr, "7", "8"),
        between_string!(ident_expr, "8", "9"),
        between_string!(ident_expr, "9", "A"),
        or!(
            between_string!(ident_expr, "A", "B"),
            between_string!(ident_expr, "a", "b")
        ),
        or!(
            between_string!(ident_expr, "B", "C"),
            between_string!(ident_expr, "b", "c")
        ),
        or!(
            between_string!(ident_expr, "C", "D"),
            between_string!(ident_expr, "c", "d")
        ),
        or!(
            between_string!(ident_expr, "D", "E"),
            between_string!(ident_expr, "d", "e")
        ),
        or!(
            between_string!(ident_expr, "E", "F"),
            between_string!(ident_expr, "e", "f")
        ),
        or!(
            between_string!(ident_expr, "F", "a"),
            Expr::BinaryOp {
                left: Box::new(ident_expr.clone()),
                op: BinaryOperator::GtEq,
                right: Box::new(Expr::Value(Value::SingleQuotedString("f".to_string()))),
            }
        ),
    ];

    Partitions {
        column_list: vec![ident],
        exprs: rules,
    }
}

#[cfg(test)]
mod tests {
    use sqlparser::ast::Expr;
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    use super::*;

    #[test]
    fn test_rules() {
        let expr = vec![
            "trace_id < '1'",
            "trace_id >= '1' AND trace_id < '2'",
            "trace_id >= '2' AND trace_id < '3'",
            "trace_id >= '3' AND trace_id < '4'",
            "trace_id >= '4' AND trace_id < '5'",
            "trace_id >= '5' AND trace_id < '6'",
            "trace_id >= '6' AND trace_id < '7'",
            "trace_id >= '7' AND trace_id < '8'",
            "trace_id >= '8' AND trace_id < '9'",
            "trace_id >= '9' AND trace_id < 'A'",
            "trace_id >= 'A' AND trace_id < 'B' OR trace_id >= 'a' AND trace_id < 'b'",
            "trace_id >= 'B' AND trace_id < 'C' OR trace_id >= 'b' AND trace_id < 'c'",
            "trace_id >= 'C' AND trace_id < 'D' OR trace_id >= 'c' AND trace_id < 'd'",
            "trace_id >= 'D' AND trace_id < 'E' OR trace_id >= 'd' AND trace_id < 'e'",
            "trace_id >= 'E' AND trace_id < 'F' OR trace_id >= 'e' AND trace_id < 'f'",
            "trace_id >= 'F' AND trace_id < 'a' OR trace_id >= 'f'",
        ];

        let dialect = GenericDialect {};
        let results = expr
            .into_iter()
            .map(|s| {
                let mut parser = Parser::new(&dialect).try_with_sql(s).unwrap();
                parser.parse_expr().unwrap()
            })
            .collect::<Vec<Expr>>();

        assert_eq!(results, partition_rule_for_hexstring("trace_id").exprs);
    }
}
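The sixteen partition rules hard-coded in the deleted file above bucket a hexadecimal trace_id by its first character: one bucket per digit 0-9 and one per hex letter, with the upper- and lower-case forms of each letter sharing a bucket. A standalone sketch, not part of the change and using a hypothetical helper name, that derives the same sixteen rule strings:

fn hex_partition_rules(col: &str) -> Vec<String> {
    // Everything below '1' goes to the first bucket, then one bucket per digit.
    let mut rules = vec![format!("{col} < '1'")];
    for (lo, hi) in [
        ("1", "2"), ("2", "3"), ("3", "4"), ("4", "5"), ("5", "6"),
        ("6", "7"), ("7", "8"), ("8", "9"), ("9", "A"),
    ] {
        rules.push(format!("{col} >= '{lo}' AND {col} < '{hi}'"));
    }
    // Letter buckets pair the upper-case range with its lower-case twin.
    for (u1, u2) in [("A", "B"), ("B", "C"), ("C", "D"), ("D", "E"), ("E", "F")] {
        let (l1, l2) = (u1.to_lowercase(), u2.to_lowercase());
        rules.push(format!(
            "{col} >= '{u1}' AND {col} < '{u2}' OR {col} >= '{l1}' AND {col} < '{l2}'"
        ));
    }
    // The last bucket covers 'F'..'a' plus everything from 'f' upwards.
    rules.push(format!("{col} >= 'F' AND {col} < 'a' OR {col} >= 'f'"));
    rules
}

fn main() {
    // Prints the same sixteen expressions listed in the deleted test above.
    for rule in hex_partition_rules("trace_id") {
        println!("{rule}");
    }
}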
@@ -99,21 +99,15 @@ impl TransformRule for ExpandIntervalTransformRule {
            Expr::Cast {
                expr: cast_exp,
                data_type,
                kind,
                format,
                ..
            } => {
                if DataType::Interval == *data_type {
                    match &**cast_exp {
                        Expr::Value(Value::SingleQuotedString(value))
                        | Expr::Value(Value::DoubleQuotedString(value)) => {
                            let interval_value =
                            let interval_name =
                                normalize_interval_name(value).unwrap_or_else(|| value.to_string());
                            *expr = Expr::Cast {
                                kind: kind.clone(),
                                expr: single_quoted_string_expr(interval_value),
                                data_type: DataType::Interval,
                                format: std::mem::take(format),
                            }
                            *expr = create_interval(single_quoted_string_expr(interval_name));
                        }
                        _ => {}
                    }
@@ -129,6 +123,16 @@ fn single_quoted_string_expr(string: String) -> Box<Expr> {
    Box::new(Expr::Value(Value::SingleQuotedString(string)))
}

fn create_interval(value: Box<Expr>) -> Expr {
    Expr::Interval(Interval {
        value,
        leading_field: None,
        leading_precision: None,
        last_field: None,
        fractional_seconds_precision: None,
    })
}

fn update_existing_interval_with_value(interval: &Interval, value: Box<Expr>) -> Expr {
    Expr::Interval(Interval {
        value,
@@ -195,23 +199,14 @@ fn expand_interval_abbreviation(interval_str: &str) -> Option<String> {
mod tests {
    use std::ops::ControlFlow;

    use sqlparser::ast::{BinaryOperator, CastKind, DataType, Expr, Interval, Value};
    use sqlparser::ast::{BinaryOperator, DataType, Expr, Interval, Value};

    use crate::statements::transform::expand_interval::{
        normalize_interval_name, single_quoted_string_expr, ExpandIntervalTransformRule,
        create_interval, normalize_interval_name, single_quoted_string_expr,
        ExpandIntervalTransformRule,
    };
    use crate::statements::transform::TransformRule;

    fn create_interval(value: Box<Expr>) -> Expr {
        Expr::Interval(Interval {
            value,
            leading_field: None,
            leading_precision: None,
            last_field: None,
            fractional_seconds_precision: None,
        })
    }

    #[test]
    fn test_transform_interval_basic_conversions() {
        let test_cases = vec![
@@ -384,14 +379,15 @@ mod tests {
        assert_eq!(control_flow, ControlFlow::Continue(()));
        assert_eq!(
            cast_to_interval_expr,
            Expr::Cast {
                kind: CastKind::Cast,
                expr: Box::new(Expr::Value(Value::SingleQuotedString(
            Expr::Interval(Interval {
                value: Box::new(Expr::Value(Value::SingleQuotedString(
                    "3 years 2 months".to_string()
                ))),
                data_type: DataType::Interval,
                format: None,
            }
                leading_field: None,
                leading_precision: None,
                last_field: None,
                fractional_seconds_precision: None,
            })
        );

        let mut cast_to_i64_expr = Expr::Cast {

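With this hunk the transform stops rebuilding an Expr::Cast: a string cast to INTERVAL is normalized and then wrapped directly in an Expr::Interval node by the new create_interval helper. A minimal standalone sketch of that shape, mirroring the helper above and assuming the same sqlparser version as the crate (one whose Expr::Interval wraps an Interval struct):

use sqlparser::ast::{Expr, Interval, Value};

fn create_interval(value: Box<Expr>) -> Expr {
    Expr::Interval(Interval {
        value,
        leading_field: None,
        leading_precision: None,
        last_field: None,
        fractional_seconds_precision: None,
    })
}

fn main() {
    // '3y2mon' would first be normalized to "3 years 2 months" and then wrapped
    // as an interval literal instead of a CAST expression.
    let value = Box::new(Expr::Value(Value::SingleQuotedString(
        "3 years 2 months".to_string(),
    )));
    // Expr implements Display, so this prints roughly: INTERVAL '3 years 2 months'
    println!("{}", create_interval(value));
}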
@@ -40,7 +40,7 @@ use common_procedure::options::ProcedureConfig;
use common_procedure::ProcedureManagerRef;
use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};
use datanode::datanode::DatanodeBuilder;
use flow::{FlownodeBuilder, FrontendClient};
use flow::FlownodeBuilder;
use frontend::instance::builder::FrontendBuilder;
use frontend::instance::{FrontendInstance, Instance, StandaloneDatanodeManager};
use meta_srv::metasrv::{FLOW_ID_SEQ, TABLE_ID_SEQ};
@@ -164,15 +164,12 @@ impl GreptimeDbStandaloneBuilder {
            Some(procedure_manager.clone()),
        );

        let fe_server_addr = opts.frontend_options().grpc.bind_addr.clone();
        let frontend_client = FrontendClient::from_static_grpc_addr(fe_server_addr);
        let flow_builder = FlownodeBuilder::new(
            Default::default(),
            plugins.clone(),
            table_metadata_manager.clone(),
            catalog_manager.clone(),
            flow_metadata_manager.clone(),
            Arc::new(frontend_client),
        );
        let flownode = Arc::new(flow_builder.build().await.unwrap());

@@ -96,8 +96,7 @@ macro_rules! http_tests {
        test_pipeline_dispatcher,

        test_otlp_metrics,
        test_otlp_traces_v0,
        test_otlp_traces_v1,
        test_otlp_traces,
        test_otlp_logs,
        test_loki_pb_logs,
        test_loki_json_logs,
@@ -1321,7 +1320,7 @@ transform:

    // 3. check schema

let expected_schema = "[[\"logs1\",\"CREATE TABLE IF NOT EXISTS \\\"logs1\\\" (\\n \\\"id1\\\" INT NULL,\\n \\\"id2\\\" INT NULL,\\n \\\"logger\\\" STRING NULL,\\n \\\"type\\\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\\n \\\"log\\\" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'),\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]";
let expected_schema = "[[\"logs1\",\"CREATE TABLE IF NOT EXISTS \\\"logs1\\\" (\\n \\\"id1\\\" INT NULL,\\n \\\"id2\\\" INT NULL,\\n \\\"logger\\\" STRING NULL,\\n \\\"type\\\" STRING NULL SKIPPING INDEX WITH(granularity = '0', type = 'BLOOM'),\\n \\\"log\\\" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'),\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]";
    validate_data(
        "pipeline_schema",
        &client,
@@ -2054,7 +2053,7 @@ pub async fn test_otlp_metrics(store_type: StorageType) {
    guard.remove_all().await;
}

pub async fn test_otlp_traces_v0(store_type: StorageType) {
pub async fn test_otlp_traces(store_type: StorageType) {
    // init
    common_telemetry::init_default_ut_logging();
    let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "test_otlp_traces").await;
@@ -2126,99 +2125,6 @@ pub async fn test_otlp_traces_v0(store_type: StorageType) {
    guard.remove_all().await;
}

pub async fn test_otlp_traces_v1(store_type: StorageType) {
    // init
    common_telemetry::init_default_ut_logging();
    let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "test_otlp_traces").await;
    const TRACE_V1: &str = "greptime_trace_v1";

    let content = r#"
{"resourceSpans":[{"resource":{"attributes":[{"key":"service.name","value":{"stringValue":"telemetrygen"}}],"droppedAttributesCount":0},"scopeSpans":[{"scope":{"name":"telemetrygen","version":"","attributes":[],"droppedAttributesCount":0},"spans":[{"traceId":"c05d7a4ec8e1f231f02ed6e8da8655b4","spanId":"9630f2916e2f7909","traceState":"","parentSpanId":"d24f921c75f68e23","flags":256,"name":"okey-dokey-0","kind":2,"startTimeUnixNano":"1736480942444376000","endTimeUnixNano":"1736480942444499000","attributes":[{"key":"net.peer.ip","value":{"stringValue":"1.2.3.4"}},{"key":"peer.service","value":{"stringValue":"telemetrygen-client"}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"links":[],"droppedLinksCount":0,"status":{"message":"","code":0}},{"traceId":"c05d7a4ec8e1f231f02ed6e8da8655b4","spanId":"d24f921c75f68e23","traceState":"","parentSpanId":"","flags":256,"name":"lets-go","kind":3,"startTimeUnixNano":"1736480942444376000","endTimeUnixNano":"1736480942444499000","attributes":[{"key":"net.peer.ip","value":{"stringValue":"1.2.3.4"}},{"key":"peer.service","value":{"stringValue":"telemetrygen-server"}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"links":[],"droppedLinksCount":0,"status":{"message":"","code":0}},{"traceId":"cc9e0991a2e63d274984bd44ee669203","spanId":"8f847259b0f6e1ab","traceState":"","parentSpanId":"eba7be77e3558179","flags":256,"name":"okey-dokey-0","kind":2,"startTimeUnixNano":"1736480942444589000","endTimeUnixNano":"1736480942444712000","attributes":[{"key":"net.peer.ip","value":{"stringValue":"1.2.3.4"}},{"key":"peer.service","value":{"stringValue":"telemetrygen-client"}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"links":[],"droppedLinksCount":0,"status":{"message":"","code":0}},{"traceId":"cc9e0991a2e63d274984bd44ee669203","spanId":"eba7be77e3558179","traceState":"","parentSpanId":"","flags":256,"name":"lets-go","kind":3,"startTimeUnixNano":"1736480942444589000","endTimeUnixNano":"1736480942444712000","attributes":[{"key":"net.peer.ip","value":{"stringValue":"1.2.3.4"}},{"key":"peer.service","value":{"stringValue":"telemetrygen-server"}}],"droppedAttributesCount":0,"events":[],"droppedEventsCount":0,"links":[],"droppedLinksCount":0,"status":{"message":"","code":0}}],"schemaUrl":""}],"schemaUrl":"https://opentelemetry.io/schemas/1.4.0"}]}
    "#;

    let req: ExportTraceServiceRequest = serde_json::from_str(content).unwrap();
    let body = req.encode_to_vec();

    // handshake
    let client = TestClient::new(app).await;

    // write traces data
    let res = send_req(
        &client,
        vec![
            (
                HeaderName::from_static("content-type"),
                HeaderValue::from_static("application/x-protobuf"),
            ),
            (
                HeaderName::from_static("x-greptime-log-pipeline-name"),
                HeaderValue::from_static(TRACE_V1),
            ),
            (
                HeaderName::from_static("x-greptime-trace-table-name"),
                HeaderValue::from_static("mytable"),
            ),
        ],
        "/v1/otlp/v1/traces",
        body.clone(),
        false,
    )
    .await;
    assert_eq!(StatusCode::OK, res.status());

    // select traces data
let expected = r#"[[1736480942444376000,1736480942444499000,123000,"c05d7a4ec8e1f231f02ed6e8da8655b4","9630f2916e2f7909","d24f921c75f68e23","SPAN_KIND_SERVER","okey-dokey-0","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-client",[],[]],[1736480942444376000,1736480942444499000,123000,"c05d7a4ec8e1f231f02ed6e8da8655b4","d24f921c75f68e23","","SPAN_KIND_CLIENT","lets-go","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-server",[],[]],[1736480942444589000,1736480942444712000,123000,"cc9e0991a2e63d274984bd44ee669203","8f847259b0f6e1ab","eba7be77e3558179","SPAN_KIND_SERVER","okey-dokey-0","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-client",[],[]],[1736480942444589000,1736480942444712000,123000,"cc9e0991a2e63d274984bd44ee669203","eba7be77e3558179","","SPAN_KIND_CLIENT","lets-go","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-server",[],[]]]"#;
    validate_data("otlp_traces", &client, "select * from mytable;", expected).await;

let expected_ddl = r#"[["mytable","CREATE TABLE IF NOT EXISTS \"mytable\" (\n \"timestamp\" TIMESTAMP(9) NOT NULL,\n \"timestamp_end\" TIMESTAMP(9) NULL,\n \"duration_nano\" BIGINT UNSIGNED NULL,\n \"trace_id\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\n \"span_id\" STRING NULL,\n \"parent_span_id\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\n \"span_kind\" STRING NULL,\n \"span_name\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\n \"span_status_code\" STRING NULL,\n \"span_status_message\" STRING NULL,\n \"trace_state\" STRING NULL,\n \"scope_name\" STRING NULL,\n \"scope_version\" STRING NULL,\n \"service_name\" STRING NULL,\n \"span_attributes.net.peer.ip\" STRING NULL,\n \"span_attributes.peer.service\" STRING NULL,\n \"span_events\" JSON NULL,\n \"span_links\" JSON NULL,\n TIME INDEX (\"timestamp\"),\n PRIMARY KEY (\"trace_id\", \"span_id\")\n)\nPARTITION ON COLUMNS (\"trace_id\") (\n trace_id < '1',\n trace_id >= '1' AND trace_id < '2',\n trace_id >= '2' AND trace_id < '3',\n trace_id >= '3' AND trace_id < '4',\n trace_id >= '4' AND trace_id < '5',\n trace_id >= '5' AND trace_id < '6',\n trace_id >= '6' AND trace_id < '7',\n trace_id >= '7' AND trace_id < '8',\n trace_id >= '8' AND trace_id < '9',\n trace_id >= '9' AND trace_id < 'A',\n trace_id >= 'A' AND trace_id < 'B' OR trace_id >= 'a' AND trace_id < 'b',\n trace_id >= 'B' AND trace_id < 'C' OR trace_id >= 'b' AND trace_id < 'c',\n trace_id >= 'C' AND trace_id < 'D' OR trace_id >= 'c' AND trace_id < 'd',\n trace_id >= 'D' AND trace_id < 'E' OR trace_id >= 'd' AND trace_id < 'e',\n trace_id >= 'E' AND trace_id < 'F' OR trace_id >= 'e' AND trace_id < 'f',\n trace_id >= 'F' AND trace_id < 'a' OR trace_id >= 'f'\n)\nENGINE=mito\nWITH(\n append_mode = 'true'\n)"]]"#;
    validate_data(
        "otlp_traces",
        &client,
        "show create table mytable;",
        expected_ddl,
    )
    .await;

    // drop table
    let res = client.get("/v1/sql?sql=drop table mytable;").send().await;
    assert_eq!(res.status(), StatusCode::OK);

    // write traces data with gzip
    let res = send_req(
        &client,
        vec![
            (
                HeaderName::from_static("content-type"),
                HeaderValue::from_static("application/x-protobuf"),
            ),
            (
                HeaderName::from_static("x-greptime-log-pipeline-name"),
                HeaderValue::from_static(TRACE_V1),
            ),
            (
                HeaderName::from_static("x-greptime-trace-table-name"),
                HeaderValue::from_static("mytable"),
            ),
        ],
        "/v1/otlp/v1/traces",
        body.clone(),
        true,
    )
    .await;
    assert_eq!(StatusCode::OK, res.status());

    // select traces data again
    validate_data(
        "otlp_traces_with_gzip",
        &client,
        "select * from mytable;",
        expected,
    )
    .await;

    guard.remove_all().await;
}

pub async fn test_otlp_logs(store_type: StorageType) {
    common_telemetry::init_default_ut_logging();
    let (app, mut guard) = setup_test_http_app_with_frontend(store_type, "test_otlp_logs").await;

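The test above drives the OTLP path end to end: a protobuf-encoded ExportTraceServiceRequest is POSTed to /v1/otlp/v1/traces, with x-greptime-log-pipeline-name selecting the greptime_trace_v1 pipeline and x-greptime-trace-table-name naming the target table. A minimal client-side sketch under stated assumptions (reqwest with the blocking feature as the HTTP client, an instance listening on the assumed default HTTP port 4000, and `body` already holding the encoded request):

// Cargo.toml (assumed): reqwest = { version = "0.12", features = ["blocking"] }
fn send_traces(body: Vec<u8>) -> reqwest::Result<reqwest::blocking::Response> {
    reqwest::blocking::Client::new()
        .post("http://127.0.0.1:4000/v1/otlp/v1/traces")
        // Same headers the test above sends.
        .header("content-type", "application/x-protobuf")
        .header("x-greptime-log-pipeline-name", "greptime_trace_v1")
        .header("x-greptime-trace-table-name", "mytable")
        .body(body)
        .send()
}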
@@ -25,11 +25,11 @@ SELECT INTERVAL '-2 months';

SELECT '3 hours'::INTERVAL;

+-----------------+
| Utf8("3 hours") |
+-----------------+
| 3 hours |
+-----------------+
+---------------------------------------------------------------------------------------------------+
| IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 10800000000000 }") |
+---------------------------------------------------------------------------------------------------+
| 3 hours |
+---------------------------------------------------------------------------------------------------+

SELECT INTERVAL '1 year 2 months 3 days 4 hours' + INTERVAL '1 year';

@@ -128,11 +128,11 @@ SELECT INTERVAL '1y2w3d4h';

SELECT '3y2mon'::INTERVAL;

+--------------------------+
| Utf8("3 years 2 months") |
+--------------------------+
| 38 mons |
+--------------------------+
+---------------------------------------------------------------------------------------+
| IntervalMonthDayNano("IntervalMonthDayNano { months: 38, days: 0, nanoseconds: 0 }") |
+---------------------------------------------------------------------------------------+
| 38 mons |
+---------------------------------------------------------------------------------------+

SELECT INTERVAL '7 days' - INTERVAL '1d';

@@ -169,11 +169,11 @@ SELECT INTERVAL '-P3Y3M700DT133H17M36.789S';

SELECT 'P3Y3M700DT133H17M36.789S'::INTERVAL;

+---------------------------------------+
| Utf8("163343856789 milliseconds") |
+---------------------------------------+
| 45373 hours 17 mins 36.789000000 secs |
+---------------------------------------+
+-------------------------------------------------------------------------------------------------------+
| IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 163343856789000000 }") |
+-------------------------------------------------------------------------------------------------------+
| 45373 hours 17 mins 36.789000000 secs |
+-------------------------------------------------------------------------------------------------------+

SELECT INTERVAL '2h' + INTERVAL 'P3Y3M700DT133H17M36.789S';

@@ -185,115 +185,115 @@ SELECT INTERVAL '2h' + INTERVAL 'P3Y3M700DT133H17M36.789S';

select '2022-01-01T00:00:01'::timestamp + '1 days'::interval;

+-----------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + Utf8("1 days") |
+-----------------------------------------------------------------------------------------------+
| 2022-01-02T00:00:01 |
+-----------------------------------------------------------------------------------------------+
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2022-01-02T00:00:01 |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+

select '2022-01-01T00:00:01'::timestamp + '2 days'::interval;

+-----------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + Utf8("2 days") |
+-----------------------------------------------------------------------------------------------+
| 2022-01-03T00:00:01 |
+-----------------------------------------------------------------------------------------------+
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }") |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2022-01-03T00:00:01 |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+

select '2022-01-01T00:00:01'::timestamp - '1 days'::interval;

+-----------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) - Utf8("1 days") |
+-----------------------------------------------------------------------------------------------+
| 2021-12-31T00:00:01 |
+-----------------------------------------------------------------------------------------------+
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2021-12-31T00:00:01 |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+

select '2022-01-01T00:00:01'::timestamp - '2 days'::interval;

+-----------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) - Utf8("2 days") |
+-----------------------------------------------------------------------------------------------+
| 2021-12-30T00:00:01 |
+-----------------------------------------------------------------------------------------------+
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 2, nanoseconds: 0 }") |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2021-12-30T00:00:01 |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+

select '2022-01-01T00:00:01'::timestamp + '1 month'::interval;

+------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + Utf8("1 month") |
+------------------------------------------------------------------------------------------------+
| 2022-02-01T00:00:01 |
+------------------------------------------------------------------------------------------------+
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + IntervalMonthDayNano("IntervalMonthDayNano { months: 1, days: 0, nanoseconds: 0 }") |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2022-02-01T00:00:01 |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+

select '2022-01-01T00:00:01'::timestamp + '2 months'::interval;

+-------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + Utf8("2 months") |
+-------------------------------------------------------------------------------------------------+
| 2022-03-01T00:00:01 |
+-------------------------------------------------------------------------------------------------+
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + IntervalMonthDayNano("IntervalMonthDayNano { months: 2, days: 0, nanoseconds: 0 }") |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2022-03-01T00:00:01 |
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+

select '2022-01-01T00:00:01'::timestamp + '1 year'::interval;

+-----------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + Utf8("1 year") |
+-----------------------------------------------------------------------------------------------+
| 2023-01-01T00:00:01 |
+-----------------------------------------------------------------------------------------------+
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2022-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + IntervalMonthDayNano("IntervalMonthDayNano { months: 12, days: 0, nanoseconds: 0 }") |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2023-01-01T00:00:01 |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+

select '2023-01-01T00:00:01'::timestamp + '2 years'::interval;

+------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2023-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + Utf8("2 years") |
+------------------------------------------------------------------------------------------------+
| 2025-01-01T00:00:01 |
+------------------------------------------------------------------------------------------------+
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| arrow_cast(Utf8("2023-01-01T00:00:01"),Utf8("Timestamp(Millisecond, None)")) + IntervalMonthDayNano("IntervalMonthDayNano { months: 24, days: 0, nanoseconds: 0 }") |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2025-01-01T00:00:01 |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+

-- DATE + INTERVAL
SELECT DATE '2000-10-30' + '1 days'::interval;

+-------------------------------------+
| Utf8("2000-10-30") + Utf8("1 days") |
+-------------------------------------+
| 2000-10-31 |
+-------------------------------------+
+----------------------------------------------------------------------------------------------------------+
| Utf8("2000-10-30") + IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") |
+----------------------------------------------------------------------------------------------------------+
| 2000-10-31 |
+----------------------------------------------------------------------------------------------------------+

SELECT DATE '2000-10-30' + '2 months'::interval;

+---------------------------------------+
| Utf8("2000-10-30") + Utf8("2 months") |
+---------------------------------------+
| 2000-12-30 |
+---------------------------------------+
+----------------------------------------------------------------------------------------------------------+
| Utf8("2000-10-30") + IntervalMonthDayNano("IntervalMonthDayNano { months: 2, days: 0, nanoseconds: 0 }") |
+----------------------------------------------------------------------------------------------------------+
| 2000-12-30 |
+----------------------------------------------------------------------------------------------------------+

SELECT DATE '2000-10-30' + '2 years'::interval;

+--------------------------------------+
| Utf8("2000-10-30") + Utf8("2 years") |
+--------------------------------------+
| 2002-10-30 |
+--------------------------------------+
+-----------------------------------------------------------------------------------------------------------+
| Utf8("2000-10-30") + IntervalMonthDayNano("IntervalMonthDayNano { months: 24, days: 0, nanoseconds: 0 }") |
+-----------------------------------------------------------------------------------------------------------+
| 2002-10-30 |
+-----------------------------------------------------------------------------------------------------------+

-- DATE - INTERVAL
SELECT DATE '2000-10-30' - '1 days'::interval;

+-------------------------------------+
| Utf8("2000-10-30") - Utf8("1 days") |
+-------------------------------------+
| 2000-10-29 |
+-------------------------------------+
+----------------------------------------------------------------------------------------------------------+
| Utf8("2000-10-30") - IntervalMonthDayNano("IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }") |
+----------------------------------------------------------------------------------------------------------+
| 2000-10-29 |
+----------------------------------------------------------------------------------------------------------+

SELECT DATE '2000-10-30' - '2 months'::interval;

+---------------------------------------+
| Utf8("2000-10-30") - Utf8("2 months") |
+---------------------------------------+
| 2000-08-30 |
+---------------------------------------+
+----------------------------------------------------------------------------------------------------------+
| Utf8("2000-10-30") - IntervalMonthDayNano("IntervalMonthDayNano { months: 2, days: 0, nanoseconds: 0 }") |
+----------------------------------------------------------------------------------------------------------+
| 2000-08-30 |
+----------------------------------------------------------------------------------------------------------+

SELECT DATE '2000-10-30' - '2 years'::interval;

+--------------------------------------+
| Utf8("2000-10-30") - Utf8("2 years") |
+--------------------------------------+
| 1998-10-30 |
+--------------------------------------+
+-----------------------------------------------------------------------------------------------------------+
| Utf8("2000-10-30") - IntervalMonthDayNano("IntervalMonthDayNano { months: 24, days: 0, nanoseconds: 0 }") |
+-----------------------------------------------------------------------------------------------------------+
| 1998-10-30 |
+-----------------------------------------------------------------------------------------------------------+