Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-25 23:49:58 +00:00

Compare commits: 132 commits, v0.1.0-alp ... replace-ar
SHA1 (the author and date columns are empty in this mirror):

a8630cdb38, 0f3dcc1b38, 7c696dae08, 61d8bc2ea1, 142dee41d6, e3785fca70, ce6d1cb7d1, dbb3034ecb, fda9e80cbf, 756c068166, 652d59a643, fa971c6513, 36c929e1a7, 6a4e2e5975, a712382fba, 4b644aa482, 9ad6ddb26e, 4defde055c, c5661ee362, 9b093463cc, 61e0f1a11c, 95b2d8654f, 249ebc6937, 42fdc7251a, c1b8981f61, 949cd3e3af, b26982c5d7, d0892bf0b7, fff530cb50, b936d8b18a, 1bde1ba399, 3687bc7346, 4fdf26810c, 587bdc9800, 7f59758e69, 58c26def6b, 6f3baf96b0, a898f846d1, a562199455, fb0b4eb826, a521ab5041, 833216d317, 2ba99259e1, 551cde23b1, 90c832b33d, 8959dbcef8, 653906d4fa, 829ff491c4, b32438e78c, 0ccb8b4302, b48ae21b71, 2034b40f33, 3c0adb00f3, 8c66b7d000, 99371fd31b, fe505fecfd, 55e6be7af1, f9bfb121db, cc1ec26416, 504059a699, 7151deb4ed, 6fb413ae50, beb07fc895, 4275e47bdb, 6720bc5f7c, 4052563248, 952e1bd626, 8232015998, d82a3a7d58, 0599465685, 13d51250ba, 6127706b5b, 2e17e9c4b5, b0cbfa7ffb, 20172338e8, 9c53f9b24c, 6d24f7ebb6, 68c2de8e45, a17dcbc511, 53ab19ea5a, 84c44cf540, 020b9936cd, 75dcf2467b, eea5393f96, 3d312d389d, fdc73fb52f, 2a36e26d19, baef640fe3, 5fddb799f7, f372229b18, 4085fc7899, 30940e692a, b371ce0f48, ac7f52d303, 051768b735, c5b0d2431f, 4038dd4067, 8be0f05570, 69f06eec8b, 7b37e99a45, c09775d17f, 4a9cf49637, 9f865b50ab, b407ebf6bb, c144a1b20e, d0686f9c19, 221f3e9d2e, 0791c65149, 61c4a3691a, d7626fd6af, 62fcb54258, e3201a4705, 571a84d91b, 2b6b979d5a, b6fa316c65, ca5734edb3, 5428ad364e, 663c725838, c94b544e4a, f465040acc, 22ae983280, e1f326295f, 6d762aa9dc, d4b09f69ab, 1f0b39cc8d, dee5ccec9e, f8788273d5, df465308cc, e7b4d2b9cd, bf408e3b96, 73e6e2e01b, 8faa6b0f09
4 .env.example (new file)
@@ -0,0 +1,4 @@
# Settings for s3 test
GT_S3_BUCKET=S3 bucket
GT_S3_ACCESS_KEY_ID=S3 access key id
GT_S3_ACCESS_KEY=S3 secret access key
4 .github/pull_request_template.md (vendored)
@@ -13,7 +13,7 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed

## Checklist

- [] I have written the necessary rustdoc comments.
- [] I have added the necessary unit tests and integration tests.
- [ ] I have written the necessary rustdoc comments.
- [ ] I have added the necessary unit tests and integration tests.

## Refer to a related PR or issue link (optional)
24 .github/workflows/develop.yml (vendored)
@@ -26,6 +26,13 @@ env:
  RUST_TOOLCHAIN: nightly-2022-07-14

jobs:
  typos:
    name: Spell Check with Typos
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: crate-ci/typos@v1.0.4

  check:
    name: Check
    if: github.event.pull_request.draft == false
@@ -42,6 +49,23 @@ jobs:
      - name: Run cargo check
        run: cargo check --workspace --all-targets

  toml:
    name: Toml Check
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - uses: actions/checkout@v3
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
      - name: Rust Cache
        uses: Swatinem/rust-cache@v2
      - name: Install taplo
        run: cargo install taplo-cli --version ^0.8 --locked
      - name: Run taplo
        run: taplo format --check --option "indent_string= "

  # Use coverage to run test.
  # test:
  #   name: Test Suite
25 .github/workflows/doc-issue.yml (vendored, new file)
@@ -0,0 +1,25 @@
name: Create Issue in docs repo on doc related changes

on:
  issues:
    types:
      - labeled
  pull_request_target:
    types:
      - labeled

jobs:
  doc_issue:
    if: github.event.label.name == 'doc update required'
    runs-on: ubuntu-latest
    steps:
      - name: create an issue in doc repo
        uses: dacbd/create-issue-action@main
        with:
          owner: GreptimeTeam
          repo: docs
          token: ${{ secrets.DOCS_REPO_TOKEN }}
          title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
          body: |
            A document change request is generated from
            ${{ github.event.issue.html_url || github.event.pull_request.html_url }}
57 .github/workflows/release.yml (vendored)
@@ -2,6 +2,9 @@ on:
  push:
    tags:
      - "v*.*.*"
  schedule:
    # At 00:00 on Monday.
    - cron: '0 0 * * 1'
  workflow_dispatch:

name: Release
@@ -9,6 +12,12 @@ name: Release
env:
  RUST_TOOLCHAIN: nightly-2022-07-14

  # FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
  SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha

  # In the future, we can change SCHEDULED_PERIOD to nightly.
  SCHEDULED_PERIOD: weekly

jobs:
  build:
    name: Build binary
@@ -106,8 +115,32 @@ jobs:
      - name: Download artifacts
        uses: actions/download-artifact@v3

      - name: Configure scheduled build version # the version would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}, like v0.1.0-alpha-20221119-weekly.
        shell: bash
        if: github.event_name == 'schedule'
        run: |
          buildTime=`date "+%Y%m%d"`
          SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
          echo "SCHEDULED_BUILD_VERSION=${SCHEDULED_BUILD_VERSION}" >> $GITHUB_ENV

      - name: Create scheduled build git tag
        if: github.event_name == 'schedule'
        run: |
          git tag ${{ env.SCHEDULED_BUILD_VERSION }}

      - name: Publish scheduled release # configure the different release title and tags.
        uses: softprops/action-gh-release@v1
        if: github.event_name == 'schedule'
        with:
          name: "Release ${{ env.SCHEDULED_BUILD_VERSION }}"
          tag_name: ${{ env.SCHEDULED_BUILD_VERSION }}
          generate_release_notes: true
          files: |
            **/greptime-*

      - name: Publish release
        uses: softprops/action-gh-release@v1
        if: github.event_name != 'schedule'
        with:
          name: "Release ${{ github.ref_name }}"
          files: |
@@ -145,12 +178,12 @@ jobs:
          tar xvf greptime-linux-arm64.tgz
          rm greptime-linux-arm64.tgz

      - name: Login to GitHub Container Registry
      - name: Login to UCloud Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
          registry: uhub.service.ucloud.cn
          username: ${{ secrets.UCLOUD_USERNAME }}
          password: ${{ secrets.UCLOUD_PASSWORD }}

      - name: Login to Dockerhub
        uses: docker/login-action@v2
@@ -158,11 +191,20 @@ jobs:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
        shell: bash
        if: github.event_name == 'schedule'
        run: |
          buildTime=`date "+%Y%m%d"`
          SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
          echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV

      - name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
        shell: bash
        if: github.event_name != 'schedule'
        run: |
          VERSION=${{ github.ref_name }}
          echo "VERSION=${VERSION:1}" >> $GITHUB_ENV
          echo "IMAGE_TAG=${VERSION:1}" >> $GITHUB_ENV

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
@@ -179,5 +221,6 @@ jobs:
          platforms: linux/amd64,linux/arm64
          tags: |
            greptime/greptimedb:latest
            greptime/greptimedb:${{ env.VERSION }}
            ghcr.io/greptimeteam/greptimedb:${{ env.VERSION }}
            greptime/greptimedb:${{ env.IMAGE_TAG }}
            uhub.service.ucloud.cn/greptime/greptimedb:latest
            uhub.service.ucloud.cn/greptime/greptimedb:${{ env.IMAGE_TAG }}
4 .gitignore (vendored)
@@ -18,6 +18,7 @@ debug/

# JetBrains IDE config directory
.idea/
*.iml

# VSCode IDE config directory
.vscode/
@@ -31,3 +32,6 @@ logs/

# Benchmark dataset
benchmarks/data

# dotenv
.env
@@ -9,7 +9,7 @@ repos:
    rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
    hooks:
      - id: cargo-sort
        args: ["--workspace", "--print"]
        args: ["--workspace"]

  - repo: https://github.com/doublify/pre-commit-rust
    rev: v1.0
2963 Cargo.lock (generated): file diff suppressed because it is too large.
@@ -11,11 +11,11 @@ members = [
    "src/common/function",
    "src/common/function-macro",
    "src/common/grpc",
    "src/common/grpc-expr",
    "src/common/query",
    "src/common/recordbatch",
    "src/common/runtime",
    "src/common/substrait",
    "src/common/insert",
    "src/common/telemetry",
    "src/common/time",
    "src/datanode",
@@ -24,15 +24,19 @@ members = [
    "src/log-store",
    "src/meta-client",
    "src/meta-srv",
    "src/mito",
    "src/object-store",
    "src/promql",
    "src/query",
    "src/script",
    "src/servers",
    "src/session",
    "src/sql",
    "src/storage",
    "src/store-api",
    "src/table",
    "src/mito",
    "tests-integration",
    "tests/runner",
]

[profile.release]
67 Makefile (new file)
@@ -0,0 +1,67 @@
IMAGE_REGISTRY ?= greptimedb
IMAGE_TAG ?= latest

##@ Build

.PHONY: build
build: ## Build debug version greptime.
	cargo build

.PHONY: release
release: ## Build release version greptime.
	cargo build --release

.PHONY: clean
clean: ## Clean the project.
	cargo clean

.PHONY: fmt
fmt: ## Format all the Rust code.
	cargo fmt --all

.PHONY: docker-image
docker-image: ## Build docker image.
	docker build --network host -f docker/Dockerfile -t ${IMAGE_REGISTRY}:${IMAGE_TAG} .

##@ Test

.PHONY: unit-test
unit-test: ## Run unit test.
	cargo test --workspace

.PHONY: integration-test
integration-test: ## Run integration test.
	cargo test integration

.PHONY: sqlness-test
sqlness-test: ## Run sqlness test.
	cargo run --bin sqlness-runner

.PHONY: check
check: ## Cargo check all the targets.
	cargo check --workspace --all-targets

.PHONY: clippy
clippy: ## Check clippy rules.
	cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr

.PHONY: fmt-check
fmt-check: ## Check code format.
	cargo fmt --all -- --check

##@ General

# The help target prints out all targets with their descriptions organized
# beneath their categories. The categories are represented by '##@' and the
# target descriptions by '##'. The awk command is responsible for reading the
# entire set of makefiles included in this invocation, looking for lines of the
# file as xyz: ## something, and then pretty-formats the target and help. Then,
# if there's a line with ##@ something, that gets pretty-printed as a category.
# More info on the usage of ANSI control characters for terminal formatting:
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
# More info on the awk command:
# https://linuxcommand.org/lc3_adv_awk.php

.PHONY: help
help: ## Display help messages.
	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
22 README.md
@@ -1,7 +1,12 @@
<p align="center">
    <img src="/docs/logo-text-padding.png" alt="GreptimeDB Logo" width="400px"></img>
    <picture>
        <source media="(prefers-color-scheme: light)" srcset="/docs/logo-text-padding.png">
        <source media="(prefers-color-scheme: dark)" srcset="/docs/logo-text-padding-dark.png">
        <img alt="GreptimeDB Logo" src="/docs/logo-text-padding.png" width="400px">
    </picture>
</p>

<h3 align="center">
    The next-generation hybrid timeseries/analytics processing database in the cloud
</h3>
@@ -53,8 +58,9 @@ To compile GreptimeDB from source, you'll need:
  install correct Rust version for you.
- Protobuf: `protoc` is required for compiling `.proto` files. `protobuf` is
  available from major package managers on macOS and Linux distributions. You can
  find an installation instructions
  [here](https://grpc.io/docs/protoc-installation/).
  find installation instructions [here](https://grpc.io/docs/protoc-installation/).
  **Note that the `protoc` version needs to be >= 3.15** because we have used the `optional`
  keyword. You can check it with `protoc --version`.

#### Build with Docker
@@ -114,7 +120,10 @@ about Kubernetes deployment, check our [docs](https://docs.greptime.com/).
4. Query the data:

   ```SQL
   mysql> SELECT * FROM monitor;
   SELECT * FROM monitor;
   ```

   ```TEXT
   +-------+---------------------+------+--------+
   | host  | ts                  | cpu  | memory |
   +-------+---------------------+------+--------+
@@ -156,6 +165,8 @@ break things. Benchmark on development branch may not represent its potential
performance. We release pre-built binaries constantly for functional
evaluation. Do not use it in production at the moment.

For future plans, check out the [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/669).

## Community

Our core team is thrilled to see you participate in any way you like. When you are stuck, try to
@@ -169,7 +180,7 @@ community, please check out:

In addition, you may:

- View our official [Blog](https://greptime.com/blog)
- View our official [Blog](https://greptime.com/blogs/index)
- Connect with us on [LinkedIn](https://www.linkedin.com/company/greptime/)
- Follow us on [Twitter](https://twitter.com/greptime)
@@ -189,3 +200,4 @@ Please refer to [contribution guidelines](CONTRIBUTING.md) for more information.
- GreptimeDB's query engine is powered by [Apache Arrow DataFusion](https://github.com/apache/arrow-datafusion).
- [OpenDAL](https://github.com/datafuselabs/opendal) from [Datafuse Labs](https://github.com/datafuselabs) gives GreptimeDB a very general and elegant data access abstraction layer.
- GreptimeDB's meta service is based on [etcd](https://etcd.io/).
- GreptimeDB uses [RustPython](https://github.com/RustPython/RustPython) for experimental embedded python scripting.
@@ -5,10 +5,10 @@ edition = "2021"
license = "Apache-2.0"

[dependencies]
arrow = "10"
arrow = "26.0.0"
clap = { version = "4.0", features = ["derive"] }
client = { path = "../src/client" }
indicatif = "0.17.1"
itertools = "0.10.5"
parquet = { version = "*" }
parquet = "26.0.0"
tokio = { version = "1.21", features = ["full"] }
@@ -20,7 +20,6 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;

use arrow::array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray};
@@ -28,14 +27,11 @@ use arrow::datatypes::{DataType, Float64Type, Int64Type};
use arrow::record_batch::RecordBatch;
use clap::Parser;
use client::admin::Admin;
use client::api::v1::codec::InsertBatch;
use client::api::v1::column::Values;
use client::api::v1::{insert_expr, Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
use client::{Client, Database, Select};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
use parquet::file::reader::FileReader;
use parquet::file::serialized_reader::SerializedFileReader;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use tokio::task::JoinSet;

const DATABASE_NAME: &str = "greptime";
@@ -87,10 +83,14 @@ async fn write_data(
    pb_style: ProgressStyle,
) -> u128 {
    let file = std::fs::File::open(&path).unwrap();
    let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
    let row_num = file_reader.metadata().file_metadata().num_rows();
    let record_batch_reader = ParquetFileArrowReader::new(file_reader)
        .get_record_reader(batch_size)
    let record_batch_reader_builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
    let row_num = record_batch_reader_builder
        .metadata()
        .file_metadata()
        .num_rows();
    let record_batch_reader = record_batch_reader_builder
        .with_batch_size(batch_size)
        .build()
        .unwrap();
    let progress_bar = mpb.add(ProgressBar::new(row_num as _));
    progress_bar.set_style(pb_style);
@@ -100,16 +100,13 @@
    for record_batch in record_batch_reader {
        let record_batch = record_batch.unwrap();
        let row_count = record_batch.num_rows();
        let insert_batch = convert_record_batch(record_batch).into();
        let (columns, row_count) = convert_record_batch(record_batch);
        let insert_expr = InsertExpr {
            schema_name: "public".to_string(),
            table_name: TABLE_NAME.to_string(),
            expr: Some(insert_expr::Expr::Values(insert_expr::Values {
                values: vec![insert_batch],
            })),
            options: HashMap::default(),
            region_number: 0,
            columns,
            row_count,
        };
        let now = Instant::now();
        db.insert(insert_expr).await.unwrap();
@@ -125,7 +122,7 @@
    total_rpc_elapsed_ms
}

fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
    let schema = record_batch.schema();
    let fields = schema.fields();
    let row_count = record_batch.num_rows();
@@ -143,10 +140,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
        columns.push(column);
    }

    InsertBatch {
        columns,
        row_count: row_count as _,
    }
    (columns, row_count as _)
}

fn build_values(column: &ArrayRef) -> Values {
@@ -217,9 +211,10 @@
        | DataType::FixedSizeList(_, _)
        | DataType::LargeList(_)
        | DataType::Struct(_)
        | DataType::Union(_, _)
        | DataType::Union(_, _, _)
        | DataType::Dictionary(_, _)
        | DataType::Decimal(_, _)
        | DataType::Decimal128(_, _)
        | DataType::Decimal256(_, _)
        | DataType::Map(_, _) => todo!(),
    }
}
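The hunks above migrate the benchmark loader from the old `ParquetFileArrowReader` to the builder API of arrow/parquet 26. As a rough self-contained sketch of the same read path (the file path, batch size, and error handling here are illustrative, not taken from this repo):

```rust
use std::fs::File;

use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;

/// Counts the rows of a parquet file with the builder-based reader API
/// used above. `path` and `batch_size` are illustrative inputs.
fn count_rows(path: &str, batch_size: usize) -> i64 {
    let file = File::open(path).expect("open parquet file");

    // The builder exposes file metadata before any batch is decoded,
    // which is how `write_data` sizes its progress bar up front.
    let builder = ParquetRecordBatchReaderBuilder::try_new(file).expect("read parquet metadata");
    let expected = builder.metadata().file_metadata().num_rows();

    let reader = builder
        .with_batch_size(batch_size)
        .build()
        .expect("build record batch reader");

    // The reader is an iterator of `Result<RecordBatch>`.
    let seen: i64 = reader
        .map(|batch| batch.expect("decode record batch").num_rows() as i64)
        .sum();
    assert_eq!(expected, seen);
    seen
}
```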
@@ -7,3 +7,4 @@ coverage:
  patch: off
ignore:
  - "**/error*.rs"      # ignore all error.rs files
  - "tests/runner/*.rs" # ignore integration test runner
@@ -3,15 +3,16 @@ mode = 'distributed'
rpc_addr = '127.0.0.1:3001'
wal_dir = '/tmp/greptimedb/wal'
rpc_runtime_size = 8
mysql_addr = '127.0.0.1:3306'
mysql_addr = '127.0.0.1:4406'
mysql_runtime_size = 4
enable_memory_catalog = false

[storage]
type = 'File'
data_dir = '/tmp/greptimedb/data/'

[meta_client_opts]
metasrv_addr = '1.1.1.1:3002'
metasrv_addrs = ['127.0.0.1:3002']
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = false
@@ -1,9 +1,12 @@
mode = 'distributed'
datanode_rpc_addr = '127.0.0.1:3001'
http_addr = '127.0.0.1:4000'

[http_options]
addr = '127.0.0.1:4000'
timeout = "30s"

[meta_client_opts]
metasrv_addr = '1.1.1.1:3002'
metasrv_addrs = ['127.0.0.1:3002']
timeout_millis = 3000
connect_timeout_millis = 5000
tcp_nodelay = false
@@ -1,9 +1,11 @@
node_id = 0
mode = 'standalone'
http_addr = '127.0.0.1:4000'
datanode_mysql_addr = '127.0.0.1:3306'
datanode_mysql_runtime_size = 4
wal_dir = '/tmp/greptimedb/wal/'
enable_memory_catalog = false

[http_options]
addr = '127.0.0.1:4000'
timeout = "30s"

[storage]
type = 'File'
@@ -55,7 +55,7 @@ The DataFusion basically execute aggregate like this:
2. Call `update_batch` on each accumulator with partitioned data, to let you update your aggregate calculation.
3. Call `state` to get each accumulator's internal state, the medial calculation result.
4. Call `merge_batch` to merge all accumulators' internal states into one.
5. Execute `evalute` on the chosen one to get the final calculation result.
5. Execute `evaluate` on the chosen one to get the final calculation result.

Once you know the meaning of each method, you can easily write your accumulator. You can refer to the `Median` accumulator or the `SUM` accumulator defined in the file `my_sum_udaf_example.rs` for more details.

@@ -63,7 +63,7 @@ Once you know the meaning of each method, you can easily write your accumulator.

You can call the `register_aggregate_function` method in the query engine to register your aggregate function. To do that, you have to create a new instance of the struct `AggregateFunctionMeta`. The struct has three fields. The first is the name of your aggregate function. The function name is case-sensitive due to DataFusion's restriction; we strongly recommend using a lowercase name. If you have to use an uppercase name, wrap your aggregate function in quotation marks. For example, if you define an aggregate function named "my_aggr", you can use "`SELECT MY_AGGR(x)`"; if you define "my_AGGR", you have to use "`SELECT "my_AGGR"(x)`".

The second field is arg_counts ,the count of the arguments. Like accumulator `percentile`, caculating the p_number of the column. We need to input the value of column and the value of p to cacalate, and so the count of the arguments is two.
The second field is `arg_counts`, the count of the arguments. Take the `percentile` accumulator, which calculates the p-number of a column: it needs both the column values and the value of p as input, so its argument count is two.

The third field is a function for creating the accumulator creator you defined in step 1 above. "Create creator" sounds a bit intertwined, but it is how we make DataFusion use a newly created aggregate function each time it executes a SQL statement, preventing the stored input types from affecting each other. For the key details, start with the `get_aggregate_meta` method of our `DfContextProviderAdapter` struct.
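To make the five-step life cycle concrete, here is a minimal sketch in plain Rust. The types are deliberately simplified stand-ins (slices of `f64` instead of vector and scalar values), not the actual trait this document registers with the query engine:

```rust
/// A toy SUM accumulator following the DataFusion-style life cycle above.
#[derive(Default)]
struct MySum {
    sum: f64,
}

impl MySum {
    /// Step 2: fold one partition's batch into the running state.
    fn update_batch(&mut self, values: &[f64]) {
        self.sum += values.iter().sum::<f64>();
    }

    /// Step 3: expose the internal (medial) state for shipping to the merger.
    fn state(&self) -> Vec<f64> {
        vec![self.sum]
    }

    /// Step 4: merge another accumulator's state into this one.
    fn merge_batch(&mut self, states: &[f64]) {
        self.sum += states.iter().sum::<f64>();
    }

    /// Step 5: produce the final result.
    fn evaluate(&self) -> f64 {
        self.sum
    }
}

fn main() {
    // Step 1: one accumulator per partition.
    let (mut a, mut b) = (MySum::default(), MySum::default());
    a.update_batch(&[1.0, 2.0]);
    b.update_batch(&[3.0]);

    // Merge b's state into the chosen accumulator, then evaluate.
    let state_b = b.state();
    a.merge_batch(&state_b);
    assert_eq!(6.0, a.evaluate());
}
```

The real implementations differ mainly in operating on typed vectors and in returning `Result`s from each step.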
BIN docs/logo-text-padding-dark.png (new file, 25 KiB): binary file not shown.
@@ -7,8 +7,8 @@ license = "Apache-2.0"

[dependencies]
common-base = { path = "../common/base" }
common-time = { path = "../common/time" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
prost = "0.11"
snafu = { version = "0.7", features = ["backtraces"] }
@@ -20,9 +20,7 @@
        .file_descriptor_set_path(default_out_dir.join("greptime_fd.bin"))
        .compile(
            &[
                "greptime/v1/insert.proto",
                "greptime/v1/select.proto",
                "greptime/v1/physical_plan.proto",
                "greptime/v1/greptime.proto",
                "greptime/v1/meta/common.proto",
                "greptime/v1/meta/heartbeat.proto",
@@ -20,6 +20,7 @@ message AdminExpr {
    CreateExpr create = 2;
    AlterExpr alter = 3;
    CreateDatabaseExpr create_database = 4;
    DropTableExpr drop_table = 5;
  }
}

@@ -51,18 +52,33 @@ message AlterExpr {
  string table_name = 3;
  oneof kind {
    AddColumns add_columns = 4;
    DropColumns drop_columns = 5;
  }
}

message DropTableExpr {
  string catalog_name = 1;
  string schema_name = 2;
  string table_name = 3;
}

message AddColumns {
  repeated AddColumn add_columns = 1;
}

message DropColumns {
  repeated DropColumn drop_columns = 1;
}

message AddColumn {
  ColumnDef column_def = 1;
  bool is_key = 2;
}

message DropColumn {
  string name = 1;
}

message CreateDatabaseExpr {
  //TODO(hl): maybe rename to schema_name?
  string database_name = 1;
@@ -32,7 +32,10 @@ message Column {

    repeated int32 date_values = 14;
    repeated int64 datetime_values = 15;
    repeated int64 ts_millis_values = 16;
    repeated int64 ts_second_values = 16;
    repeated int64 ts_millisecond_values = 17;
    repeated int64 ts_microsecond_values = 18;
    repeated int64 ts_nanosecond_values = 19;
  }
  // The array of non-null values in this column.
  //
@@ -75,5 +78,8 @@ enum ColumnDataType {
  STRING = 12;
  DATE = 13;
  DATETIME = 14;
  TIMESTAMP = 15;
  TIMESTAMP_SECOND = 15;
  TIMESTAMP_MILLISECOND = 16;
  TIMESTAMP_MICROSECOND = 17;
  TIMESTAMP_NANOSECOND = 18;
}
@@ -2,6 +2,7 @@ syntax = "proto3";

package greptime.v1;

import "greptime/v1/column.proto";
import "greptime/v1/common.proto";

message DatabaseRequest {

@@ -28,39 +29,23 @@ message SelectExpr {
  oneof expr {
    string sql = 1;
    bytes logical_plan = 2;
    PhysicalPlan physical_plan = 15;
  }
}

message PhysicalPlan {
  bytes original_ql = 1;
  bytes plan = 2;
}

message InsertExpr {
  string schema_name = 1;
  string table_name = 2;

  message Values {
    repeated bytes values = 1;
  }
  // Data is represented here.
  repeated Column columns = 3;

  oneof expr {
    Values values = 3;
  // The row_count of all columns, which includes null and non-null values.
  //
  // Note: the row_count of all columns in an InsertExpr must be the same.
  uint32 row_count = 4;

  // TODO(LFC): Remove field "sql" in InsertExpr.
  // When Frontend instance received an insertion SQL (`insert into ...`), it's anticipated to parse the SQL and
  // assemble the values to insert to feed Datanode. In other words, inserting data through Datanode instance's GRPC
  // interface shouldn't use SQL directly.
  // Then why the "sql" field exists here? It's because the Frontend needs table schema to create the values to insert,
  // which is currently not able to find anywhere. (Maybe the table schema is suppose to be fetched from Meta?)
  // The "sql" field is meant to be removed in the future.
    string sql = 4;
  }

  /// The region number of current insert request.
  // The region number of the current insert request.
  uint32 region_number = 5;
  map<string, bytes> options = 6;
}

// TODO(jiachun)
@@ -1,14 +0,0 @@
syntax = "proto3";

package greptime.v1.codec;

import "greptime/v1/column.proto";

message InsertBatch {
  repeated Column columns = 1;
  uint32 row_count = 2;
}

message RegionNumber {
  uint32 id = 1;
}
@@ -39,7 +39,7 @@ message NodeStat {
  uint64 wcus = 2;
  // Table number in this node
  uint64 table_num = 3;
  // Regon number in this node
  // Region number in this node
  uint64 region_num = 4;

  double cpu_usage = 5;
@@ -5,6 +5,8 @@ package greptime.v1.meta;
import "greptime/v1/meta/common.proto";

service Router {
  rpc Create(CreateRequest) returns (RouteResponse) {}

  // Fetch routing information for tables. The smallest unit is the complete
  // routing information(all regions) of a table.
  //
@@ -26,7 +28,14 @@ service Router {
  //
  rpc Route(RouteRequest) returns (RouteResponse) {}

  rpc Create(CreateRequest) returns (RouteResponse) {}
  rpc Delete(DeleteRequest) returns (RouteResponse) {}
}

message CreateRequest {
  RequestHeader header = 1;

  TableName table_name = 2;
  repeated Partition partitions = 3;
}

message RouteRequest {
@@ -35,6 +44,12 @@ message RouteRequest {
  repeated TableName table_names = 2;
}

message DeleteRequest {
  RequestHeader header = 1;

  TableName table_name = 2;
}

message RouteResponse {
  ResponseHeader header = 1;
@@ -42,13 +57,6 @@ message RouteResponse {
  repeated TableRoute table_routes = 3;
}

message CreateRequest {
  RequestHeader header = 1;

  TableName table_name = 2;
  repeated Partition partitions = 3;
}

message TableRoute {
  Table table = 1;
  repeated RegionRoute region_routes = 2;
@@ -20,6 +20,9 @@ service Store {

  // DeleteRange deletes the given range from the key-value store.
  rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);

  // MoveValue atomically renames the key to the given updated key.
  rpc MoveValue(MoveValueRequest) returns (MoveValueResponse);
}

message RangeRequest {
@@ -136,3 +139,21 @@ message DeleteRangeResponse {
  // returned.
  repeated KeyValue prev_kvs = 3;
}

message MoveValueRequest {
  RequestHeader header = 1;

  // If from_key does not exist, return the value of to_key (if it exists).
  // If from_key exists, move the value of from_key to to_key (i.e. rename),
  // and return the value.
  bytes from_key = 2;
  bytes to_key = 3;
}

message MoveValueResponse {
  ResponseHeader header = 1;

  // If from_key does not exist, return the value of to_key (if it exists).
  // If from_key exists, return the value of from_key.
  KeyValue kv = 2;
}
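Read as pseudocode, the comments on `MoveValueRequest` amount to the following toy in-memory model (illustrative only; the actual RPC executes atomically inside the meta key-value store):

```rust
use std::collections::HashMap;

/// Toy model of the documented MoveValue semantics over an in-memory map.
fn move_value(
    kv: &mut HashMap<Vec<u8>, Vec<u8>>,
    from_key: &[u8],
    to_key: &[u8],
) -> Option<Vec<u8>> {
    match kv.remove(from_key) {
        // from_key exists: rename it to to_key and return the moved value.
        Some(value) => {
            kv.insert(to_key.to_vec(), value.clone());
            Some(value)
        }
        // from_key does not exist: return to_key's value, if any.
        None => kv.get(to_key).cloned(),
    }
}
```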
@@ -1,33 +0,0 @@
syntax = "proto3";

package greptime.v1.codec;

message PhysicalPlanNode {
  oneof PhysicalPlanType {
    ProjectionExecNode projection = 1;
    MockInputExecNode mock = 99;
    // TODO(fys): impl other physical plan node
  }
}

message ProjectionExecNode {
  PhysicalPlanNode input = 1;
  repeated PhysicalExprNode expr = 2;
  repeated string expr_name = 3;
}

message PhysicalExprNode {
  oneof ExprType {
    PhysicalColumn column = 1;
    // TODO(fys): impl other physical expr node
  }
}

message PhysicalColumn {
  string name = 1;
  uint64 index = 2;
}

message MockInputExecNode {
  string name = 1;
}
@@ -33,6 +33,28 @@ pub enum Error {
        from: ConcreteDataType,
        backtrace: Backtrace,
    },

    #[snafu(display(
        "Failed to convert column default constraint, column: {}, source: {}",
        column,
        source
    ))]
    ConvertColumnDefaultConstraint {
        column: String,
        #[snafu(backtrace)]
        source: datatypes::error::Error,
    },

    #[snafu(display(
        "Invalid column default constraint, column: {}, source: {}",
        column,
        source
    ))]
    InvalidColumnDefaultConstraint {
        column: String,
        #[snafu(backtrace)]
        source: datatypes::error::Error,
    },
}

impl ErrorExt for Error {
@@ -40,6 +62,8 @@ impl ErrorExt for Error {
    match self {
        Error::UnknownColumnDataType { .. } => StatusCode::InvalidArguments,
        Error::IntoColumnDataType { .. } => StatusCode::Unexpected,
        Error::ConvertColumnDefaultConstraint { source, .. }
        | Error::InvalidColumnDefaultConstraint { source, .. } => source.status_code(),
    }
}
fn backtrace_opt(&self) -> Option<&Backtrace> {
@@ -15,6 +15,7 @@
use common_base::BitVec;
use common_time::timestamp::TimeUnit;
use datatypes::prelude::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::value::Value;
use datatypes::vectors::VectorRef;
use snafu::prelude::*;
@@ -56,7 +57,16 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
            ColumnDataType::String => ConcreteDataType::string_datatype(),
            ColumnDataType::Date => ConcreteDataType::date_datatype(),
            ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
            ColumnDataType::Timestamp => ConcreteDataType::timestamp_millis_datatype(),
            ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
            ColumnDataType::TimestampMillisecond => {
                ConcreteDataType::timestamp_millisecond_datatype()
            }
            ColumnDataType::TimestampMicrosecond => {
                ConcreteDataType::timestamp_microsecond_datatype()
            }
            ColumnDataType::TimestampNanosecond => {
                ConcreteDataType::timestamp_nanosecond_datatype()
            }
        }
    }
}
@@ -81,7 +91,12 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
            ConcreteDataType::String(_) => ColumnDataType::String,
            ConcreteDataType::Date(_) => ColumnDataType::Date,
            ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
            ConcreteDataType::Timestamp(_) => ColumnDataType::Timestamp,
            ConcreteDataType::Timestamp(unit) => match unit {
                TimestampType::Second(_) => ColumnDataType::TimestampSecond,
                TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
                TimestampType::Microsecond(_) => ColumnDataType::TimestampMicrosecond,
                TimestampType::Nanosecond(_) => ColumnDataType::TimestampNanosecond,
            },
            ConcreteDataType::Null(_) | ConcreteDataType::List(_) => {
                return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
            }
@@ -153,8 +168,20 @@ impl Values {
                datetime_values: Vec::with_capacity(capacity),
                ..Default::default()
            },
            ColumnDataType::Timestamp => Values {
                ts_millis_values: Vec::with_capacity(capacity),
            ColumnDataType::TimestampSecond => Values {
                ts_second_values: Vec::with_capacity(capacity),
                ..Default::default()
            },
            ColumnDataType::TimestampMillisecond => Values {
                ts_millisecond_values: Vec::with_capacity(capacity),
                ..Default::default()
            },
            ColumnDataType::TimestampMicrosecond => Values {
                ts_microsecond_values: Vec::with_capacity(capacity),
                ..Default::default()
            },
            ColumnDataType::TimestampNanosecond => Values {
                ts_nanosecond_values: Vec::with_capacity(capacity),
                ..Default::default()
            },
        }
@@ -187,9 +214,12 @@ impl Column {
            Value::Binary(val) => values.binary_values.push(val.to_vec()),
            Value::Date(val) => values.date_values.push(val.val()),
            Value::DateTime(val) => values.datetime_values.push(val.val()),
            Value::Timestamp(val) => values
                .ts_millis_values
                .push(val.convert_to(TimeUnit::Millisecond)),
            Value::Timestamp(val) => match val.unit() {
                TimeUnit::Second => values.ts_second_values.push(val.value()),
                TimeUnit::Millisecond => values.ts_millisecond_values.push(val.value()),
                TimeUnit::Microsecond => values.ts_microsecond_values.push(val.value()),
                TimeUnit::Nanosecond => values.ts_nanosecond_values.push(val.value()),
            },
            Value::List(_) => unreachable!(),
        });
        self.null_mask = null_mask.into_vec();
@@ -200,7 +230,10 @@
mod tests {
    use std::sync::Arc;

    use datatypes::vectors::BooleanVector;
    use datatypes::vectors::{
        BooleanVector, TimestampMicrosecondVector, TimestampMillisecondVector,
        TimestampNanosecondVector, TimestampSecondVector,
    };

    use super::*;
@@ -258,8 +291,8 @@ mod tests {
        let values = values.datetime_values;
        assert_eq!(2, values.capacity());

        let values = Values::with_capacity(ColumnDataType::Timestamp, 2);
        let values = values.ts_millis_values;
        let values = Values::with_capacity(ColumnDataType::TimestampMillisecond, 2);
        let values = values.ts_millisecond_values;
        assert_eq!(2, values.capacity());
    }
@@ -326,8 +359,8 @@ mod tests {
            ColumnDataTypeWrapper(ColumnDataType::Datetime).into()
        );
        assert_eq!(
            ConcreteDataType::timestamp_millis_datatype(),
            ColumnDataTypeWrapper(ColumnDataType::Timestamp).into()
            ConcreteDataType::timestamp_millisecond_datatype(),
            ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond).into()
        );
    }
@@ -394,8 +427,8 @@ mod tests {
            ConcreteDataType::datetime_datatype().try_into().unwrap()
        );
        assert_eq!(
            ColumnDataTypeWrapper(ColumnDataType::Timestamp),
            ConcreteDataType::timestamp_millis_datatype()
            ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond),
            ConcreteDataType::timestamp_millisecond_datatype()
                .try_into()
                .unwrap()
        );
@@ -412,7 +445,48 @@ mod tests {
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err().to_string(),
            "Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
            "Failed to create column datatype from List(ListType { item_type: Boolean(BooleanType) })"
        );
    }

    #[test]
    fn test_column_put_timestamp_values() {
        let mut column = Column {
            column_name: "test".to_string(),
            semantic_type: 0,
            values: Some(Values {
                ..Default::default()
            }),
            null_mask: vec![],
            datatype: 0,
        };

        let vector = Arc::new(TimestampNanosecondVector::from_vec(vec![1, 2, 3]));
        column.push_vals(3, vector);
        assert_eq!(
            vec![1, 2, 3],
            column.values.as_ref().unwrap().ts_nanosecond_values
        );

        let vector = Arc::new(TimestampMillisecondVector::from_vec(vec![4, 5, 6]));
        column.push_vals(3, vector);
        assert_eq!(
            vec![4, 5, 6],
            column.values.as_ref().unwrap().ts_millisecond_values
        );

        let vector = Arc::new(TimestampMicrosecondVector::from_vec(vec![7, 8, 9]));
        column.push_vals(3, vector);
        assert_eq!(
            vec![7, 8, 9],
            column.values.as_ref().unwrap().ts_microsecond_values
        );

        let vector = Arc::new(TimestampSecondVector::from_vec(vec![10, 11, 12]));
        column.push_vals(3, vector);
        assert_eq!(
            vec![10, 11, 12],
            column.values.as_ref().unwrap().ts_second_values
        );
    }
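A short sketch of the per-unit dispatch above, mirroring the new tests; the module paths and the `push_vals` signature are assumptions taken from this diff rather than verified imports:

```rust
use std::sync::Arc;

// Assumed paths, for illustration only.
use api::v1::column::Values;
use api::v1::{Column, ColumnDataType};
use datatypes::vectors::TimestampSecondVector;

fn main() {
    // Capacity is reserved in the values vector matching the requested unit.
    let values = Values::with_capacity(ColumnDataType::TimestampSecond, 2);
    assert_eq!(2, values.ts_second_values.capacity());

    // Pushing a second-resolution vector lands in `ts_second_values`.
    let mut column = Column {
        column_name: "ts".to_string(),
        values: Some(Values::default()),
        ..Default::default()
    };
    column.push_vals(2, Arc::new(TimestampSecondVector::from_vec(vec![10, 11])));
    assert_eq!(vec![10, 11], column.values.unwrap().ts_second_values);
}
```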
@@ -15,7 +15,7 @@
pub use prost::DecodeError;
use prost::Message;

use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionNumber, SelectResult};
use crate::v1::codec::SelectResult;
use crate::v1::meta::TableRouteValue;

macro_rules! impl_convert_with_bytes {
@@ -36,10 +36,7 @@ macro_rules! impl_convert_with_bytes {
    };
}

impl_convert_with_bytes!(InsertBatch);
impl_convert_with_bytes!(SelectResult);
impl_convert_with_bytes!(PhysicalPlanNode);
impl_convert_with_bytes!(RegionNumber);
impl_convert_with_bytes!(TableRouteValue);

#[cfg(test)]
@@ -51,52 +48,6 @@ mod tests {

    const SEMANTIC_TAG: i32 = 0;

    #[test]
    fn test_convert_insert_batch() {
        let insert_batch = mock_insert_batch();

        let bytes: Vec<u8> = insert_batch.into();
        let insert: InsertBatch = bytes.deref().try_into().unwrap();

        assert_eq!(8, insert.row_count);
        assert_eq!(1, insert.columns.len());

        let column = &insert.columns[0];
        assert_eq!("foo", column.column_name);
        assert_eq!(SEMANTIC_TAG, column.semantic_type);
        assert_eq!(vec![1], column.null_mask);
        assert_eq!(
            vec![2, 3, 4, 5, 6, 7, 8],
            column.values.as_ref().unwrap().i32_values
        );
    }

    #[should_panic]
    #[test]
    fn test_convert_insert_batch_wrong() {
        let insert_batch = mock_insert_batch();

        let mut bytes: Vec<u8> = insert_batch.into();

        // modify some bytes
        bytes[0] = 0b1;
        bytes[1] = 0b1;

        let insert: InsertBatch = bytes.deref().try_into().unwrap();

        assert_eq!(8, insert.row_count);
        assert_eq!(1, insert.columns.len());

        let column = &insert.columns[0];
        assert_eq!("foo", column.column_name);
        assert_eq!(SEMANTIC_TAG, column.semantic_type);
        assert_eq!(vec![1], column.null_mask);
        assert_eq!(
            vec![2, 3, 4, 5, 6, 7, 8],
            column.values.as_ref().unwrap().i32_values
        );
    }

    #[test]
    fn test_convert_select_result() {
        let select_result = mock_select_result();
@@ -143,35 +94,6 @@ mod tests {
        );
    }

    #[test]
    fn test_convert_region_id() {
        let region_id = RegionNumber { id: 12 };

        let bytes: Vec<u8> = region_id.into();
        let region_id: RegionNumber = bytes.deref().try_into().unwrap();

        assert_eq!(12, region_id.id);
    }

    fn mock_insert_batch() -> InsertBatch {
        let values = column::Values {
            i32_values: vec![2, 3, 4, 5, 6, 7, 8],
            ..Default::default()
        };
        let null_mask = vec![1];
        let column = Column {
            column_name: "foo".to_string(),
            semantic_type: SEMANTIC_TAG,
            values: Some(values),
            null_mask,
            ..Default::default()
        };
        InsertBatch {
            columns: vec![column],
            row_count: 8,
        }
    }

    fn mock_select_result() -> SelectResult {
        let values = column::Values {
            i32_values: vec![2, 3, 4, 5, 6, 7, 8],
@@ -21,4 +21,5 @@ pub mod codec {
    tonic::include_proto!("greptime.v1.codec");
}

mod column_def;
pub mod meta;
38 src/api/src/v1/column_def.rs (new file)
@@ -0,0 +1,38 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
use snafu::ResultExt;

use crate::error::{self, Result};
use crate::helper::ColumnDataTypeWrapper;
use crate::v1::ColumnDef;

impl ColumnDef {
    pub fn try_as_column_schema(&self) -> Result<ColumnSchema> {
        let data_type = ColumnDataTypeWrapper::try_new(self.datatype)?;

        let constraint = match &self.default_constraint {
            None => None,
            Some(v) => Some(
                ColumnDefaultConstraint::try_from(&v[..])
                    .context(error::ConvertColumnDefaultConstraintSnafu { column: &self.name })?,
            ),
        };

        ColumnSchema::new(&self.name, data_type.into(), self.is_nullable)
            .with_default_constraint(constraint)
            .context(error::InvalidColumnDefaultConstraintSnafu { column: &self.name })
    }
}
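A hedged usage sketch of the new conversion; the `ColumnDataType::Float64` variant and module paths are assumptions based on this diff, not verified against the crate layout:

```rust
use api::v1::{ColumnDataType, ColumnDef};

fn main() {
    let column_def = ColumnDef {
        name: "cpu".to_string(),
        datatype: ColumnDataType::Float64 as i32,
        is_nullable: true,
        // Raw encoded bytes of a ColumnDefaultConstraint, when present.
        default_constraint: None,
    };

    // On bad input this fails with the ConvertColumnDefaultConstraint or
    // InvalidColumnDefaultConstraint variants added earlier in this change set.
    let schema = column_def.try_as_column_schema().unwrap();
    assert_eq!("cpu", schema.name);
}
```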
@@ -145,10 +145,12 @@ gen_set_header!(HeartbeatRequest);
gen_set_header!(RouteRequest);
gen_set_header!(CreateRequest);
gen_set_header!(RangeRequest);
gen_set_header!(DeleteRequest);
gen_set_header!(PutRequest);
gen_set_header!(BatchPutRequest);
gen_set_header!(CompareAndPutRequest);
gen_set_header!(DeleteRangeRequest);
gen_set_header!(MoveValueRequest);

#[cfg(test)]
mod tests {
@@ -19,15 +19,12 @@ common-recordbatch = { path = "../common/recordbatch" }
common-runtime = { path = "../common/runtime" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
    "simd",
] }
datafusion = "14.0.0"
datatypes = { path = "../datatypes" }
futures = "0.3"
futures-util = "0.3"
lazy_static = "1.4"
meta-client = { path = "../meta-client" }
opendal = "0.17"
regex = "1.6"
serde = "1.0"
serde_json = "1.0"
@@ -39,9 +36,8 @@ tokio = { version = "1.18", features = ["full"] }
[dev-dependencies]
chrono = "0.4"
log-store = { path = "../log-store" }
mito = { path = "../mito", features = ["test"] }
object-store = { path = "../object-store" }
opendal = "0.17"
storage = { path = "../storage" }
mito = { path = "../mito" }
tempdir = "0.3"
tokio = { version = "1.0", features = ["full"] }
@@ -17,7 +17,7 @@ use std::any::Any;
use common_error::ext::{BoxedError, ErrorExt};
use common_error::prelude::{Snafu, StatusCode};
use datafusion::error::DataFusionError;
use datatypes::arrow;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::RawSchema;
use snafu::{Backtrace, ErrorCompat};
@@ -51,14 +51,12 @@
    SystemCatalog { msg: String, backtrace: Backtrace },

    #[snafu(display(
        "System catalog table type mismatch, expected: binary, found: {:?} source: {}",
        "System catalog table type mismatch, expected: binary, found: {:?}",
        data_type,
        source
    ))]
    SystemCatalogTypeMismatch {
        data_type: arrow::datatypes::DataType,
        #[snafu(backtrace)]
        source: datatypes::error::Error,
        data_type: ConcreteDataType,
        backtrace: Backtrace,
    },

    #[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
@@ -94,7 +92,7 @@ pub enum Error {
        backtrace: Backtrace,
    },

    #[snafu(display("Table {} already exists", table))]
    #[snafu(display("Table `{}` already exists", table))]
    TableExists { table: String, backtrace: Backtrace },

    #[snafu(display("Schema {} already exists", schema))]
@@ -109,6 +107,12 @@ pub enum Error {
        source: BoxedError,
    },

    #[snafu(display("Operation {} not implemented yet", operation))]
    Unimplemented {
        operation: String,
        backtrace: Backtrace,
    },

    #[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
    OpenTable {
        table_info: String,
@@ -185,8 +189,8 @@ pub enum Error {
        source: meta_client::error::Error,
    },

    #[snafu(display("Invalid table schema in catalog, source: {:?}", source))]
    InvalidSchemaInCatalog {
    #[snafu(display("Invalid table info in catalog, source: {}", source))]
    InvalidTableInfoInCatalog {
        #[snafu(backtrace)]
        source: datatypes::error::Error,
    },
@@ -216,11 +220,13 @@ impl ErrorExt for Error {
            | Error::ValueDeserialize { .. }
            | Error::Io { .. } => StatusCode::StorageUnavailable,

            Error::RegisterTable { .. } | Error::SystemCatalogTypeMismatch { .. } => {
                StatusCode::Internal
            }

            Error::ReadSystemCatalog { source, .. } => source.status_code(),
            Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
            Error::InvalidCatalogValue { source, .. } => source.status_code(),

            Error::RegisterTable { .. } => StatusCode::Internal,
            Error::TableExists { .. } => StatusCode::TableAlreadyExists,
            Error::SchemaExists { .. } => StatusCode::InvalidArguments,
@@ -233,8 +239,10 @@ impl ErrorExt for Error {
            Error::SystemCatalogTableScan { source } => source.status_code(),
            Error::SystemCatalogTableScanExec { source } => source.status_code(),
            Error::InvalidTableSchema { source, .. } => source.status_code(),
            Error::InvalidSchemaInCatalog { .. } => StatusCode::Unexpected,
            Error::InvalidTableInfoInCatalog { .. } => StatusCode::Unexpected,
            Error::Internal { source, .. } => source.status_code(),

            Error::Unimplemented { .. } => StatusCode::Unsupported,
        }
    }
@@ -256,7 +264,6 @@ impl From<Error> for DataFusionError {
#[cfg(test)]
mod tests {
    use common_error::mock::MockError;
    use datatypes::arrow::datatypes::DataType;
    use snafu::GenerateImplicitData;

    use super::*;
@@ -305,11 +312,8 @@ mod tests {
        assert_eq!(
            StatusCode::Internal,
            Error::SystemCatalogTypeMismatch {
                data_type: DataType::Boolean,
                source: datatypes::error::Error::UnsupportedArrowType {
                    arrow_type: DataType::Boolean,
                    backtrace: Backtrace::generate()
                }
                data_type: ConcreteDataType::binary_datatype(),
                backtrace: Backtrace::generate(),
            }
            .status_code()
        );
@@ -15,44 +15,56 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use common_catalog::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::{RawTableMeta, TableId, TableVersion};
|
||||
use table::metadata::{RawTableInfo, TableId, TableVersion};
|
||||
|
||||
use crate::consts::{
|
||||
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
|
||||
};
|
||||
use crate::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
const CATALOG_KEY_PREFIX: &str = "__c";
|
||||
const SCHEMA_KEY_PREFIX: &str = "__s";
|
||||
const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
|
||||
const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
|
||||
|
||||
const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
|
||||
|
||||
lazy_static! {
|
||||
static ref CATALOG_KEY_PATTERN: Regex =
|
||||
Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
|
||||
static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-({})$",
|
||||
CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
|
||||
SCHEMA_KEY_PREFIX
|
||||
"^{}-({})-({})$",
|
||||
SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)$",
|
||||
TABLE_GLOBAL_KEY_PREFIX
|
||||
"^{}-({})-({})-({})$",
|
||||
TABLE_GLOBAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX
|
||||
"^{}-({})-({})-({})-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
@@ -126,17 +138,20 @@ impl TableGlobalKey {

/// Table global info contains necessary info for a datanode to create table regions, including
/// table id, table meta(schema...), region id allocation across datanodes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct TableGlobalValue {
    /// Table id is the same across all datanodes.
    pub id: TableId,
    /// Id of datanode that created the global table info kv. only for debugging.
    pub node_id: u64,
    // TODO(LFC): Maybe remove it?
    /// Allocation of region ids across all datanodes.
    pub regions_id_map: HashMap<u64, Vec<u32>>,
    // TODO(LFC): Too much for assembling the table schema that DistTable needs, find another way.
    pub meta: RawTableMeta,
    pub table_info: RawTableInfo,
}

impl TableGlobalValue {
    pub fn table_id(&self) -> TableId {
        self.table_info.ident.table_id
    }
}

/// Table regional info that varies between datanode, so it contains a `node_id` field.
@@ -258,6 +273,10 @@ macro_rules! define_catalog_value {
            .context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
        }

        pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
            Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
        }

        pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
            Ok(serde_json::to_string(self)
                .context(SerializeCatalogEntryValueSnafu)?
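The `from_bytes`/`as_bytes` pair added by `define_catalog_value!` is, at its core, a JSON round-trip over UTF-8 bytes. A standalone sketch of the same idea, with a hypothetical `CatalogEntry` stand-in for the macro-generated types (assumes `serde` with the derive feature and `serde_json` as dependencies):

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct CatalogEntry {
    name: String,
}

fn main() {
    let entry = CatalogEntry { name: "catalog_1".to_string() };
    // as_bytes: serialize to JSON and hand the bytes to the kv backend.
    let bytes = serde_json::to_string(&entry).unwrap().into_bytes();
    // from_bytes: lossy-decode UTF-8, then deserialize.
    let parsed: CatalogEntry =
        serde_json::from_str(&String::from_utf8_lossy(&bytes)).unwrap();
    assert_eq!(entry, parsed);
}
```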
@@ -279,6 +298,7 @@ define_catalog_value!(
mod tests {
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::{ColumnSchema, RawSchema, Schema};
    use table::metadata::{RawTableMeta, TableIdent, TableType};

    use super::*;

@@ -339,11 +359,23 @@ mod tests {
        region_numbers: vec![1],
    };

    let table_info = RawTableInfo {
        ident: TableIdent {
            table_id: 42,
            version: 1,
        },
        name: "table_1".to_string(),
        desc: Some("blah".to_string()),
        catalog_name: "catalog_1".to_string(),
        schema_name: "schema_1".to_string(),
        meta,
        table_type: TableType::Base,
    };

    let value = TableGlobalValue {
        id: 42,
        node_id: 0,
        regions_id_map: HashMap::from([(0, vec![1, 2, 3])]),
        meta,
        table_info,
    };
    let serialized = serde_json::to_string(&value).unwrap();
    let deserialized = TableGlobalValue::parse(&serialized).unwrap();
@@ -15,6 +15,7 @@
#![feature(assert_matches)]

use std::any::Any;
use std::fmt::{Debug, Formatter};
use std::sync::Arc;

use common_telemetry::info;
@@ -28,6 +29,7 @@ use crate::error::{CreateTableSnafu, Result};
pub use crate::schema::{SchemaProvider, SchemaProviderRef};

pub mod error;
pub mod helper;
pub mod local;
pub mod remote;
pub mod schema;
@@ -83,12 +85,17 @@ pub trait CatalogManager: CatalogList {
    /// Starts a catalog manager.
    async fn start(&self) -> Result<()>;

    /// Registers a table given given catalog/schema to catalog manager,
    /// returns table registered.
    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
    /// Registers a table within given catalog/schema to catalog manager,
    /// returns whether the table registered.
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;

    /// Register a schema with catalog name and schema name.
    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize>;
    /// Deregisters a table within given catalog/schema to catalog manager,
    /// returns whether the table deregistered.
    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool>;

    /// Register a schema with catalog name and schema name. Returns whether the
    /// schema registered.
    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;

    /// Register a system table, should be called before starting the manager.
    async fn register_system_table(&self, request: RegisterSystemTableRequest)
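The signature change from `Result<usize>` to `Result<bool>` turns registration into an idempotent check: `true` means the table was newly registered, `false` means an identical registration (same table id) was already in place. A sketch of how a caller might consume that, with a hypothetical `ensure_registered` helper (trait as declared above; not code from this PR):

```rust
// Hypothetical call site, assuming a CatalogManager trait object `manager`
// and a populated RegisterTableRequest `request`.
async fn ensure_registered(
    manager: &dyn CatalogManager,
    request: RegisterTableRequest,
) -> Result<()> {
    let newly_registered = manager.register_table(request).await?;
    if !newly_registered {
        // An identical registration (same table id) already existed.
        // This is not an error; the call is simply a no-op.
    }
    Ok(())
}
```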
@@ -123,6 +130,25 @@ pub struct RegisterTableRequest {
    pub table: TableRef,
}

impl Debug for RegisterTableRequest {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("RegisterTableRequest")
            .field("catalog", &self.catalog)
            .field("schema", &self.schema)
            .field("table_name", &self.table_name)
            .field("table_id", &self.table_id)
            .field("table", &self.table.table_info())
            .finish()
    }
}

#[derive(Clone)]
pub struct DeregisterTableRequest {
    pub catalog: String,
    pub schema: String,
    pub table_name: String,
}

#[derive(Debug, Clone)]
pub struct RegisterSchemaRequest {
    pub catalog: String,
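Note that deregistration takes only the table's fully qualified name, not its id. A minimal request, mirroring the deregister test later in this diff (the helper function name is illustrative):

```rust
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};

fn numbers_deregister_request() -> DeregisterTableRequest {
    // Only the name triple is needed; the table id is resolved by the
    // catalog manager itself.
    DeregisterTableRequest {
        catalog: DEFAULT_CATALOG_NAME.to_string(),
        schema: DEFAULT_SCHEMA_NAME.to_string(),
        table_name: "numbers".to_string(),
    }
}
```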
@@ -21,7 +21,7 @@ use common_catalog::consts::{
    SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
};
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_telemetry::info;
use common_telemetry::{error, info};
use datatypes::prelude::ScalarVector;
use datatypes::vectors::{BinaryVector, UInt8Vector};
use futures_util::lock::Mutex;
@@ -36,7 +36,7 @@ use table::TableRef;
use crate::error::{
    CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu, Result,
    SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu,
    TableExistsSnafu, TableNotFoundSnafu,
    TableExistsSnafu, TableNotFoundSnafu, UnimplementedSnafu,
};
use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
use crate::system::{
@@ -46,8 +46,8 @@ use crate::system::{
use crate::tables::SystemCatalog;
use crate::{
    format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
    CatalogProvider, CatalogProviderRef, RegisterSchemaRequest, RegisterSystemTableRequest,
    RegisterTableRequest, SchemaProvider, SchemaProviderRef,
    CatalogProvider, CatalogProviderRef, DeregisterTableRequest, RegisterSchemaRequest,
    RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};

/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
@@ -57,6 +57,7 @@ pub struct LocalCatalogManager {
    engine: TableEngineRef,
    next_table_id: AtomicU32,
    init_lock: Mutex<bool>,
    register_lock: Mutex<()>,
    system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
}

@@ -76,6 +77,7 @@ impl LocalCatalogManager {
            engine,
            next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
            init_lock: Mutex::new(false),
            register_lock: Mutex::new(()),
            system_table_requests: Mutex::new(Vec::default()),
        })
    }
@@ -143,27 +145,34 @@ impl LocalCatalogManager {
    /// Convert `RecordBatch` to a vector of `Entry`.
    fn record_batch_to_entry(rb: RecordBatch) -> Result<Vec<Entry>> {
        ensure!(
            rb.df_recordbatch.columns().len() >= 6,
            rb.num_columns() >= 6,
            SystemCatalogSnafu {
                msg: format!("Length mismatch: {}", rb.df_recordbatch.columns().len())
                msg: format!("Length mismatch: {}", rb.num_columns())
            }
        );

        let entry_type = UInt8Vector::try_from_arrow_array(&rb.df_recordbatch.columns()[0])
            .with_context(|_| SystemCatalogTypeMismatchSnafu {
                data_type: rb.df_recordbatch.columns()[ENTRY_TYPE_INDEX]
                    .data_type()
                    .clone(),
        let entry_type = rb
            .column(ENTRY_TYPE_INDEX)
            .as_any()
            .downcast_ref::<UInt8Vector>()
            .with_context(|| SystemCatalogTypeMismatchSnafu {
                data_type: rb.column(ENTRY_TYPE_INDEX).data_type(),
            })?;

        let key = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[1])
            .with_context(|_| SystemCatalogTypeMismatchSnafu {
                data_type: rb.df_recordbatch.columns()[KEY_INDEX].data_type().clone(),
        let key = rb
            .column(KEY_INDEX)
            .as_any()
            .downcast_ref::<BinaryVector>()
            .with_context(|| SystemCatalogTypeMismatchSnafu {
                data_type: rb.column(KEY_INDEX).data_type(),
            })?;

        let value = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[3])
            .with_context(|_| SystemCatalogTypeMismatchSnafu {
                data_type: rb.df_recordbatch.columns()[VALUE_INDEX].data_type().clone(),
        let value = rb
            .column(VALUE_INDEX)
            .as_any()
            .downcast_ref::<BinaryVector>()
            .with_context(|| SystemCatalogTypeMismatchSnafu {
                data_type: rb.column(VALUE_INDEX).data_type(),
            })?;

        let mut res = Vec::with_capacity(rb.num_rows());
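The rewritten accessors all follow the same shape: fetch the column by its named index, downcast the type-erased vector via `Any`, and surface a typed error when the runtime type does not match. A standalone sketch of that pattern with simplified stand-in types (not the PR's vector or error machinery):

```rust
use std::any::Any;

// Stand-ins for the type-erased vector abstraction used above.
trait Vector: Any {
    fn as_any(&self) -> &dyn Any;
}

struct UInt8Vector(Vec<u8>);

impl Vector for UInt8Vector {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn entry_type(column: &dyn Vector) -> Result<&UInt8Vector, String> {
    column
        .as_any()
        .downcast_ref::<UInt8Vector>()
        // The real code reports this through SystemCatalogTypeMismatchSnafu.
        .ok_or_else(|| "system catalog column type mismatch".to_string())
}
```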
@@ -241,6 +250,7 @@ impl LocalCatalogManager {
            schema_name: t.schema_name.clone(),
            table_name: t.table_name.clone(),
            table_id: t.table_id,
            region_numbers: vec![0],
        };

        let option = self
@@ -308,7 +318,7 @@ impl CatalogManager for LocalCatalogManager {
        self.init().await
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
        let started = self.init_lock.lock().await;

        ensure!(
@@ -331,27 +341,50 @@ impl CatalogManager for LocalCatalogManager {
                schema_info: format!("{}.{}", catalog_name, schema_name),
            })?;

        if schema.table_exist(&request.table_name)? {
            return TableExistsSnafu {
                table: format_full_table_name(catalog_name, schema_name, &request.table_name),
        {
            let _lock = self.register_lock.lock().await;
            if let Some(existing) = schema.table(&request.table_name)? {
                if existing.table_info().ident.table_id != request.table_id {
                    error!(
                        "Unexpected table register request: {:?}, existing: {:?}",
                        request,
                        existing.table_info()
                    );
                    return TableExistsSnafu {
                        table: format_full_table_name(
                            catalog_name,
                            schema_name,
                            &request.table_name,
                        ),
                    }
                    .fail();
                }
                // Try to register table with same table id, just ignore.
                Ok(false)
            } else {
                // table does not exist
                self.system
                    .register_table(
                        catalog_name.clone(),
                        schema_name.clone(),
                        request.table_name.clone(),
                        request.table_id,
                    )
                    .await?;
                schema.register_table(request.table_name, request.table)?;
                Ok(true)
            }
            .fail();
        }

        self.system
            .register_table(
                catalog_name.clone(),
                schema_name.clone(),
                request.table_name.clone(),
                request.table_id,
            )
            .await?;

        schema.register_table(request.table_name, request.table)?;
        Ok(1)
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
    async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
        UnimplementedSnafu {
            operation: "deregister table",
        }
        .fail()
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
        let started = self.init_lock.lock().await;
        ensure!(
            *started,
@@ -366,17 +399,21 @@ impl CatalogManager for LocalCatalogManager {
            .catalogs
            .catalog(catalog_name)?
            .context(CatalogNotFoundSnafu { catalog_name })?;
        if catalog.schema(schema_name)?.is_some() {
            return SchemaExistsSnafu {
                schema: schema_name,
            }
            .fail();

        {
            let _lock = self.register_lock.lock().await;
            ensure!(
                catalog.schema(schema_name)?.is_none(),
                SchemaExistsSnafu {
                    schema: schema_name,
                }
            );
            self.system
                .register_schema(request.catalog, schema_name.clone())
                .await?;
            catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
            Ok(true)
        }
        self.system
            .register_schema(request.catalog, schema_name.clone())
            .await?;
        catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
        Ok(1)
    }

    async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
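Worth noting why a dedicated `register_lock` appears here rather than reusing a data-structure lock: the existence check and the writes span two places, the system catalog table and the in-memory provider, so no single collection lock can cover the whole sequence. A condensed sketch of the race being closed, using a tokio mutex and toy state (not the PR's actual types):

```rust
use std::collections::HashSet;
use std::sync::RwLock;
use tokio::sync::Mutex;

struct Registry {
    // Serializes the whole check-then-write sequence. In LocalCatalogManager
    // the writes span the system catalog table *and* the in-memory provider,
    // so a single collection lock cannot cover them.
    register_lock: Mutex<()>,
    schemas: RwLock<HashSet<String>>,
}

impl Registry {
    async fn register_schema(&self, name: String) -> Result<bool, String> {
        let _lock = self.register_lock.lock().await;
        // Without register_lock, two tasks could both observe "absent" here
        // and then both perform the (multi-step) registration.
        if self.schemas.read().unwrap().contains(&name) {
            return Err(format!("Schema `{}` already exists", name));
        }
        // ... persist to the system catalog table here ...
        self.schemas.write().unwrap().insert(name);
        Ok(true)
    }
}
```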
@@ -19,6 +19,7 @@ use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};

use common_catalog::consts::MIN_USER_TABLE_ID;
use common_telemetry::error;
use snafu::OptionExt;
use table::metadata::TableId;
use table::table::TableIdProvider;
@@ -27,8 +28,8 @@ use table::TableRef;
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
use crate::schema::SchemaProvider;
use crate::{
    CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, RegisterSchemaRequest,
    RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
    CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, DeregisterTableRequest,
    RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
};

/// Simple in-memory list of catalogs
@@ -69,7 +70,7 @@ impl CatalogManager for MemoryCatalogManager {
        Ok(())
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
        let catalogs = self.catalogs.write().unwrap();
        let catalog = catalogs
            .get(&request.catalog)
@@ -84,10 +85,28 @@ impl CatalogManager for MemoryCatalogManager {
            })?;
        schema
            .register_table(request.table_name, request.table)
            .map(|v| if v.is_some() { 0 } else { 1 })
            .map(|v| v.is_none())
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
    async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
        let catalogs = self.catalogs.write().unwrap();
        let catalog = catalogs
            .get(&request.catalog)
            .context(CatalogNotFoundSnafu {
                catalog_name: &request.catalog,
            })?
            .clone();
        let schema = catalog
            .schema(&request.schema)?
            .with_context(|| SchemaNotFoundSnafu {
                schema_info: format!("{}.{}", &request.catalog, &request.schema),
            })?;
        schema
            .deregister_table(&request.table_name)
            .map(|v| v.is_some())
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
        let catalogs = self.catalogs.write().unwrap();
        let catalog = catalogs
            .get(&request.catalog)
@@ -95,11 +114,12 @@ impl CatalogManager for MemoryCatalogManager {
                catalog_name: &request.catalog,
            })?;
        catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
        Ok(1)
        Ok(true)
    }

    async fn register_system_table(&self, _request: RegisterSystemTableRequest) -> Result<()> {
        unimplemented!()
        // TODO(ruihang): support register system table request
        Ok(())
    }

    fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
@@ -251,11 +271,21 @@ impl SchemaProvider for MemorySchemaProvider {
    }

    fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
        if self.table_exist(name.as_str())? {
            return TableExistsSnafu { table: name }.fail()?;
        }
        let mut tables = self.tables.write().unwrap();
        Ok(tables.insert(name, table))
        if let Some(existing) = tables.get(name.as_str()) {
            // if table with the same name but different table id exists, then it's a fatal bug
            if existing.table_info().ident.table_id != table.table_info().ident.table_id {
                error!(
                    "Unexpected table register: {:?}, existing: {:?}",
                    table.table_info(),
                    existing.table_info()
                );
                return TableExistsSnafu { table: name }.fail()?;
            }
            Ok(Some(existing.clone()))
        } else {
            Ok(tables.insert(name, table))
        }
    }

    fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
@@ -315,7 +345,7 @@ mod tests {
        .unwrap()
        .is_none());
        assert!(provider.table_exist(table_name).unwrap());
        let other_table = NumbersTable::default();
        let other_table = NumbersTable::new(12);
        let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
        let err = result.err().unwrap();
        assert!(err.backtrace_opt().is_some());
@@ -340,4 +370,34 @@ mod tests {
        .downcast_ref::<MemoryCatalogManager>()
        .unwrap();
    }

    #[tokio::test]
    pub async fn test_catalog_deregister_table() {
        let catalog = MemoryCatalogManager::default();
        let schema = catalog
            .schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
            .unwrap()
            .unwrap();

        let register_table_req = RegisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "numbers".to_string(),
            table_id: 2333,
            table: Arc::new(NumbersTable::default()),
        };
        catalog.register_table(register_table_req).await.unwrap();
        assert!(schema.table_exist("numbers").unwrap());

        let deregister_table_req = DeregisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "numbers".to_string(),
        };
        catalog
            .deregister_table(deregister_table_req)
            .await
            .unwrap();
        assert!(!schema.table_exist("numbers").unwrap());
    }
}
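Taken together, the in-memory provider now treats re-registration of the same table id as a harmless no-op and only errors when the same name maps to a different id. A condensed model of that decision table, with a plain `name -> id` map standing in for `TableRef` (illustrative, not the crate's API):

```rust
use std::collections::HashMap;

/// Simplified model of MemorySchemaProvider::register_table above.
fn register(tables: &mut HashMap<String, u32>, name: &str, id: u32) -> Result<bool, String> {
    match tables.get(name).copied() {
        // Same name, different id: a conflicting table already exists.
        Some(existing) if existing != id => Err(format!("Table `{}` already exists", name)),
        // Same name, same id: idempotent re-registration.
        Some(_) => Ok(false),
        // Fresh name: newly registered.
        None => {
            tables.insert(name.to_string(), id);
            Ok(true)
        }
    }
}
```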
@@ -20,10 +20,6 @@ use std::sync::Arc;
use arc_swap::ArcSwap;
use async_stream::stream;
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
use common_catalog::{
    build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
    SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use common_telemetry::{debug, info};
use futures::Stream;
use futures_util::StreamExt;
@@ -37,13 +33,17 @@ use tokio::sync::Mutex;

use crate::error::{
    CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, InvalidTableSchemaSnafu,
    OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu,
    OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
};
use crate::helper::{
    build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
    SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
};
use crate::remote::{Kv, KvBackendRef};
use crate::{
    handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
    RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider,
    SchemaProviderRef,
    DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
    RegisterTableRequest, SchemaProvider, SchemaProviderRef,
};

/// Catalog manager based on metasrv.
@@ -154,8 +154,8 @@ impl RemoteCatalogManager {
        }
        let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
            .context(InvalidCatalogValueSnafu)?;
        let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
            .context(InvalidCatalogValueSnafu)?;
        let table_value =
            TableGlobalValue::from_bytes(&v).context(InvalidCatalogValueSnafu)?;

        info!(
            "Found catalog table entry, key: {}, value: {:?}",
@@ -250,10 +250,7 @@ impl RemoteCatalogManager {
        let table_ref = self.open_or_create_table(&table_key, &table_value).await?;
        schema.register_table(table_key.table_name.to_string(), table_ref)?;
        info!("Registered table {}", &table_key.table_name);
        if table_value.id > max_table_id {
            info!("Max table id: {} -> {}", max_table_id, table_value.id);
            max_table_id = table_value.id;
        }
        max_table_id = max_table_id.max(table_value.table_id());
        table_num += 1;
    }
    info!(
@@ -311,25 +308,33 @@ impl RemoteCatalogManager {
        ..
    } = table_key;

    let table_id = table_value.table_id();

    let TableGlobalValue {
        id,
        meta,
        table_info,
        regions_id_map,
        ..
    } = table_value;

    // unwrap safety: checked in yielding this table when `iter_remote_tables`
    let region_numbers = regions_id_map.get(&self.node_id).unwrap();

    let request = OpenTableRequest {
        catalog_name: catalog_name.clone(),
        schema_name: schema_name.clone(),
        table_name: table_name.clone(),
        table_id: *id,
        table_id,
        region_numbers: region_numbers.clone(),
    };
    match self
        .engine
        .open_table(&context, request)
        .await
        .with_context(|_| OpenTableSnafu {
            table_info: format!("{}.{}.{}, id:{}", catalog_name, schema_name, table_name, id,),
            table_info: format!(
                "{}.{}.{}, id:{}",
                catalog_name, schema_name, table_name, table_id
            ),
        })? {
        Some(table) => {
            info!(
@@ -344,6 +349,7 @@ impl RemoteCatalogManager {
                catalog_name, schema_name, table_name
            );

            let meta = &table_info.meta;
            let schema = meta
                .schema
                .clone()
@@ -353,13 +359,13 @@ impl RemoteCatalogManager {
                schema: meta.schema.clone(),
            })?;
            let req = CreateTableRequest {
                id: *id,
                id: table_id,
                catalog_name: catalog_name.clone(),
                schema_name: schema_name.clone(),
                table_name: table_name.clone(),
                desc: None,
                schema: Arc::new(schema),
                region_numbers: regions_id_map.get(&self.node_id).unwrap().clone(), // this unwrap is safe because region_id_map is checked in `iter_remote_tables`
                region_numbers: region_numbers.clone(),
                primary_key_indices: meta.primary_key_indices.clone(),
                create_if_not_exists: true,
                table_options: meta.options.clone(),
@@ -371,7 +377,7 @@ impl RemoteCatalogManager {
            .context(CreateTableSnafu {
                table_info: format!(
                    "{}.{}.{}, id:{}",
                    &catalog_name, &schema_name, &table_name, id
                    &catalog_name, &schema_name, &table_name, table_id
                ),
            })
        }
@@ -405,7 +411,7 @@ impl CatalogManager for RemoteCatalogManager {
        Ok(())
    }

    async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
    async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
        let catalog_name = request.catalog;
        let schema_name = request.schema;
        let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
@@ -424,10 +430,17 @@ impl CatalogManager for RemoteCatalogManager {
        .fail();
    }
    schema_provider.register_table(request.table_name, request.table)?;
    Ok(1)
    Ok(true)
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
    async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
        UnimplementedSnafu {
            operation: "deregister table",
        }
        .fail()
    }

    async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
        let catalog_name = request.catalog;
        let schema_name = request.schema;
        let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
@@ -435,7 +448,7 @@ impl CatalogManager for RemoteCatalogManager {
        })?;
        let schema_provider = self.new_schema_provider(&catalog_name, &schema_name);
        catalog_provider.register_schema(schema_name, schema_provider)?;
        Ok(1)
        Ok(true)
    }

    async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {

@@ -21,14 +21,13 @@ use common_catalog::consts::{
    SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
};
use common_query::logical_plan::Expr;
use common_query::physical_plan::{PhysicalPlanRef, RuntimeEnv};
use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use common_time::timestamp::Timestamp;
use common_time::util;
use datatypes::prelude::{ConcreteDataType, ScalarVector};
use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder, SchemaRef};
use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
use serde::{Deserialize, Serialize};
use snafu::{ensure, OptionExt, ResultExt};
use table::engine::{EngineContext, TableEngineRef};
@@ -43,7 +42,6 @@ use crate::error::{

pub const ENTRY_TYPE_INDEX: usize = 0;
pub const KEY_INDEX: usize = 1;
pub const TIMESTAMP_INDEX: usize = 2;
pub const VALUE_INDEX: usize = 3;

pub struct SystemCatalogTable {
@@ -87,6 +85,7 @@ impl SystemCatalogTable {
        schema_name: INFORMATION_SCHEMA_NAME.to_string(),
        table_name: SYSTEM_CATALOG_TABLE_NAME.to_string(),
        table_id: SYSTEM_CATALOG_TABLE_ID,
        region_numbers: vec![0],
    };
    let schema = Arc::new(build_system_catalog_schema());
    let ctx = EngineContext::default();
@@ -110,7 +109,7 @@ impl SystemCatalogTable {
        desc: Some("System catalog table".to_string()),
        schema: schema.clone(),
        region_numbers: vec![0],
        primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
        primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
        create_if_not_exists: true,
        table_options: HashMap::new(),
    };
@@ -127,14 +126,14 @@ impl SystemCatalogTable {
    /// Create a stream of all entries inside system catalog table
    pub async fn records(&self) -> Result<SendableRecordBatchStream> {
        let full_projection = None;
        let ctx = SessionContext::new();
        let scan = self
            .table
            .scan(&full_projection, &[], None)
            .await
            .context(error::SystemCatalogTableScanSnafu)?;
        let stream = scan
            .execute(0, Arc::new(RuntimeEnv::default()))
            .await
            .execute(0, ctx.task_ctx())
            .context(error::SystemCatalogTableScanExecSnafu)?;
        Ok(stream)
    }
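This hunk tracks the DataFusion upgrade (see the `datafusion = "14.0.0"` bump later in this diff): `RuntimeEnv` is no longer passed to `execute` directly; instead a `SessionContext` hands out a `TaskContext`, and `execute` became synchronous, returning the stream immediately. The call shape in plain DataFusion 14 terms (illustrative; the wrapped `PhysicalPlanRef` API above mirrors it):

```rust
use std::sync::Arc;

use datafusion::physical_plan::ExecutionPlan;
use datafusion::prelude::SessionContext;

fn execute_partition(plan: Arc<dyn ExecutionPlan>) -> datafusion::error::Result<()> {
    let ctx = SessionContext::new();
    // DataFusion 14: execute() is sync and takes a TaskContext instead of RuntimeEnv.
    let _stream = plan.execute(0, ctx.task_ctx())?;
    Ok(())
}
```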
@@ -162,7 +161,7 @@ fn build_system_catalog_schema() -> Schema {
    ),
    ColumnSchema::new(
        "timestamp".to_string(),
        ConcreteDataType::timestamp_millis_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        false,
    )
    .with_time_index(true),
@@ -173,12 +172,12 @@ fn build_system_catalog_schema() -> Schema {
    ),
    ColumnSchema::new(
        "gmt_created".to_string(),
        ConcreteDataType::timestamp_millis_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        false,
    ),
    ColumnSchema::new(
        "gmt_modified".to_string(),
        ConcreteDataType::timestamp_millis_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
        false,
    ),
];
@@ -223,7 +222,7 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
    // Timestamp in key part is intentionally left to 0
    columns_values.insert(
        "timestamp".to_string(),
        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(0)])) as _,
        Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
    );

    columns_values.insert(
@@ -231,18 +230,15 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
        Arc::new(BinaryVector::from_slice(&[value])) as _,
    );

    let now = util::current_time_millis();
    columns_values.insert(
        "gmt_created".to_string(),
        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
            util::current_time_millis(),
        )])) as _,
        Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
    );

    columns_values.insert(
        "gmt_modified".to_string(),
        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
            util::current_time_millis(),
        )])) as _,
        Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
    );

    InsertRequest {
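Besides the vector type rename, hoisting `util::current_time_millis()` into a single `now` fixes a small wart: the old code sampled the clock twice, so `gmt_created` and `gmt_modified` of a freshly inserted row could differ by a millisecond. The general shape, with a standard-library stand-in for the time helper:

```rust
use std::time::{SystemTime, UNIX_EPOCH};

// Stand-in for common_time::util::current_time_millis().
fn now_millis() -> i64 {
    SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as i64
}

fn main() {
    // Sample once, reuse for both columns, so created == modified on insert.
    let now = now_millis();
    let (gmt_created, gmt_modified) = (now, now);
    assert_eq!(gmt_created, gmt_modified);
}
```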
@@ -384,7 +380,7 @@ mod tests {
    use super::*;

    #[test]
    pub fn test_decode_catalog_enrty() {
    pub fn test_decode_catalog_entry() {
        let entry = decode_system_catalog(
            Some(EntryType::Catalog as u8),
            Some("some_catalog".as_bytes()),
@@ -456,7 +452,7 @@ mod tests {
    pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
        let dir = TempDir::new("system-table-test").unwrap();
        let store_dir = dir.path().to_string_lossy();
        let accessor = opendal::services::fs::Builder::default()
        let accessor = object_store::backend::fs::Builder::default()
            .root(&store_dir)
            .build()
            .unwrap();

@@ -26,9 +26,9 @@ use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream};
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
use datatypes::prelude::{ConcreteDataType, DataType};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::value::ValueRef;
use datatypes::vectors::VectorRef;
use futures::Stream;
use snafu::ResultExt;
@@ -149,26 +149,33 @@ fn tables_to_record_batch(
    engine: &str,
) -> Vec<VectorRef> {
    let mut catalog_vec =
        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
    let mut schema_vec =
        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
    let mut table_name_vec =
        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
    let mut engine_vec =
        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());

    for table_name in table_names {
        catalog_vec.push(&Value::String(catalog_name.into()));
        schema_vec.push(&Value::String(schema_name.into()));
        table_name_vec.push(&Value::String(table_name.into()));
        engine_vec.push(&Value::String(engine.into()));
        // Safety: All these vectors are string type.
        catalog_vec
            .push_value_ref(ValueRef::String(catalog_name))
            .unwrap();
        schema_vec
            .push_value_ref(ValueRef::String(schema_name))
            .unwrap();
        table_name_vec
            .push_value_ref(ValueRef::String(&table_name))
            .unwrap();
        engine_vec.push_value_ref(ValueRef::String(engine)).unwrap();
    }

    vec![
        catalog_vec.finish(),
        schema_vec.finish(),
        table_name_vec.finish(),
        engine_vec.finish(),
        catalog_vec.to_vector(),
        schema_vec.to_vector(),
        table_name_vec.to_vector(),
        engine_vec.to_vector(),
    ]
}
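The replacement pattern is: ask the `ConcreteDataType` for a mutable vector builder, push borrowed `ValueRef`s (avoiding the owned `Value` allocations of the old `VectorBuilder`), then freeze with `to_vector()`. A generic model of that builder flow with simplified stand-ins (not the datatypes crate's actual API):

```rust
// Simplified model of the mutable-vector flow used above.
struct StringVectorBuilder {
    values: Vec<String>,
}

impl StringVectorBuilder {
    fn with_capacity(capacity: usize) -> Self {
        Self { values: Vec::with_capacity(capacity) }
    }

    // Push by reference; only allocates when the value is stored.
    fn push_value_ref(&mut self, value: &str) -> Result<(), String> {
        self.values.push(value.to_string());
        Ok(())
    }

    // Freeze the builder into an immutable vector.
    fn to_vector(self) -> Vec<String> {
        self.values
    }
}

fn main() {
    let mut builder = StringVectorBuilder::with_capacity(2);
    builder.push_value_ref("greptime").unwrap();
    builder.push_value_ref("public").unwrap();
    assert_eq!(builder.to_vector(), vec!["greptime", "public"]);
}
```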
@@ -340,9 +347,7 @@ fn build_schema_for_tables() -> Schema {
#[cfg(test)]
mod tests {
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use common_query::physical_plan::RuntimeEnv;
    use datatypes::arrow::array::Utf8Array;
    use datatypes::arrow::datatypes::DataType;
    use common_query::physical_plan::SessionContext;
    use futures_util::StreamExt;
    use table::table::numbers::NumbersTable;

@@ -366,57 +371,47 @@ mod tests {

    let tables = Tables::new(catalog_list, "test_engine".to_string());
    let tables_stream = tables.scan(&None, &[], None).await.unwrap();
    let mut tables_stream = tables_stream
        .execute(0, Arc::new(RuntimeEnv::default()))
        .await
        .unwrap();
    let session_ctx = SessionContext::new();
    let mut tables_stream = tables_stream.execute(0, session_ctx.task_ctx()).unwrap();

    if let Some(t) = tables_stream.next().await {
        let batch = t.unwrap().df_recordbatch;
        let batch = t.unwrap();
        assert_eq!(1, batch.num_rows());
        assert_eq!(4, batch.num_columns());
        assert_eq!(&DataType::Utf8, batch.column(0).data_type());
        assert_eq!(&DataType::Utf8, batch.column(1).data_type());
        assert_eq!(&DataType::Utf8, batch.column(2).data_type());
        assert_eq!(&DataType::Utf8, batch.column(3).data_type());
        assert_eq!(
            ConcreteDataType::string_datatype(),
            batch.column(0).data_type()
        );
        assert_eq!(
            ConcreteDataType::string_datatype(),
            batch.column(1).data_type()
        );
        assert_eq!(
            ConcreteDataType::string_datatype(),
            batch.column(2).data_type()
        );
        assert_eq!(
            ConcreteDataType::string_datatype(),
            batch.column(3).data_type()
        );
        assert_eq!(
            "greptime",
            batch
                .column(0)
                .as_any()
                .downcast_ref::<Utf8Array<i32>>()
                .unwrap()
                .value(0)
            batch.column(0).get_ref(0).as_string().unwrap().unwrap()
        );

        assert_eq!(
            "public",
            batch
                .column(1)
                .as_any()
                .downcast_ref::<Utf8Array<i32>>()
                .unwrap()
                .value(0)
            batch.column(1).get_ref(0).as_string().unwrap().unwrap()
        );

        assert_eq!(
            "test_table",
            batch
                .column(2)
                .as_any()
                .downcast_ref::<Utf8Array<i32>>()
                .unwrap()
                .value(0)
            batch.column(2).get_ref(0).as_string().unwrap().unwrap()
        );

        assert_eq!(
            "test_engine",
            batch
                .column(3)
                .as_any()
                .downcast_ref::<Utf8Array<i32>>()
                .unwrap()
                .value(0)
            batch.column(3).get_ref(0).as_string().unwrap().unwrap()
        );
    } else {
        panic!("Record batch should not be empty!")
src/catalog/tests/local_catalog_tests.rs (new file, 132 lines)
@@ -0,0 +1,132 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use catalog::local::LocalCatalogManager;
    use catalog::{CatalogManager, RegisterTableRequest};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use common_telemetry::{error, info};
    use mito::config::EngineConfig;
    use table::table::numbers::NumbersTable;
    use table::TableRef;
    use tokio::sync::Mutex;

    async fn create_local_catalog_manager() -> Result<LocalCatalogManager, catalog::error::Error> {
        let (_dir, object_store) =
            mito::table::test_util::new_test_object_store("setup_mock_engine_and_table").await;
        let mock_engine = Arc::new(mito::table::test_util::MockMitoEngine::new(
            EngineConfig::default(),
            mito::table::test_util::MockEngine::default(),
            object_store,
        ));
        let catalog_manager = LocalCatalogManager::try_new(mock_engine).await.unwrap();
        catalog_manager.start().await?;
        Ok(catalog_manager)
    }

    #[tokio::test]
    async fn test_duplicate_register() {
        let catalog_manager = create_local_catalog_manager().await.unwrap();
        let request = RegisterTableRequest {
            catalog: DEFAULT_CATALOG_NAME.to_string(),
            schema: DEFAULT_SCHEMA_NAME.to_string(),
            table_name: "test_table".to_string(),
            table_id: 42,
            table: Arc::new(NumbersTable::new(42)),
        };
        assert!(catalog_manager
            .register_table(request.clone())
            .await
            .unwrap());

        // Registering a table with the same table id succeeds and returns false.
        assert!(!catalog_manager.register_table(request).await.unwrap());

        let err = catalog_manager
            .register_table(RegisterTableRequest {
                catalog: DEFAULT_CATALOG_NAME.to_string(),
                schema: DEFAULT_SCHEMA_NAME.to_string(),
                table_name: "test_table".to_string(),
                table_id: 43,
                table: Arc::new(NumbersTable::new(43)),
            })
            .await
            .unwrap_err();
        assert!(
            err.to_string()
                .contains("Table `greptime.public.test_table` already exists"),
            "Actual error message: {}",
            err
        );
    }

    #[test]
    fn test_concurrent_register() {
        common_telemetry::init_default_ut_logging();
        let rt = Arc::new(tokio::runtime::Builder::new_multi_thread().build().unwrap());
        let catalog_manager =
            Arc::new(rt.block_on(async { create_local_catalog_manager().await.unwrap() }));

        let succeed: Arc<Mutex<Option<TableRef>>> = Arc::new(Mutex::new(None));

        let mut handles = Vec::with_capacity(8);
        for i in 0..8 {
            let catalog = catalog_manager.clone();
            let succeed = succeed.clone();
            let handle = rt.spawn(async move {
                let table_id = 42 + i;
                let table = Arc::new(NumbersTable::new(table_id));
                let req = RegisterTableRequest {
                    catalog: DEFAULT_CATALOG_NAME.to_string(),
                    schema: DEFAULT_SCHEMA_NAME.to_string(),
                    table_name: "test_table".to_string(),
                    table_id,
                    table: table.clone(),
                };
                match catalog.register_table(req).await {
                    Ok(res) => {
                        if res {
                            let mut succeed = succeed.lock().await;
                            info!("Successfully registered table: {}", table_id);
                            *succeed = Some(table);
                        }
                    }
                    Err(_) => {
                        error!("Failed to register table {}", table_id);
                    }
                }
            });
            handles.push(handle);
        }

        rt.block_on(async move {
            for handle in handles {
                handle.await.unwrap();
            }
            let guard = succeed.lock().await;
            let table = guard.as_ref().unwrap();
            let table_registered = catalog_manager
                .table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
                .unwrap()
                .unwrap();
            assert_eq!(
                table_registered.table_info().ident.table_id,
                table.table_info().ident.table_id
            );
        });
    }
}
@@ -217,7 +217,7 @@ impl TableEngine for MockTableEngine {
        &self,
        _ctx: &EngineContext,
        _request: DropTableRequest,
    ) -> table::Result<()> {
    ) -> table::Result<bool> {
        unimplemented!()
    }
}

@@ -22,12 +22,12 @@ mod tests {
    use std::collections::HashSet;
    use std::sync::Arc;

    use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
    use catalog::remote::{
        KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
    };
    use catalog::{CatalogList, CatalogManager, RegisterTableRequest};
    use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
    use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
    use datatypes::schema::Schema;
    use futures_util::StreamExt;
    use table::engine::{EngineContext, TableEngineRef};
@@ -202,7 +202,7 @@ mod tests {
        table_id,
        table,
    };
    assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
    assert!(catalog_manager.register_table(reg_req).await.unwrap());
    assert_eq!(
        HashSet::from([table_name, "numbers".to_string()]),
        default_schema
@@ -287,7 +287,7 @@ mod tests {
        .register_schema(schema_name.clone(), schema.clone())
        .expect("Register schema should not fail");
    assert!(prev.is_none());
    assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
    assert!(catalog_manager.register_table(reg_req).await.unwrap());

    assert_eq!(
        HashSet::from([schema_name.clone()]),
@@ -11,13 +11,11 @@ async-stream = "0.3"
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-grpc = { path = "../common/grpc" }
common-grpc-expr = { path = "../common/grpc-expr" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-insert = { path = "../common/insert" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
    "simd",
] }
datafusion = "14.0.0"
datatypes = { path = "../datatypes" }
enum_dispatch = "0.3"
parking_lot = "0.12"

@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;

use api::v1::codec::InsertBatch;
use api::v1::*;
use client::{Client, Database};

fn main() {
    tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
        .unwrap();
@@ -29,21 +27,21 @@ async fn run() {
    let client = Client::with_urls(vec!["127.0.0.1:3001"]);
    let db = Database::new("greptime", client);

    let (columns, row_count) = insert_data();

    let expr = InsertExpr {
        schema_name: "public".to_string(),
        table_name: "demo".to_string(),
        expr: Some(insert_expr::Expr::Values(insert_expr::Values {
            values: insert_batches(),
        })),
        options: HashMap::default(),
        region_number: 0,
        columns,
        row_count,
    };
    db.insert(expr).await.unwrap();
}

fn insert_batches() -> Vec<Vec<u8>> {
fn insert_data() -> (Vec<Column>, u32) {
    const SEMANTIC_TAG: i32 = 0;
    const SEMANTIC_FEILD: i32 = 1;
    const SEMANTIC_FIELD: i32 = 1;
    const SEMANTIC_TS: i32 = 2;

    let row_count = 4;
@@ -71,7 +69,7 @@ fn insert_batches() -> Vec<Vec<u8>> {
    };
    let cpu_column = Column {
        column_name: "cpu".to_string(),
        semantic_type: SEMANTIC_FEILD,
        semantic_type: SEMANTIC_FIELD,
        values: Some(cpu_vals),
        null_mask: vec![2],
        ..Default::default()
@@ -83,7 +81,7 @@ fn insert_batches() -> Vec<Vec<u8>> {
    };
    let mem_column = Column {
        column_name: "memory".to_string(),
        semantic_type: SEMANTIC_FEILD,
        semantic_type: SEMANTIC_FIELD,
        values: Some(mem_vals),
        null_mask: vec![4],
        ..Default::default()
@@ -101,9 +99,8 @@ fn insert_batches() -> Vec<Vec<u8>> {
    ..Default::default()
    };

    let insert_batch = InsertBatch {
        columns: vec![host_column, cpu_column, mem_column, ts_column],
    (
        vec![host_column, cpu_column, mem_column, ts_column],
        row_count,
    };
    vec![insert_batch.into()]
    )
}
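A note on the `null_mask` fields above: each column's mask appears to be a little-endian bitmap over the batch's rows, with a set bit marking a null. Under that reading, `vec![2]` (`0b0000_0010`) marks row 1 of the cpu column null, and `vec![4]` (`0b0000_0100`) marks row 2 of the memory column. A sketch of constructing such a mask, under that assumption (the helper is hypothetical):

```rust
// Assumed semantics: bit i (LSB-first) set => row i is null.
fn build_null_mask(null_rows: &[usize], row_count: usize) -> Vec<u8> {
    let mut mask = vec![0u8; (row_count + 7) / 8];
    for &row in null_rows {
        mask[row / 8] |= 1 << (row % 8);
    }
    mask
}

fn main() {
    // Matches the cpu column above: 4 rows, row 1 is null.
    assert_eq!(build_null_mask(&[1], 4), vec![2]);
    // Matches the memory column: row 2 is null.
    assert_eq!(build_null_mask(&[2], 4), vec![4]);
}
```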
@@ -41,7 +41,7 @@ async fn run() {
    column_defs: vec![
        ColumnDef {
            name: "timestamp".to_string(),
            datatype: ColumnDataType::Timestamp as i32,
            datatype: ColumnDataType::TimestampMillisecond as i32,
            is_nullable: false,
            default_constraint: None,
        },

@@ -1,51 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use client::{Client, Database};
use common_grpc::MockExecution;
use datafusion::physical_plan::expressions::Column;
use datafusion::physical_plan::projection::ProjectionExec;
use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr};
use tracing::{event, Level};

fn main() {
    tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
        .unwrap();

    run();
}

#[tokio::main]
async fn run() {
    let client = Client::with_urls(vec!["127.0.0.1:3001"]);
    let db = Database::new("greptime", client);

    let physical = mock_physical_plan();
    let result = db.physical_plan(physical, None).await;

    event!(Level::INFO, "result: {:#?}", result);
}

fn mock_physical_plan() -> Arc<dyn ExecutionPlan> {
    let id_expr = Arc::new(Column::new("id", 0)) as Arc<dyn PhysicalExpr>;
    let age_expr = Arc::new(Column::new("age", 2)) as Arc<dyn PhysicalExpr>;
    let expr = vec![(id_expr, "id".to_string()), (age_expr, "age".to_string())];

    let input =
        Arc::new(MockExecution::new("mock_input_exec".to_string())) as Arc<dyn ExecutionPlan>;
    let projection = ProjectionExec::try_new(expr, input).unwrap();
    Arc::new(projection)
}
@@ -58,7 +58,19 @@ impl Admin {
        header: Some(header),
        expr: Some(admin_expr::Expr::Alter(expr)),
    };
    Ok(self.do_requests(vec![expr]).await?.remove(0))
    self.do_request(expr).await
    }

    pub async fn drop_table(&self, expr: DropTableExpr) -> Result<AdminResult> {
        let header = ExprHeader {
            version: PROTOCOL_VERSION,
        };
        let expr = AdminExpr {
            header: Some(header),
            expr: Some(admin_expr::Expr::DropTable(expr)),
        };

        self.do_request(expr).await
    }

    /// Invariants: the lengths of input vec (`Vec<AdminExpr>`) and output vec (`Vec<AdminResult>`) are equal.

@@ -18,22 +18,17 @@ use api::v1::codec::SelectResult as GrpcSelectResult;
use api::v1::column::SemanticType;
use api::v1::{
    object_expr, object_result, select_expr, DatabaseRequest, ExprHeader, InsertExpr,
    MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, PhysicalPlan,
    SelectExpr,
    MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, SelectExpr,
};
use common_error::status_code::StatusCode;
use common_grpc::{AsExcutionPlan, DefaultAsPlanImpl};
use common_insert::column_to_vector;
use common_grpc_expr::column_to_vector;
use common_query::Output;
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::physical_plan::ExecutionPlan;
use datatypes::prelude::*;
use datatypes::schema::{ColumnSchema, Schema};
use snafu::{ensure, OptionExt, ResultExt};

use crate::error::{
    ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
};
use crate::error::{ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu};
use crate::{error, Client, Result};

pub const PROTOCOL_VERSION: u32 = 1;
@@ -94,24 +89,6 @@ impl Database {
        self.do_select(select_expr).await
    }

    pub async fn physical_plan(
        &self,
        physical: Arc<dyn ExecutionPlan>,
        original_ql: Option<String>,
    ) -> Result<ObjectResult> {
        let plan = DefaultAsPlanImpl::try_from_physical_plan(physical.clone())
            .context(EncodePhysicalSnafu { physical })?
            .bytes;
        let original_ql = original_ql.unwrap_or_default();
        let select_expr = SelectExpr {
            expr: Some(select_expr::Expr::PhysicalPlan(PhysicalPlan {
                original_ql: original_ql.into_bytes(),
                plan,
            })),
        };
        self.do_select(select_expr).await
    }

    pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
        let select_expr = SelectExpr {
            expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),
@@ -341,12 +318,11 @@ mod tests {

    fn create_test_column(vector: VectorRef) -> Column {
        let wrapper: ColumnDataTypeWrapper = vector.data_type().try_into().unwrap();
        let array = vector.to_arrow_array();
        Column {
            column_name: "test".to_string(),
            semantic_type: 1,
            values: Some(values(&[array.clone()]).unwrap()),
            null_mask: null_mask(&vec![array], vector.len()),
            values: Some(values(&[vector.clone()]).unwrap()),
            null_mask: null_mask(&[vector.clone()], vector.len()),
            datatype: wrapper.datatype() as i32,
        }
    }

@@ -103,7 +103,7 @@ pub enum Error {
    #[snafu(display("Failed to convert column to vector, source: {}", source))]
    ColumnToVector {
        #[snafu(backtrace)]
        source: common_insert::error::Error,
        source: common_grpc_expr::error::Error,
    },
}
@@ -10,6 +10,7 @@ name = "greptime"
path = "src/bin/greptime.rs"

[dependencies]
anymap = "1.0.0-beta.2"
clap = { version = "3.1", features = ["derive"] }
common-error = { path = "../common/error" }
common-telemetry = { path = "../common/telemetry", features = [
@@ -18,8 +19,10 @@ common-telemetry = { path = "../common/telemetry", features = [
datanode = { path = "../datanode" }
frontend = { path = "../frontend" }
futures = "0.3"
meta-client = { path = "../meta-client" }
meta-srv = { path = "../meta-srv" }
serde = "1.0"
servers = { path = "../servers" }
snafu = { version = "0.7", features = ["backtraces"] }
tokio = { version = "1.18", features = ["full"] }
toml = "0.5"
@@ -27,3 +30,6 @@ toml = "0.5"
[dev-dependencies]
serde = "1.0"
tempdir = "0.3"

[build-dependencies]
build-data = "0.1.3"
src/cmd/build.rs (new file, 29 lines)
@@ -0,0 +1,29 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

const DEFAULT_VALUE: &str = "unknown";
fn main() {
    println!(
        "cargo:rustc-env=GIT_COMMIT={}",
        build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
    );
    println!(
        "cargo:rustc-env=GIT_BRANCH={}",
        build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
    );
    println!(
        "cargo:rustc-env=GIT_DIRTY={}",
        build_data::get_git_dirty().map_or(DEFAULT_VALUE.to_string(), |v| v.to_string())
    );
}
@@ -20,7 +20,7 @@ use cmd::{datanode, frontend, metasrv, standalone};
use common_telemetry::logging::{error, info};

#[derive(Parser)]
#[clap(name = "greptimedb")]
#[clap(name = "greptimedb", version = print_version())]
struct Command {
    #[clap(long, default_value = "/tmp/greptimedb/logs")]
    log_dir: String,
@@ -70,6 +70,19 @@ impl fmt::Display for SubCommand {
    }
}

fn print_version() -> &'static str {
    concat!(
        "\nbranch: ",
        env!("GIT_BRANCH"),
        "\ncommit: ",
        env!("GIT_COMMIT"),
        "\ndirty: ",
        env!("GIT_DIRTY"),
        "\nversion: ",
        env!("CARGO_PKG_VERSION")
    )
}

#[tokio::main]
async fn main() -> Result<()> {
    let cmd = Command::parse();
@@ -14,8 +14,9 @@

use clap::Parser;
use common_telemetry::logging;
use datanode::datanode::{Datanode, DatanodeOptions};
use frontend::frontend::Mode;
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
use meta_client::MetaClientOpts;
use servers::Mode;
use snafu::ResultExt;

use crate::error::{Error, MissingConfigSnafu, Result, StartDatanodeSnafu};
@@ -46,7 +47,7 @@ impl SubCommand {
    }
}

#[derive(Debug, Parser)]
#[derive(Debug, Parser, Default)]
struct StartCommand {
    #[clap(long)]
    node_id: Option<u64>,
@@ -58,6 +59,10 @@ struct StartCommand {
    metasrv_addr: Option<String>,
    #[clap(short, long)]
    config_file: Option<String>,
    #[clap(long)]
    data_dir: Option<String>,
    #[clap(long)]
    wal_dir: Option<String>,
}

impl StartCommand {
@@ -98,7 +103,13 @@ impl TryFrom<StartCommand> for DatanodeOptions {
    }

    if let Some(meta_addr) = cmd.metasrv_addr {
        opts.meta_client_opts.metasrv_addr = meta_addr;
        opts.meta_client_opts
            .get_or_insert_with(MetaClientOpts::default)
            .metasrv_addrs = meta_addr
            .split(',')
            .map(&str::trim)
            .map(&str::to_string)
            .collect::<_>();
        opts.mode = Mode::Distributed;
    }
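The CLI now accepts a comma-separated list of metasrv addresses; the conversion trims whitespace around each entry before collecting. The same split-and-trim in isolation (standalone sketch, helper name illustrative):

```rust
fn parse_metasrv_addrs(raw: &str) -> Vec<String> {
    // Mirrors the conversion above: split on ',', trim spaces, keep owned strings.
    raw.split(',').map(str::trim).map(str::to_string).collect()
}

fn main() {
    assert_eq!(
        parse_metasrv_addrs("127.0.0.1:3002, 127.0.0.1:3003"),
        vec!["127.0.0.1:3002".to_string(), "127.0.0.1:3003".to_string()]
    );
}
```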
@@ -108,6 +119,14 @@ impl TryFrom<StartCommand> for DatanodeOptions {
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
if let Some(data_dir) = cmd.data_dir {
|
||||
opts.storage = ObjectStoreConfig::File { data_dir };
|
||||
}
|
||||
|
||||
if let Some(wal_dir) = cmd.wal_dir {
|
||||
opts.wal_dir = wal_dir;
|
||||
}
|
||||
Ok(opts)
|
||||
}
|
||||
}
|
||||
@@ -117,39 +136,41 @@ mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use datanode::datanode::ObjectStoreConfig;
|
||||
use frontend::frontend::Mode;
|
||||
use servers::Mode;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let cmd = StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/datanode.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
..Default::default()
|
||||
};
|
||||
let options: DatanodeOptions = cmd.try_into().unwrap();
|
||||
assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
|
||||
assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
|
||||
assert_eq!("127.0.0.1:3306".to_string(), options.mysql_addr);
|
||||
assert_eq!("127.0.0.1:4406".to_string(), options.mysql_addr);
|
||||
assert_eq!(4, options.mysql_runtime_size);
|
||||
assert_eq!(
|
||||
"1.1.1.1:3002".to_string(),
|
||||
options.meta_client_opts.metasrv_addr
|
||||
);
|
||||
assert_eq!(5000, options.meta_client_opts.connect_timeout_millis);
|
||||
assert_eq!(3000, options.meta_client_opts.timeout_millis);
|
||||
assert!(!options.meta_client_opts.tcp_nodelay);
|
||||
let MetaClientOpts {
|
||||
metasrv_addrs: metasrv_addr,
|
||||
timeout_millis,
|
||||
connect_timeout_millis,
|
||||
tcp_nodelay,
|
||||
} = options.meta_client_opts.unwrap();
|
||||
|
||||
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
|
||||
assert_eq!(5000, connect_timeout_millis);
|
||||
assert_eq!(3000, timeout_millis);
|
||||
assert!(!tcp_nodelay);
|
||||
|
||||
match options.storage {
|
||||
ObjectStoreConfig::File { data_dir } => {
|
||||
assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
|
||||
}
|
||||
ObjectStoreConfig::S3 { .. } => unreachable!(),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -157,44 +178,30 @@ mod tests {
|
||||
fn test_try_from_cmd() {
|
||||
assert_eq!(
|
||||
Mode::Standalone,
|
||||
DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: None
|
||||
})
|
||||
.unwrap()
|
||||
.mode
|
||||
DatanodeOptions::try_from(StartCommand::default())
|
||||
.unwrap()
|
||||
.mode
|
||||
);
|
||||
|
||||
let mode = DatanodeOptions::try_from(StartCommand {
|
||||
node_id: Some(42),
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: Some("127.0.0.1:3002".to_string()),
|
||||
config_file: None,
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap()
|
||||
.mode;
|
||||
assert_matches!(mode, Mode::Distributed);
|
||||
|
||||
assert!(DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: Some("127.0.0.1:3002".to_string()),
|
||||
config_file: None,
|
||||
..Default::default()
|
||||
})
|
||||
.is_err());
|
||||
|
||||
// Providing node_id but leave metasrv_addr absent is ok since metasrv_addr has default value
|
||||
DatanodeOptions::try_from(StartCommand {
|
||||
node_id: Some(42),
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: None,
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
@@ -202,17 +209,23 @@ mod tests {
|
||||
#[test]
|
||||
fn test_merge_config() {
|
||||
let dn_opts = DatanodeOptions::try_from(StartCommand {
|
||||
node_id: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
metasrv_addr: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/datanode.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(Some(42), dn_opts.node_id);
|
||||
assert_eq!("1.1.1.1:3002", dn_opts.meta_client_opts.metasrv_addr);
|
||||
let MetaClientOpts {
|
||||
metasrv_addrs: metasrv_addr,
|
||||
timeout_millis,
|
||||
connect_timeout_millis,
|
||||
tcp_nodelay,
|
||||
} = dn_opts.meta_client_opts.unwrap();
|
||||
assert_eq!(vec!["127.0.0.1:3002".to_string()], metasrv_addr);
|
||||
assert_eq!(3000, timeout_millis);
|
||||
assert_eq!(5000, connect_timeout_millis);
|
||||
assert!(!tcp_nodelay);
|
||||
}
|
||||
}
|
||||
|
||||
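The comma-splitting chain above turns a single `--metasrv-addr` flag into a list of trimmed addresses. A minimal stand-alone sketch of the same parsing (the helper name is ours, not from the codebase):

```rust
// Mirrors the .split(',').map(&str::trim).map(&str::to_string).collect()
// chain used in the diff above; `parse_addrs` itself is illustrative.
fn parse_addrs(raw: &str) -> Vec<String> {
    raw.split(',').map(str::trim).map(str::to_string).collect()
}

fn main() {
    let addrs = parse_addrs("127.0.0.1:3002, 127.0.0.1:3003 ,127.0.0.1:3004");
    assert_eq!(
        addrs,
        vec!["127.0.0.1:3002", "127.0.0.1:3003", "127.0.0.1:3004"]
    );
}
```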
@@ -25,12 +25,6 @@ pub enum Error {
        source: datanode::error::Error,
    },

    #[snafu(display("Failed to build frontend, source: {}", source))]
    BuildFrontend {
        #[snafu(backtrace)]
        source: frontend::error::Error,
    },

    #[snafu(display("Failed to start frontend, source: {}", source))]
    StartFrontend {
        #[snafu(backtrace)]
@@ -61,6 +55,12 @@ pub enum Error {

    #[snafu(display("Illegal config: {}", msg))]
    IllegalConfig { msg: String, backtrace: Backtrace },

    #[snafu(display("Illegal auth config: {}", source))]
    IllegalAuthConfig {
        #[snafu(backtrace)]
        source: servers::auth::Error,
    },
}

pub type Result<T> = std::result::Result<T, Error>;
@@ -75,7 +75,7 @@ impl ErrorExt for Error {
                StatusCode::InvalidArguments
            }
            Error::IllegalConfig { .. } => StatusCode::InvalidArguments,
            Error::BuildFrontend { source, .. } => source.status_code(),
            Error::IllegalAuthConfig { .. } => StatusCode::InvalidArguments,
        }
    }

@@ -97,10 +97,7 @@ mod tests {
    #[test]
    fn test_start_node_error() {
        fn throw_datanode_error() -> StdResult<datanode::error::Error> {
            datanode::error::MissingFieldSnafu {
                field: "test_field",
            }
            .fail()
            datanode::error::MissingNodeIdSnafu {}.fail()
        }

        let e = throw_datanode_error()

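For readers unfamiliar with the pattern above: each snafu variant generates a context selector (`IllegalConfigSnafu`, `IllegalAuthConfigSnafu`, ...) whose `.fail()` builds the error. A minimal self-contained sketch, independent of the crates in this diff:

```rust
use snafu::Snafu;

// Minimal sketch of the snafu style used in the error enum above; the
// variant and message here are illustrative only.
#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Illegal config: {}", msg))]
    IllegalConfig { msg: String },
}

fn check_port(port: u16) -> Result<(), Error> {
    if port == 0 {
        // The derive generates the IllegalConfigSnafu selector; fail() wraps
        // it into Err(Error::IllegalConfig { .. }).
        return IllegalConfigSnafu { msg: "port must be non-zero" }.fail();
    }
    Ok(())
}

fn main() {
    assert!(check_port(0).is_err());
    assert!(check_port(4000).is_ok());
}
```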
@@ -12,17 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use anymap::AnyMap;
use clap::Parser;
use frontend::frontend::{Frontend, FrontendOptions, Mode};
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::grpc::GrpcOptions;
use frontend::influxdb::InfluxdbOptions;
use frontend::instance::Instance;
use frontend::mysql::MysqlOptions;
use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use meta_client::MetaClientOpts;
use servers::auth::UserProviderRef;
use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::{auth, Mode};
use snafu::ResultExt;

use crate::error::{self, Result};
use crate::error::{self, IllegalAuthConfigSnafu, Result};
use crate::toml_loader;

#[derive(Parser)]
@@ -68,21 +74,41 @@ pub struct StartCommand {
    influxdb_enable: Option<bool>,
    #[clap(long)]
    metasrv_addr: Option<String>,
    #[clap(long)]
    tls_mode: Option<TlsMode>,
    #[clap(long)]
    tls_cert_path: Option<String>,
    #[clap(long)]
    tls_key_path: Option<String>,
    #[clap(long)]
    user_provider: Option<String>,
}

impl StartCommand {
    async fn run(self) -> Result<()> {
        let plugins = load_frontend_plugins(&self.user_provider)?;
        let opts: FrontendOptions = self.try_into()?;
        let mut frontend = Frontend::new(
            opts.clone(),
            Instance::try_new(&opts)
            Instance::try_new_distributed(&opts)
                .await
                .context(error::StartFrontendSnafu)?,
            plugins,
        );
        frontend.start().await.context(error::StartFrontendSnafu)
    }
}

pub fn load_frontend_plugins(user_provider: &Option<String>) -> Result<AnyMap> {
    let mut plugins = AnyMap::new();

    if let Some(provider) = user_provider {
        let provider = auth::user_provider_from_option(provider).context(IllegalAuthConfigSnafu)?;
        plugins.insert::<UserProviderRef>(provider);
    }
    Ok(plugins)
}

impl TryFrom<StartCommand> for FrontendOptions {
    type Error = error::Error;

@@ -93,8 +119,13 @@ impl TryFrom<StartCommand> for FrontendOptions {
            FrontendOptions::default()
        };

        let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);

        if let Some(addr) = cmd.http_addr {
            opts.http_addr = Some(addr);
            opts.http_options = Some(HttpOptions {
                addr,
                ..Default::default()
            });
        }
        if let Some(addr) = cmd.grpc_addr {
            opts.grpc_options = Some(GrpcOptions {
@@ -105,12 +136,14 @@ impl TryFrom<StartCommand> for FrontendOptions {
        if let Some(addr) = cmd.mysql_addr {
            opts.mysql_options = Some(MysqlOptions {
                addr,
                tls: tls_option.clone(),
                ..Default::default()
            });
        }
        if let Some(addr) = cmd.postgres_addr {
            opts.postgres_options = Some(PostgresOptions {
                addr,
                tls: tls_option,
                ..Default::default()
            });
        }
@@ -124,13 +157,13 @@ impl TryFrom<StartCommand> for FrontendOptions {
            opts.influxdb_options = Some(InfluxdbOptions { enable });
        }
        if let Some(metasrv_addr) = cmd.metasrv_addr {
            opts.metasrv_addr = Some(
                metasrv_addr
                    .split(',')
                    .into_iter()
                    .map(|x| x.trim().to_string())
                    .collect::<Vec<String>>(),
            );
            opts.meta_client_opts
                .get_or_insert_with(MetaClientOpts::default)
                .metasrv_addrs = metasrv_addr
                .split(',')
                .map(&str::trim)
                .map(&str::to_string)
                .collect::<Vec<_>>();
            opts.mode = Mode::Distributed;
        }
        Ok(opts)
@@ -139,6 +172,10 @@ impl TryFrom<StartCommand> for FrontendOptions {

#[cfg(test)]
mod tests {
    use std::time::Duration;

    use servers::auth::{Identity, Password, UserProviderRef};

    use super::*;

    #[test]
@@ -152,10 +189,14 @@ mod tests {
            influxdb_enable: Some(false),
            config_file: None,
            metasrv_addr: None,
            tls_mode: None,
            tls_cert_path: None,
            tls_key_path: None,
            user_provider: None,
        };

        let opts: FrontendOptions = command.try_into().unwrap();
        assert_eq!(opts.http_addr, Some("127.0.0.1:1234".to_string()));
        assert_eq!(opts.http_options.as_ref().unwrap().addr, "127.0.0.1:1234");
        assert_eq!(opts.mysql_options.as_ref().unwrap().addr, "127.0.0.1:5678");
        assert_eq!(
            opts.postgres_options.as_ref().unwrap().addr,
@@ -186,4 +227,66 @@ mod tests {

        assert!(!opts.influxdb_options.unwrap().enable);
    }

    #[test]
    fn test_read_from_config_file() {
        let command = StartCommand {
            http_addr: None,
            grpc_addr: None,
            mysql_addr: None,
            postgres_addr: None,
            opentsdb_addr: None,
            influxdb_enable: None,
            config_file: Some(format!(
                "{}/../../config/frontend.example.toml",
                std::env::current_dir().unwrap().as_path().to_str().unwrap()
            )),
            metasrv_addr: None,
            tls_mode: None,
            tls_cert_path: None,
            tls_key_path: None,
            user_provider: None,
        };

        let fe_opts = FrontendOptions::try_from(command).unwrap();
        assert_eq!(Mode::Distributed, fe_opts.mode);
        assert_eq!(
            "127.0.0.1:4000".to_string(),
            fe_opts.http_options.as_ref().unwrap().addr
        );
        assert_eq!(
            Duration::from_secs(30),
            fe_opts.http_options.as_ref().unwrap().timeout
        );
    }

    #[tokio::test]
    async fn test_try_from_start_command_to_anymap() {
        let command = StartCommand {
            http_addr: None,
            grpc_addr: None,
            mysql_addr: None,
            postgres_addr: None,
            opentsdb_addr: None,
            influxdb_enable: None,
            config_file: None,
            metasrv_addr: None,
            tls_mode: None,
            tls_cert_path: None,
            tls_key_path: None,
            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
        };

        let plugins = load_frontend_plugins(&command.user_provider);
        assert!(plugins.is_ok());
        let plugins = plugins.unwrap();
        let provider = plugins.get::<UserProviderRef>();
        assert!(provider.is_some());

        let provider = provider.unwrap();
        let result = provider
            .auth(Identity::UserId("test", None), Password::PlainText("test"))
            .await;
        assert!(result.is_ok());
    }
}

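The `AnyMap`-based plugin registry above keys values by their type rather than by a string name, which is why the test can fetch the provider back with `plugins.get::<UserProviderRef>()`. A small stand-alone sketch of that idea (the stored type here is a stand-in for the real `UserProviderRef`):

```rust
use std::sync::Arc;

// Sketch of the type-keyed plugin map used by load_frontend_plugins above.
// Arc<String> stands in for the real UserProviderRef trait object.
fn main() {
    let mut plugins = anymap::AnyMap::new();
    plugins.insert::<Arc<String>>(Arc::new("static_user_provider".to_string()));

    // Lookup is by type, so no string key can go stale.
    let provider = plugins.get::<Arc<String>>();
    assert!(provider.is_some());
}
```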
@@ -12,11 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use anymap::AnyMap;
use clap::Parser;
use common_telemetry::info;
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
use datanode::instance::InstanceRef;
use frontend::frontend::{Frontend, FrontendOptions, Mode};
use frontend::frontend::{Frontend, FrontendOptions};
use frontend::grpc::GrpcOptions;
use frontend::influxdb::InfluxdbOptions;
use frontend::instance::Instance as FeInstance;
@@ -25,12 +26,13 @@ use frontend::opentsdb::OpentsdbOptions;
use frontend::postgres::PostgresOptions;
use frontend::prometheus::PrometheusOptions;
use serde::{Deserialize, Serialize};
use servers::http::HttpOptions;
use servers::tls::{TlsMode, TlsOption};
use servers::Mode;
use snafu::ResultExt;
use tokio::try_join;

use crate::error::{
    BuildFrontendSnafu, Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu,
};
use crate::error::{Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu};
use crate::frontend::load_frontend_plugins;
use crate::toml_loader;

#[derive(Parser)]
@@ -60,7 +62,7 @@ impl SubCommand {

#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct StandaloneOptions {
    pub http_addr: Option<String>,
    pub http_options: Option<HttpOptions>,
    pub grpc_options: Option<GrpcOptions>,
    pub mysql_options: Option<MysqlOptions>,
    pub postgres_options: Option<PostgresOptions>,
@@ -70,14 +72,13 @@ pub struct StandaloneOptions {
    pub mode: Mode,
    pub wal_dir: String,
    pub storage: ObjectStoreConfig,
    pub datanode_mysql_addr: String,
    pub datanode_mysql_runtime_size: usize,
    pub enable_memory_catalog: bool,
}

impl Default for StandaloneOptions {
    fn default() -> Self {
        Self {
            http_addr: Some("127.0.0.1:4000".to_string()),
            http_options: Some(HttpOptions::default()),
            grpc_options: Some(GrpcOptions::default()),
            mysql_options: Some(MysqlOptions::default()),
            postgres_options: Some(PostgresOptions::default()),
@@ -87,8 +88,7 @@ impl Default for StandaloneOptions {
            mode: Mode::Standalone,
            wal_dir: "/tmp/greptimedb/wal".to_string(),
            storage: ObjectStoreConfig::default(),
            datanode_mysql_addr: "127.0.0.1:3306".to_string(),
            datanode_mysql_runtime_size: 4,
            enable_memory_catalog: false,
        }
    }
}
@@ -96,7 +96,7 @@ impl Default for StandaloneOptions {
impl StandaloneOptions {
    fn frontend_options(self) -> FrontendOptions {
        FrontendOptions {
            http_addr: self.http_addr,
            http_options: self.http_options,
            grpc_options: self.grpc_options,
            mysql_options: self.mysql_options,
            postgres_options: self.postgres_options,
@@ -104,8 +104,7 @@ impl StandaloneOptions {
            influxdb_options: self.influxdb_options,
            prometheus_options: self.prometheus_options,
            mode: self.mode,
            datanode_rpc_addr: "127.0.0.1:3001".to_string(),
            metasrv_addr: None,
            meta_client_opts: None,
        }
    }

@@ -113,8 +112,7 @@ impl StandaloneOptions {
        DatanodeOptions {
            wal_dir: self.wal_dir,
            storage: self.storage,
            mysql_addr: self.datanode_mysql_addr,
            mysql_runtime_size: self.datanode_mysql_runtime_size,
            enable_memory_catalog: self.enable_memory_catalog,
            ..Default::default()
        }
    }
@@ -136,18 +134,31 @@ struct StartCommand {
    influxdb_enable: bool,
    #[clap(short, long)]
    config_file: Option<String>,
    #[clap(short = 'm', long = "memory-catalog")]
    enable_memory_catalog: bool,
    #[clap(long)]
    tls_mode: Option<TlsMode>,
    #[clap(long)]
    tls_cert_path: Option<String>,
    #[clap(long)]
    tls_key_path: Option<String>,
    #[clap(long)]
    user_provider: Option<String>,
}

impl StartCommand {
    async fn run(self) -> Result<()> {
        let enable_memory_catalog = self.enable_memory_catalog;
        let config_file = self.config_file.clone();
        let plugins = load_frontend_plugins(&self.user_provider)?;
        let fe_opts = FrontendOptions::try_from(self)?;
        let dn_opts: DatanodeOptions = {
            let opts: StandaloneOptions = if let Some(path) = config_file {
            let mut opts: StandaloneOptions = if let Some(path) = config_file {
                toml_loader::from_file!(&path)?
            } else {
                StandaloneOptions::default()
            };
            opts.enable_memory_catalog = enable_memory_catalog;
            opts.datanode_options()
        };

@@ -159,13 +170,16 @@ impl StartCommand {
        let mut datanode = Datanode::new(dn_opts.clone())
            .await
            .context(StartDatanodeSnafu)?;
        let mut frontend = build_frontend(fe_opts, &dn_opts, datanode.get_instance()).await?;
        let mut frontend = build_frontend(fe_opts, plugins, datanode.get_instance()).await?;

        try_join!(
            async { datanode.start().await.context(StartDatanodeSnafu) },
            async { frontend.start().await.context(StartFrontendSnafu) }
        )?;
        // Start the datanode instance before starting services, so that no request can come in
        // before the internal components are up.
        datanode
            .start_instance()
            .await
            .context(StartDatanodeSnafu)?;
        info!("Datanode instance started");

        frontend.start().await.context(StartFrontendSnafu)?;
        Ok(())
    }
}
@@ -173,20 +187,12 @@ impl StartCommand {
/// Build frontend instance in standalone mode
async fn build_frontend(
    fe_opts: FrontendOptions,
    dn_opts: &DatanodeOptions,
    plugins: AnyMap,
    datanode_instance: InstanceRef,
) -> Result<Frontend<FeInstance>> {
    let grpc_server_addr = &dn_opts.rpc_addr;
    info!(
        "Build frontend with datanode gRPC addr: {}",
        grpc_server_addr
    );
    let mut frontend_instance = FeInstance::try_new(&fe_opts)
        .await
        .context(BuildFrontendSnafu)?;
    frontend_instance.set_catalog_manager(datanode_instance.catalog_manager().clone());
    let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
    frontend_instance.set_script_handler(datanode_instance);
    Ok(Frontend::new(fe_opts, frontend_instance))
    Ok(Frontend::new(fe_opts, frontend_instance, plugins))
}

impl TryFrom<StartCommand> for FrontendOptions {
@@ -204,7 +210,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
        opts.mode = Mode::Standalone;

        if let Some(addr) = cmd.http_addr {
            opts.http_addr = Some(addr);
            opts.http_options = Some(HttpOptions {
                addr,
                ..Default::default()
            });
        }
        if let Some(addr) = cmd.rpc_addr {
            // The frontend gRPC addr conflicts with the datanode's default gRPC addr.
@@ -248,12 +257,28 @@ impl TryFrom<StartCommand> for FrontendOptions {
            opts.influxdb_options = Some(InfluxdbOptions { enable: true });
        }

        let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);

        if let Some(mut mysql_options) = opts.mysql_options {
            mysql_options.tls = tls_option.clone();
            opts.mysql_options = Some(mysql_options);
        }

        if let Some(mut postgres_options) = opts.postgres_options {
            postgres_options.tls = tls_option;
            opts.postgres_options = Some(postgres_options);
        }

        Ok(opts)
    }
}

#[cfg(test)]
mod tests {
    use std::time::Duration;

    use servers::auth::{Identity, Password, UserProviderRef};

    use super::*;

    #[test]
@@ -269,12 +294,23 @@ mod tests {
            std::env::current_dir().unwrap().as_path().to_str().unwrap()
        )),
        influxdb_enable: false,
        enable_memory_catalog: false,
        tls_mode: None,
        tls_cert_path: None,
        tls_key_path: None,
        user_provider: None,
        };

        let fe_opts = FrontendOptions::try_from(cmd).unwrap();
        assert_eq!(Mode::Standalone, fe_opts.mode);
        assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
        assert_eq!(Some("127.0.0.1:4000".to_string()), fe_opts.http_addr);
        assert_eq!(
            "127.0.0.1:4000".to_string(),
            fe_opts.http_options.as_ref().unwrap().addr
        );
        assert_eq!(
            Duration::from_secs(30),
            fe_opts.http_options.as_ref().unwrap().timeout
        );
        assert_eq!(
            "127.0.0.1:4001".to_string(),
            fe_opts.grpc_options.unwrap().addr
@@ -286,4 +322,33 @@ mod tests {
        assert_eq!(2, fe_opts.mysql_options.as_ref().unwrap().runtime_size);
        assert!(fe_opts.influxdb_options.as_ref().unwrap().enable);
    }

    #[tokio::test]
    async fn test_try_from_start_command_to_anymap() {
        let command = StartCommand {
            http_addr: None,
            rpc_addr: None,
            mysql_addr: None,
            postgres_addr: None,
            opentsdb_addr: None,
            config_file: None,
            influxdb_enable: false,
            enable_memory_catalog: false,
            tls_mode: None,
            tls_cert_path: None,
            tls_key_path: None,
            user_provider: Some("static_user_provider:cmd:test=test".to_string()),
        };

        let plugins = load_frontend_plugins(&command.user_provider);
        assert!(plugins.is_ok());
        let plugins = plugins.unwrap();
        let provider = plugins.get::<UserProviderRef>();
        assert!(provider.is_some());
        let provider = provider.unwrap();
        let result = provider
            .auth(Identity::UserId("test", None), Password::PlainText("test"))
            .await;
        assert!(result.is_ok());
    }
}

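The switch above from `try_join!` to a sequential `start_instance()` / `start()` is an ordering guarantee: the datanode's internal components must be up before the frontend accepts traffic. A toy sketch of the shape of that change, with stand-in types instead of the real `Datanode`/`Frontend`:

```rust
// Toy types standing in for Datanode/Frontend, to show the ordering only.
struct Datanode;
struct Frontend;

impl Datanode {
    async fn start_instance(&mut self) -> Result<(), String> {
        Ok(()) // bring up catalog, storage, etc. first
    }
}

impl Frontend {
    async fn start(&mut self) -> Result<(), String> {
        Ok(()) // only now start accepting client requests
    }
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let mut datanode = Datanode;
    let mut frontend = Frontend;
    // Sequential, not concurrent: no request can arrive before the
    // datanode's internals are ready.
    datanode.start_instance().await?;
    frontend.start().await?;
    Ok(())
}
```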
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use bitvec::prelude as bv;
pub use bitvec::prelude;

// `Lsb0` provides the best codegen for bit manipulation,
// see https://github.com/bitvecto-rs/bitvec/blob/main/doc/order/Lsb0.md
pub type BitVec = bv::BitVec<u8, bv::Lsb0>;
pub type BitVec = prelude::BitVec<u8>;
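Both aliases are equivalent in practice, since `bitvec`'s `BitVec<u8>` defaults its order parameter to `Lsb0`. A small usage sketch of the alias:

```rust
use bitvec::prelude as bv;

// Same alias as in the diff above: u8 storage, Lsb0 bit order.
type BitVec = bv::BitVec<u8, bv::Lsb0>;

fn main() {
    let mut bits = BitVec::new();
    bits.push(true);
    bits.push(false);
    assert_eq!(bits.len(), 2);
    assert!(bits[0]);
    assert!(!bits[1]);
}
```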
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod bitset;
pub mod bit_vec;
pub mod buffer;
pub mod bytes;

pub use bitset::BitVec;
pub use bit_vec::BitVec;

@@ -14,7 +14,6 @@ regex = "1.6"
serde = "1.0"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
table = { path = "../../table" }

[dev-dependencies]
chrono = "0.4"

@@ -25,9 +25,3 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
/// scripts table id
pub const SCRIPTS_TABLE_ID: u32 = 1;

pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";

@@ -14,10 +14,3 @@

pub mod consts;
pub mod error;
mod helper;

pub use helper::{
    build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
    build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
    TableGlobalValue, TableRegionalKey, TableRegionalValue,
};

@@ -62,6 +62,19 @@
    /// Runtime resources exhausted, like creating threads failed.
    RuntimeResourcesExhausted = 6000,
    // ====== End of server related status code =======

    // ====== Begin of auth related status code =====
    /// User does not exist
    UserNotFound = 7000,
    /// Unsupported password type
    UnsupportedPasswordType = 7001,
    /// Username and password do not match
    UserPasswordMismatch = 7002,
    /// HTTP authorization header not found
    AuthHeaderNotFound = 7003,
    /// Invalid HTTP authorization header
    InvalidAuthHeader = 7004,
    // ====== End of auth related status code =====
}

impl StatusCode {

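The new auth variants keep this enum's convention of one thousand-range per concern (7000–7999 here). A purely illustrative helper that exploits that grouping; the discriminants are copied from the diff, but `is_auth_error` is ours, not real API:

```rust
// Discriminants copied from the diff above; is_auth_error is an illustrative
// helper showing how the 7000-range grouping can be used.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug)]
enum StatusCode {
    RuntimeResourcesExhausted = 6000,
    UserNotFound = 7000,
    UnsupportedPasswordType = 7001,
    UserPasswordMismatch = 7002,
    AuthHeaderNotFound = 7003,
    InvalidAuthHeader = 7004,
}

fn is_auth_error(code: StatusCode) -> bool {
    (code as u32) / 1000 == 7
}

fn main() {
    assert!(is_auth_error(StatusCode::UserNotFound));
    assert!(is_auth_error(StatusCode::InvalidAuthHeader));
    assert!(!is_auth_error(StatusCode::RuntimeResourcesExhausted));
}
```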
@@ -9,9 +9,9 @@ arc-swap = "1.0"
chrono-tz = "0.6"
common-error = { path = "../error" }
common-function-macro = { path = "../function-macro" }
common-time = { path = "../time" }
common-query = { path = "../query" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
common-time = { path = "../time" }
datafusion-common = "14.0.0"
datatypes = { path = "../../datatypes" }
libc = "0.2"
num = "0.4"
@@ -21,20 +21,6 @@ paste = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }
statrs = "0.15"

[dependencies.arrow]
features = [
    "io_csv",
    "io_json",
    "io_parquet",
    "io_parquet_compression",
    "io_ipc",
    "ahash",
    "compute",
    "serde_types",
]
package = "arrow2"
version = "0.10"

[dev-dependencies]
ron = "0.7"
serde = { version = "1.0", features = ["derive"] }

@@ -12,5 +12,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod error;
pub mod scalars;

@@ -23,6 +23,5 @@ pub(crate) mod test;
mod timestamp;
pub mod udf;

pub use aggregate::MedianAccumulatorCreator;
pub use function::{Function, FunctionRef};
pub use function_registry::{FunctionRegistry, FUNCTION_REGISTRY};

@@ -16,7 +16,6 @@ mod argmax;
mod argmin;
mod diff;
mod mean;
mod median;
mod percentile;
mod polyval;
mod scipy_stats_norm_cdf;
@@ -29,7 +28,6 @@ pub use argmin::ArgminAccumulatorCreator;
use common_query::logical_plan::AggregateFunctionCreatorRef;
pub use diff::DiffAccumulatorCreator;
pub use mean::MeanAccumulatorCreator;
pub use median::MedianAccumulatorCreator;
pub use percentile::PercentileAccumulatorCreator;
pub use polyval::PolyvalAccumulatorCreator;
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
@@ -88,7 +86,6 @@ impl AggregateFunctions {
        };
    }

    register_aggr_func!("median", 1, MedianAccumulatorCreator);
    register_aggr_func!("diff", 1, DiffAccumulatorCreator);
    register_aggr_func!("mean", 1, MeanAccumulatorCreator);
    register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
@@ -20,24 +20,22 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::ConstantVector;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;

// https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
// return the index of the max value
#[derive(Debug, Default)]
pub struct Argmax<T>
where
    T: Primitive + PartialOrd,
{
pub struct Argmax<T> {
    max: Option<T>,
    n: u64,
}

impl<T> Argmax<T>
where
    T: Primitive + PartialOrd,
    T: PartialOrd + Copy,
{
    fn update(&mut self, value: T, index: u64) {
        if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
@@ -49,8 +47,7 @@ where

impl<T> Accumulator for Argmax<T>
where
    T: Primitive + PartialOrd,
    for<'a> T: Scalar<RefType<'a> = T>,
    T: WrapperType + PartialOrd,
{
    fn state(&self) -> Result<Vec<Value>> {
        match self.max {
@@ -66,10 +63,10 @@ where

        let column = &values[0];
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
            unsafe { VectorHelper::static_cast(column.inner()) }
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { VectorHelper::static_cast(column) }
            unsafe { Helper::static_cast(column) }
        };
        for (i, v) in column.iter_data().enumerate() {
            if let Some(value) = v {
@@ -93,8 +90,8 @@ where

        let max = &states[0];
        let index = &states[1];
        let max: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(max) };
        let index: &<u64 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
        let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
        let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
        index
            .iter_data()
            .flatten()
@@ -122,7 +119,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
        with_match_primitive_type_id!(
            input_type.logical_type_id(),
            |$S| {
                Ok(Box::new(Argmax::<$S>::default()))
                Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
            },
            {
                let err_msg = format!(
@@ -154,7 +151,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {

#[cfg(test)]
mod test {
    use datatypes::vectors::PrimitiveVector;
    use datatypes::vectors::Int32Vector;

    use super::*;
    #[test]
@@ -166,21 +163,19 @@ mod test {

        // test update one not-null value
        let mut argmax = Argmax::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
        assert!(argmax.update_batch(&v).is_ok());
        assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());

        // test update one null value
        let mut argmax = Argmax::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
            Option::<i32>::None,
        ]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
        assert!(argmax.update_batch(&v).is_ok());
        assert_eq!(Value::Null, argmax.evaluate().unwrap());

        // test update no null-value batch
        let mut argmax = Argmax::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-1i32),
            Some(1),
            Some(3),
@@ -190,7 +185,7 @@ mod test {

        // test update null-value batch
        let mut argmax = Argmax::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-2i32),
            None,
            Some(4),
@@ -201,7 +196,7 @@ mod test {
        // test update with constant vector
        let mut argmax = Argmax::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
        assert!(argmax.update_batch(&v).is_ok());

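Stripped of the vector plumbing, the accumulator above is a plain argmax scan: keep the largest value seen so far and the index where it occurred, skipping nulls. A stand-alone sketch:

```rust
// Plain-Rust sketch of the Argmax update logic above; nulls (None) are
// skipped, and ties keep the first occurrence.
fn argmax<T: PartialOrd + Copy>(values: &[Option<T>]) -> Option<u64> {
    let mut best: Option<(T, u64)> = None;
    for (i, v) in values.iter().enumerate() {
        if let Some(v) = *v {
            match best {
                Some((max, _)) if max >= v => {}
                _ => best = Some((v, i as u64)),
            }
        }
    }
    best.map(|(_, i)| i)
}

fn main() {
    assert_eq!(argmax(&[Some(-1i32), Some(1), Some(3)]), Some(2));
    assert_eq!(argmax(&[Some(-2i32), None, Some(4)]), Some(2));
    assert_eq!(argmax::<i32>(&[None]), None);
}
```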
@@ -20,23 +20,20 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::ConstantVector;
use datatypes::vectors::{ConstantVector, Helper};
use datatypes::with_match_primitive_type_id;
use snafu::ensure;

// https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
#[derive(Debug, Default)]
pub struct Argmin<T>
where
    T: Primitive + PartialOrd,
{
pub struct Argmin<T> {
    min: Option<T>,
    n: u32,
}

impl<T> Argmin<T>
where
    T: Primitive + PartialOrd,
    T: Copy + PartialOrd,
{
    fn update(&mut self, value: T, index: u32) {
        match self.min {
@@ -56,8 +53,7 @@ where

impl<T> Accumulator for Argmin<T>
where
    T: Primitive + PartialOrd,
    for<'a> T: Scalar<RefType<'a> = T>,
    T: WrapperType + PartialOrd,
{
    fn state(&self) -> Result<Vec<Value>> {
        match self.min {
@@ -75,10 +71,10 @@ where

        let column = &values[0];
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
            unsafe { VectorHelper::static_cast(column.inner()) }
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { VectorHelper::static_cast(column) }
            unsafe { Helper::static_cast(column) }
        };
        for (i, v) in column.iter_data().enumerate() {
            if let Some(value) = v {
@@ -102,8 +98,8 @@ where

        let min = &states[0];
        let index = &states[1];
        let min: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(min) };
        let index: &<u32 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
        let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
        let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
        index
            .iter_data()
            .flatten()
@@ -131,7 +127,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
        with_match_primitive_type_id!(
            input_type.logical_type_id(),
            |$S| {
                Ok(Box::new(Argmin::<$S>::default()))
                Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
            },
            {
                let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {

#[cfg(test)]
mod test {
    use datatypes::vectors::PrimitiveVector;
    use datatypes::vectors::Int32Vector;

    use super::*;
    #[test]
@@ -175,21 +171,19 @@ mod test {

        // test update one not-null value
        let mut argmin = Argmin::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
        assert!(argmin.update_batch(&v).is_ok());
        assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());

        // test update one null value
        let mut argmin = Argmin::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
            Option::<i32>::None,
        ]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
        assert!(argmin.update_batch(&v).is_ok());
        assert_eq!(Value::Null, argmin.evaluate().unwrap());

        // test update no null-value batch
        let mut argmin = Argmin::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-1i32),
            Some(1),
            Some(3),
@@ -199,7 +193,7 @@ mod test {

        // test update null-value batch
        let mut argmin = Argmin::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-2i32),
            None,
            Some(4),
@@ -210,7 +204,7 @@ mod test {
        // test update with constant vector
        let mut argmin = Argmin::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
        assert!(argmin.update_batch(&v).is_ok());

@@ -22,40 +22,32 @@ use common_query::error::{
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::PrimitiveType;
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, ListVector};
use datatypes::vectors::{ConstantVector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};

// https://numpy.org/doc/stable/reference/generated/numpy.diff.html
// I is the input type, O is the output type.
#[derive(Debug, Default)]
pub struct Diff<T, SubT>
where
    T: Primitive + AsPrimitive<SubT>,
    SubT: Primitive + std::ops::Sub<Output = SubT>,
{
    values: Vec<T>,
    _phantom: PhantomData<SubT>,
pub struct Diff<I, O> {
    values: Vec<I>,
    _phantom: PhantomData<O>,
}

impl<T, SubT> Diff<T, SubT>
where
    T: Primitive + AsPrimitive<SubT>,
    SubT: Primitive + std::ops::Sub<Output = SubT>,
{
    fn push(&mut self, value: T) {
impl<I, O> Diff<I, O> {
    fn push(&mut self, value: I) {
        self.values.push(value);
    }
}

impl<T, SubT> Accumulator for Diff<T, SubT>
impl<I, O> Accumulator for Diff<I, O>
where
    T: Primitive + AsPrimitive<SubT>,
    for<'a> T: Scalar<RefType<'a> = T>,
    SubT: Primitive + std::ops::Sub<Output = SubT>,
    for<'a> SubT: Scalar<RefType<'a> = SubT>,
    I: WrapperType,
    O: WrapperType,
    I::Native: AsPrimitive<O::Native>,
    O::Native: std::ops::Sub<Output = O::Native>,
{
    fn state(&self) -> Result<Vec<Value>> {
        let nums = self
@@ -65,7 +57,7 @@ where
            .collect::<Vec<Value>>();
        Ok(vec![Value::List(ListValue::new(
            Some(Box::new(nums)),
            T::default().into().data_type(),
            I::LogicalType::build_data_type(),
        ))])
    }

@@ -78,12 +70,12 @@ where

        let column = &values[0];
        let mut len = 1;
        let column: &<T as Scalar>::VectorType = if column.is_const() {
        let column: &<I as Scalar>::VectorType = if column.is_const() {
            len = column.len();
            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
            unsafe { VectorHelper::static_cast(column.inner()) }
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { VectorHelper::static_cast(column) }
            unsafe { Helper::static_cast(column) }
        };
        (0..len).for_each(|_| {
            for v in column.iter_data().flatten() {
@@ -109,8 +101,9 @@ where
                ),
            })?;
        for state in states.values_iter() {
            let state = state.context(FromScalarValueSnafu)?;
            self.update_batch(&[state])?
            if let Some(state) = state.context(FromScalarValueSnafu)? {
                self.update_batch(&[state])?;
            }
        }
        Ok(())
    }
@@ -122,11 +115,14 @@ where
        let diff = self
            .values
            .windows(2)
            .map(|x| (x[1].as_() - x[0].as_()).into())
            .map(|x| {
                let native = x[1].into_native().as_() - x[0].into_native().as_();
                O::from_native(native).into()
            })
            .collect::<Vec<Value>>();
        let diff = Value::List(ListValue::new(
            Some(Box::new(diff)),
            SubT::default().into().data_type(),
            O::LogicalType::build_data_type(),
        ));
        Ok(diff)
    }
@@ -143,7 +139,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
        with_match_primitive_type_id!(
            input_type.logical_type_id(),
            |$S| {
                Ok(Box::new(Diff::<$S,<$S as Primitive>::LargestType>::default()))
                Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
            },
            {
                let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
        with_match_primitive_type_id!(
            input_types[0].logical_type_id(),
            |$S| {
                Ok(ConcreteDataType::list_datatype(PrimitiveType::<<$S as Primitive>::LargestType>::default().into()))
                Ok(ConcreteDataType::list_datatype($S::default().into()))
            },
            {
                unreachable!()
@@ -177,7 +173,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
        with_match_primitive_type_id!(
            input_types[0].logical_type_id(),
            |$S| {
                Ok(vec![ConcreteDataType::list_datatype(PrimitiveType::<$S>::default().into())])
                Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
            },
            {
                unreachable!()
@@ -188,9 +184,10 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {

#[cfg(test)]
mod test {
    use datatypes::vectors::PrimitiveVector;
    use datatypes::vectors::Int32Vector;

    use super::*;

    #[test]
    fn test_update_batch() {
        // test update empty batch, expect not updating anything
@@ -201,21 +198,19 @@ mod test {

        // test update one not-null value
        let mut diff = Diff::<i32, i64>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
        assert!(diff.update_batch(&v).is_ok());
        assert_eq!(Value::Null, diff.evaluate().unwrap());

        // test update one null value
        let mut diff = Diff::<i32, i64>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
            Option::<i32>::None,
        ]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
        assert!(diff.update_batch(&v).is_ok());
        assert_eq!(Value::Null, diff.evaluate().unwrap());

        // test update no null-value batch
        let mut diff = Diff::<i32, i64>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-1i32),
            Some(1),
            Some(2),
@@ -232,7 +227,7 @@ mod test {

        // test update null-value batch
        let mut diff = Diff::<i32, i64>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-2i32),
            None,
            Some(3),
@@ -251,7 +246,7 @@ mod test {
        // test update with constant vector
        let mut diff = Diff::<i32, i64>::default();
        let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
            Arc::new(Int32Vector::from_vec(vec![4])),
            4,
        ))];
        let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];

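The core of `evaluate` above is the `windows(2)` pass: each adjacent pair produces one output, widened to the larger native type. A minimal sketch:

```rust
// Sketch of the pairwise-difference step in evaluate() above, with the same
// i32 -> i64 widening the accumulator applies via AsPrimitive.
fn pairwise_diff(values: &[i32]) -> Vec<i64> {
    values
        .windows(2)
        .map(|w| w[1] as i64 - w[0] as i64)
        .collect()
}

fn main() {
    assert_eq!(pairwise_diff(&[-1, 1, 2]), vec![2, 1]);
    // Fewer than two values yields no differences at all.
    assert!(pairwise_diff(&[42]).is_empty());
}
```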
@@ -22,16 +22,14 @@ use common_query::error::{
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::vectors::{ConstantVector, Float64Vector, UInt64Vector};
use datatypes::types::WrapperType;
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt};

#[derive(Debug, Default)]
pub struct Mean<T>
where
    T: Primitive + AsPrimitive<f64>,
{
pub struct Mean<T> {
    sum: f64,
    n: u64,
    _phantom: PhantomData<T>,
@@ -39,11 +37,12 @@ where

impl<T> Mean<T>
where
    T: Primitive + AsPrimitive<f64>,
    T: WrapperType,
    T::Native: AsPrimitive<f64>,
{
    #[inline(always)]
    fn push(&mut self, value: T) {
        self.sum += value.as_();
        self.sum += value.into_native().as_();
        self.n += 1;
    }

@@ -56,8 +55,8 @@ where

impl<T> Accumulator for Mean<T>
where
    T: Primitive + AsPrimitive<f64>,
    for<'a> T: Scalar<RefType<'a> = T>,
    T: WrapperType,
    T::Native: AsPrimitive<f64>,
{
    fn state(&self) -> Result<Vec<Value>> {
        Ok(vec![self.sum.into(), self.n.into()])
@@ -73,10 +72,10 @@ where
        let mut len = 1;
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            len = column.len();
            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
            unsafe { VectorHelper::static_cast(column.inner()) }
            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
            unsafe { Helper::static_cast(column.inner()) }
        } else {
            unsafe { VectorHelper::static_cast(column) }
            unsafe { Helper::static_cast(column) }
        };
        (0..len).for_each(|_| {
            for v in column.iter_data().flatten() {
@@ -150,7 +149,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
        with_match_primitive_type_id!(
            input_type.logical_type_id(),
            |$S| {
                Ok(Box::new(Mean::<$S>::default()))
                Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
            },
            {
                let err_msg = format!(
@@ -182,7 +181,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {

#[cfg(test)]
mod test {
    use datatypes::vectors::PrimitiveVector;
    use datatypes::vectors::Int32Vector;

    use super::*;
    #[test]
@@ -194,21 +193,19 @@ mod test {

        // test update one not-null value
        let mut mean = Mean::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
        assert!(mean.update_batch(&v).is_ok());
        assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());

        // test update one null value
        let mut mean = Mean::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
            Option::<i32>::None,
        ]))];
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
        assert!(mean.update_batch(&v).is_ok());
        assert_eq!(Value::Null, mean.evaluate().unwrap());

        // test update no null-value batch
        let mut mean = Mean::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-1i32),
            Some(1),
            Some(2),
@@ -218,7 +215,7 @@ mod test {

        // test update null-value batch
        let mut mean = Mean::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
            Some(-2i32),
            None,
            Some(3),
@@ -230,7 +227,7 @@ mod test {
        // test update with constant vector
        let mut mean = Mean::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
        assert!(mean.update_batch(&v).is_ok());

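The accumulator above reduces to a running sum and count; only `evaluate` divides. A stand-alone sketch of that core:

```rust
// Core of the Mean accumulator above: push() accumulates, evaluate() divides.
// Nulls never reach push(), so they simply don't count.
#[derive(Default)]
struct Mean {
    sum: f64,
    n: u64,
}

impl Mean {
    fn push(&mut self, value: f64) {
        self.sum += value;
        self.n += 1;
    }

    fn evaluate(&self) -> Option<f64> {
        (self.n > 0).then(|| self.sum / self.n as f64)
    }
}

fn main() {
    let mut mean = Mean::default();
    for v in [-1.0, 1.0, 2.0] {
        mean.push(v);
    }
    assert_eq!(mean.evaluate(), Some(2.0 / 3.0));
}
```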
@@ -1,289 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_function_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::OrdPrimitive;
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{ConstantVector, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num::NumCast;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
// This median calculation algorithm's details can be found at
|
||||
// https://leetcode.cn/problems/find-median-from-data-stream/
|
||||
//
|
||||
// Basically, it uses two heaps, a maximum heap and a minimum. The maximum heap stores numbers that
|
||||
// are not greater than the median, and the minimum heap stores the greater. In a streaming of
|
||||
// numbers, when a number is arrived, we adjust the heaps' tops, so that either one top is the
|
||||
// median or both tops can be averaged to get the median.
|
||||
//
|
||||
// The time complexity to update the median is O(logn), O(1) to get the median; and the space
|
||||
// complexity is O(n). (Ignore the costs for heap expansion.)
|
||||
//
|
||||
// From the point of algorithm, [quick select](https://en.wikipedia.org/wiki/Quickselect) might be
|
||||
// better. But to use quick select here, we need a mutable self in the final calculation(`evaluate`)
|
||||
// to swap stored numbers in the states vector. Though we can make our `evaluate` received
|
||||
// `&mut self`, DataFusion calls our accumulator with `&self` (see `DfAccumulatorAdaptor`). That
|
||||
// means we have to introduce some kinds of interior mutability, and the overhead is not neglectable.
|
||||
//
|
||||
// TODO(LFC): Use quick select to get median when we can modify DataFusion's code, and benchmark with two-heap algorithm.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Median<T>
|
||||
where
|
||||
T: Primitive,
|
||||
{
|
||||
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
|
||||
not_greater: BinaryHeap<OrdPrimitive<T>>,
|
||||
}
|
||||
|
||||
impl<T> Median<T>
|
||||
where
|
||||
T: Primitive,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
let value = OrdPrimitive::<T>(value);
|
||||
|
||||
if self.not_greater.is_empty() {
|
||||
self.not_greater.push(value);
|
||||
return;
|
||||
}
|
||||
// The `unwrap`s below are safe because there are `push`s before them.
|
||||
if value <= *self.not_greater.peek().unwrap() {
|
||||
self.not_greater.push(value);
|
||||
if self.not_greater.len() > self.greater.len() + 1 {
|
||||
self.greater.push(Reverse(self.not_greater.pop().unwrap()));
|
||||
}
|
||||
} else {
|
||||
self.greater.push(Reverse(value));
|
||||
if self.greater.len() > self.not_greater.len() {
|
||||
self.not_greater.push(self.greater.pop().unwrap().0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// UDAFs are built using the trait `Accumulator`, that offers DataFusion the necessary functions
|
||||
// to use them.
|
||||
impl<T> Accumulator for Median<T>
|
||||
where
|
||||
T: Primitive,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
// This function serializes our state to `ScalarValue`, which DataFusion uses to pass this
|
||||
// state between execution stages. Note that this can be arbitrary data.
|
||||
//
|
||||
// The `ScalarValue`s returned here will be passed in as argument `states: &[VectorRef]` to
|
||||
// `merge_batch` function.
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
.greater
|
||||
.iter()
|
||||
.map(|x| &x.0)
|
||||
.chain(self.not_greater.iter())
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::default().into().data_type(),
|
||||
))])
|
||||
}
|
||||
|
||||
// DataFusion calls this function to update the accumulator's state for a batch of inputs rows.
|
||||
// It is expected this function to update the accumulator's state.
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
        }

        ensure!(values.len() == 1, InvalidInputStateSnafu);

        // This is a unary accumulator, so only one column is provided.
        let column = &values[0];
        let mut len = 1;
        let column: &<T as Scalar>::VectorType = if column.is_const() {
            len = column.len();
            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
            unsafe { VectorHelper::static_cast(column.inner()) }
        } else {
            unsafe { VectorHelper::static_cast(column) }
        };
        (0..len).for_each(|_| {
            for v in column.iter_data().flatten() {
                self.push(v);
            }
        });
        Ok(())
    }

    // DataFusion executes accumulators in partitions. At some stage of execution, DataFusion
    // will merge states from other accumulators (returned by the `state()` method).
    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
        if states.is_empty() {
            return Ok(());
        }

        // The states here are returned by the `state` method. Since we only returned a vector
        // with one value in that method, `states[0]` is fine.
        let states = &states[0];
        let states = states
            .as_any()
            .downcast_ref::<ListVector>()
            .with_context(|| DowncastVectorSnafu {
                err_msg: format!(
                    "expect ListVector, got vector type {}",
                    states.vector_type_name()
                ),
            })?;
        for state in states.values_iter() {
            let state = state.context(FromScalarValueSnafu)?;
            // Merging states simply accumulates the numbers stored by other accumulators,
            // so just call update.
            self.update_batch(&[state])?
        }
        Ok(())
    }

    // DataFusion expects this function to return the final value of this aggregator.
    fn evaluate(&self) -> Result<Value> {
        if self.not_greater.is_empty() {
            assert!(
                self.greater.is_empty(),
                "two-heap median invariant violated: `greater` must be empty when `not_greater` is; this indicates an implementation bug"
            );
            return Ok(Value::Null);
        }

        // unwrap is safe because we checked the not_greater heap's length above
        let not_greater = *self.not_greater.peek().unwrap();
        let median = if self.not_greater.len() > self.greater.len() {
            not_greater.into()
        } else {
            // unwrap is safe because the greater heap's length >= the not_greater heap's length, which is > 0
            let greater = self.greater.peek().unwrap();

            // the following three NumCast `unwrap`s are safe because T is primitive
            let not_greater_v: f64 = NumCast::from(not_greater.as_primitive()).unwrap();
            let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
            let median: T = NumCast::from((not_greater_v + greater_v) / 2.0).unwrap();
            median.into()
        };
        Ok(median)
    }
}

#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct MedianAccumulatorCreator {}

impl AggregateFunctionCreator for MedianAccumulatorCreator {
    fn creator(&self) -> AccumulatorCreatorFunction {
        let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
            let input_type = &types[0];
            with_match_primitive_type_id!(
                input_type.logical_type_id(),
                |$S| {
                    Ok(Box::new(Median::<$S>::default()))
                },
                {
                    let err_msg = format!(
                        "\"MEDIAN\" aggregate function does not support data type {:?}",
                        input_type.logical_type_id(),
                    );
                    CreateAccumulatorSnafu { err_msg }.fail()?
                }
            )
        });
        creator
    }

    fn output_type(&self) -> Result<ConcreteDataType> {
        let input_types = self.input_types()?;
        ensure!(input_types.len() == 1, InvalidInputStateSnafu);
        // unwrap is safe because we have checked that input_types' length is 1
        Ok(input_types.into_iter().next().unwrap())
    }

    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
        Ok(vec![ConcreteDataType::list_datatype(self.output_type()?)])
    }
}

#[cfg(test)]
mod test {
    use datatypes::vectors::PrimitiveVector;

    use super::*;
    #[test]
    fn test_update_batch() {
        // test updating an empty batch, expecting nothing to be updated
        let mut median = Median::<i32>::default();
        assert!(median.update_batch(&[]).is_ok());
        assert!(median.not_greater.is_empty());
        assert!(median.greater.is_empty());
        assert_eq!(Value::Null, median.evaluate().unwrap());

        // test update one not-null value
        let mut median = Median::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
        assert!(median.update_batch(&v).is_ok());
        assert_eq!(Value::Int32(42), median.evaluate().unwrap());

        // test update one null value
        let mut median = Median::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
            Option::<i32>::None,
        ]))];
        assert!(median.update_batch(&v).is_ok());
        assert_eq!(Value::Null, median.evaluate().unwrap());

        // test update no null-value batch
        let mut median = Median::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
            Some(-1i32),
            Some(1),
            Some(2),
        ]))];
        assert!(median.update_batch(&v).is_ok());
        assert_eq!(Value::Int32(1), median.evaluate().unwrap());

        // test update null-value batch
        let mut median = Median::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
            Some(-2i32),
            None,
            Some(3),
            Some(4),
        ]))];
        assert!(median.update_batch(&v).is_ok());
        assert_eq!(Value::Int32(3), median.evaluate().unwrap());

        // test update with constant vector
        let mut median = Median::<i32>::default();
        let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
            10,
        ))];
        assert!(median.update_batch(&v).is_ok());
        assert_eq!(Value::Int32(4), median.evaluate().unwrap());
    }
}
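The `evaluate` above reads the median off two balanced heaps. As a standalone illustration of that two-heap technique — simplified to `i64` and independent of the repo's vector and `OrdPrimitive` types, so names here are illustrative only — a running median can be maintained like this:

use std::cmp::Reverse;
use std::collections::BinaryHeap;

/// Sketch of the two-heap running median: `not_greater` is a max-heap of the
/// smaller half, `greater` a min-heap of the larger half, kept balanced so
/// the median is always readable from a heap top.
struct RunningMedian {
    not_greater: BinaryHeap<i64>,      // max-heap: values <= current median
    greater: BinaryHeap<Reverse<i64>>, // min-heap: values > current median
}

impl RunningMedian {
    fn new() -> Self {
        Self { not_greater: BinaryHeap::new(), greater: BinaryHeap::new() }
    }

    fn push(&mut self, v: i64) {
        if self.not_greater.peek().map_or(true, |&top| v <= top) {
            self.not_greater.push(v);
        } else {
            self.greater.push(Reverse(v));
        }
        // Rebalance: not_greater holds either the same number of elements as
        // greater, or exactly one more.
        if self.not_greater.len() > self.greater.len() + 1 {
            self.greater.push(Reverse(self.not_greater.pop().unwrap()));
        } else if self.greater.len() > self.not_greater.len() {
            self.not_greater.push(self.greater.pop().unwrap().0);
        }
    }

    fn median(&self) -> Option<f64> {
        let low = *self.not_greater.peek()?; // None on an empty sample
        if self.not_greater.len() > self.greater.len() {
            Some(low as f64) // odd count: the extra element is the median
        } else {
            let high = self.greater.peek().unwrap().0;
            Some((low as f64 + high as f64) / 2.0) // even count: average the two middles
        }
    }
}

Feeding -1, 1, 2 through `push` yields a median of 1, matching the "no null-value batch" expectation in the tests above.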
@@ -26,7 +26,7 @@ use common_query::prelude::*;
 use datatypes::prelude::*;
 use datatypes::types::OrdPrimitive;
 use datatypes::value::{ListValue, OrderedFloat};
-use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num::NumCast;
 use snafu::{ensure, OptionExt, ResultExt};
@@ -44,15 +44,15 @@ use snafu::{ensure, OptionExt, ResultExt};
 // This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
 // Here g is the fractional part of the index (which lies between i and j), and alpha and beta are correction constants modifying i and j.
 // i+g = (q-alpha)/(n-alpha-beta+1)
-// Below, ‘q’ is the quantile value, ‘n’ is the sample size and alpha and beta are constants. The following formula gives an interpolation “i + g” of where the quantile would be in the sorted sample.
-// With ‘i’ being the floor and ‘g’ the fractional part of the result.
+// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
+// With 'i' being the floor and 'g' the fractional part of the result.
 // the default method is linear, where
 // alpha = 1
 // beta = 1
 #[derive(Debug, Default)]
 pub struct Percentile<T>
 where
-    T: Primitive,
+    T: WrapperType,
 {
     greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
     not_greater: BinaryHeap<OrdPrimitive<T>>,
@@ -62,7 +62,7 @@ where
 
 impl<T> Percentile<T>
 where
-    T: Primitive,
+    T: WrapperType,
 {
     fn push(&mut self, value: T) {
         let value = OrdPrimitive::<T>(value);
@@ -93,8 +93,7 @@ where
 
 impl<T> Accumulator for Percentile<T>
 where
-    T: Primitive,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self
@@ -107,7 +106,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.p.into(),
         ])
@@ -129,14 +128,14 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
        };
 
         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
             err_msg: "expecting \"PERCENTILE\" function's second argument to be float64",
         })?;
         // `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
@@ -209,10 +208,11 @@ where
             ),
         })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
-            for v in column.iter_data().flatten() {
-                self.push(v);
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
+                for v in column.iter_data().flatten() {
+                    self.push(v);
+                }
             }
         }
         Ok(())
@@ -259,7 +259,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(Percentile::<$S>::default()))
+                Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(
@@ -292,7 +292,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::{Float64Vector, Int32Vector};
 
     use super::*;
     #[test]
@@ -307,8 +307,8 @@ mod test {
         // test update one not-null value
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
+            Arc::new(Int32Vector::from(vec![Some(42)])),
+            Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
         ];
         assert!(percentile.update_batch(&v).is_ok());
         assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
@@ -316,8 +316,8 @@ mod test {
         // test update one null value
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
+            Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
+            Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
         ];
         assert!(percentile.update_batch(&v).is_ok());
         assert_eq!(Value::Null, percentile.evaluate().unwrap());
@@ -325,12 +325,8 @@ mod test {
         // test update no null-value batch
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(100.0_f64),
                 Some(100.0_f64),
                 Some(100.0_f64),
@@ -342,13 +338,8 @@ mod test {
         // test update null-value batch
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-2i32),
-                None,
-                Some(3),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(100.0_f64),
                 Some(100.0_f64),
                 Some(100.0_f64),
@@ -362,13 +353,10 @@ mod test {
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
             Arc::new(ConstantVector::new(
-                Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+                Arc::new(Int32Vector::from_vec(vec![4])),
                 2,
             )),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
-                Some(100.0_f64),
-                Some(100.0_f64),
-            ])),
+            Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
         ];
         assert!(percentile.update_batch(&v).is_ok());
         assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
@@ -376,12 +364,8 @@ mod test {
         // test left border
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(0.0_f64),
                 Some(0.0_f64),
                 Some(0.0_f64),
@@ -393,12 +377,8 @@ mod test {
         // test medium
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(50.0_f64),
                 Some(50.0_f64),
                 Some(50.0_f64),
@@ -410,12 +390,8 @@ mod test {
         // test right border
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(100.0_f64),
                 Some(100.0_f64),
                 Some(100.0_f64),
@@ -431,12 +407,8 @@ mod test {
         // >> 6.400000000000
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(10i32),
-                Some(7),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(40.0_f64),
                 Some(40.0_f64),
                 Some(40.0_f64),
@@ -451,12 +423,8 @@ mod test {
         // >> 9.7000000000000011
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(10i32),
-                Some(7),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(95.0_f64),
                 Some(95.0_f64),
                 Some(95.0_f64),
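For reference, the linear method (alpha = beta = 1) described in the comment above works out to the interpolated index i + g = (p / 100) * (n - 1) for a percentile p over a sorted sample of size n, which is consistent with the worked results in the tests (40% of [4, 7, 10] gives 6.4, 95% gives 9.7). A minimal standalone sketch over a sorted, non-empty `f64` slice — names here are illustrative, not from the repo:

/// Linear-interpolation percentile (alpha = beta = 1); `data` must be sorted
/// ascending and non-empty, `p` is in percent.
fn percentile_linear(data: &[f64], p: f64) -> f64 {
    // Interpolated index: i + g = (p / 100) * (n - 1).
    let pos = (p / 100.0) * (data.len() - 1) as f64;
    let i = pos.floor() as usize;
    let g = pos - pos.floor();
    if i + 1 < data.len() {
        // Value i plus the fractional part g of the gap to value i + 1.
        data[i] + g * (data[i + 1] - data[i])
    } else {
        data[i] // right border
    }
}

// percentile_linear(&[4.0, 7.0, 10.0], 40.0) == 6.4 and
// percentile_linear(&[4.0, 7.0, 10.0], 95.0) == 9.7, matching the tests above.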
@@ -23,9 +23,9 @@ use common_query::error::{
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::types::PrimitiveType;
+use datatypes::types::{LogicalPrimitiveType, WrapperType};
 use datatypes::value::ListValue;
-use datatypes::vectors::{ConstantVector, Int64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};
@@ -34,8 +34,10 @@ use snafu::{ensure, OptionExt, ResultExt};
 #[derive(Debug, Default)]
 pub struct Polyval<T, PolyT>
 where
-    T: Primitive + AsPrimitive<PolyT>,
-    PolyT: Primitive + std::ops::Mul<Output = PolyT>,
+    T: WrapperType,
+    T::Native: AsPrimitive<PolyT::Native>,
+    PolyT: WrapperType,
+    PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
 {
     values: Vec<T>,
     // DataFusion casts the constant into the i64 type.
@@ -45,8 +47,10 @@ where
 
 impl<T, PolyT> Polyval<T, PolyT>
 where
-    T: Primitive + AsPrimitive<PolyT>,
-    PolyT: Primitive + std::ops::Mul<Output = PolyT>,
+    T: WrapperType,
+    T::Native: AsPrimitive<PolyT::Native>,
+    PolyT: WrapperType,
+    PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
 {
     fn push(&mut self, value: T) {
         self.values.push(value);
@@ -55,11 +59,11 @@ where
 
 impl<T, PolyT> Accumulator for Polyval<T, PolyT>
 where
-    T: Primitive + AsPrimitive<PolyT>,
-    PolyT: Primitive + std::ops::Mul<Output = PolyT> + std::iter::Sum<PolyT>,
-    for<'a> T: Scalar<RefType<'a> = T>,
-    for<'a> PolyT: Scalar<RefType<'a> = PolyT>,
-    i64: AsPrimitive<PolyT>,
+    T: WrapperType,
+    T::Native: AsPrimitive<PolyT::Native>,
+    PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
+    PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
+    i64: AsPrimitive<<PolyT as WrapperType>::Native>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self
@@ -70,7 +74,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.x.into(),
         ])
@@ -91,10 +95,10 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         (0..len).for_each(|_| {
             for v in column.iter_data().flatten() {
@@ -103,7 +107,7 @@ where
         });
 
         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<i64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
             err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
         })?;
         // `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
@@ -172,12 +176,14 @@ where
             ),
         })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
-            for v in column.iter_data().flatten() {
-                self.push(v);
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
+                for v in column.iter_data().flatten() {
+                    self.push(v);
+                }
             }
         }
 
         Ok(())
     }
@@ -196,7 +202,7 @@ where
             .values
             .iter()
             .enumerate()
-            .map(|(i, &value)| value.as_() * (x.pow((len - 1 - i) as u32)).as_())
+            .map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
             .sum();
         Ok(polyval.into())
     }
@@ -213,7 +219,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(Polyval::<$S,<$S as Primitive>::LargestType>::default()))
+                Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(
@@ -234,7 +240,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type,
             |$S| {
-                Ok(PrimitiveType::<<$S as Primitive>::LargestType>::default().into())
+                Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
             },
             {
                 unreachable!()
@@ -254,7 +260,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
     use super::*;
     #[test]
@@ -268,8 +274,8 @@ mod test {
         // test update one not-null value
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Some(3)])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
+            Arc::new(Int32Vector::from(vec![Some(3)])),
+            Arc::new(Int64Vector::from(vec![Some(2_i64)])),
         ];
         assert!(polyval.update_batch(&v).is_ok());
         assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());
@@ -277,8 +283,8 @@ mod test {
         // test update one null value
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
+            Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
+            Arc::new(Int64Vector::from(vec![Some(2_i64)])),
         ];
         assert!(polyval.update_batch(&v).is_ok());
         assert_eq!(Value::Null, polyval.evaluate().unwrap());
@@ -286,12 +292,8 @@ mod test {
         // test update no null-value batch
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(3),
-                Some(0),
-                Some(1),
-            ])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
+            Arc::new(Int64Vector::from(vec![
                 Some(2_i64),
                 Some(2_i64),
                 Some(2_i64),
@@ -303,13 +305,8 @@ mod test {
         // test update null-value batch
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(3),
-                Some(0),
-                None,
-                Some(1),
-            ])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
+            Arc::new(Int64Vector::from(vec![
                 Some(2_i64),
                 Some(2_i64),
                 Some(2_i64),
@@ -323,10 +320,10 @@ mod test {
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
             Arc::new(ConstantVector::new(
-                Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+                Arc::new(Int32Vector::from_vec(vec![4])),
                 2,
             )),
-            Arc::new(PrimitiveVector::<i64>::from(vec![Some(5_i64), Some(5_i64)])),
+            Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
         ];
         assert!(polyval.update_batch(&v).is_ok());
         assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());
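The `evaluate` body above computes the sum of value * x^(len - 1 - i), i.e. the accumulated values are treated as polynomial coefficients with the highest power first. A scalar restatement of that arithmetic (illustrative names, not repo code):

/// Polynomial evaluation as done by `evaluate` above: coefficients are
/// highest power first, so the result is sum of v[i] * x^(n - 1 - i).
fn polyval(coefficients: &[i64], x: i64) -> i64 {
    let n = coefficients.len();
    coefficients
        .iter()
        .enumerate()
        .map(|(i, &v)| v * x.pow((n - 1 - i) as u32))
        .sum()
}

// Matches the constant-vector test above: polyval(&[4, 4], 5) == 4 * 5 + 4 == 24,
// and the "no null-value" batch: polyval(&[3, 0, 1], 2) == 3 * 4 + 0 + 1 == 13.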
@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
 use datatypes::value::{ListValue, OrderedFloat};
-use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
 // https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
 
 #[derive(Debug, Default)]
-pub struct ScipyStatsNormCdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+pub struct ScipyStatsNormCdf<T> {
     values: Vec<T>,
     x: Option<f64>,
 }
 
-impl<T> ScipyStatsNormCdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+impl<T> ScipyStatsNormCdf<T> {
     fn push(&mut self, value: T) {
         self.values.push(value);
     }
@@ -52,8 +46,8 @@ where
 
 impl<T> Accumulator for ScipyStatsNormCdf<T>
 where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType + std::iter::Sum<T>,
+    T::Native: AsPrimitive<f64>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self
@@ -64,7 +58,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.x.into(),
         ])
@@ -86,14 +80,14 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
 
         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
            err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be float64",
        })?;
        let first = x.get(0);
@@ -160,19 +154,19 @@ where
             ),
         })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
-            for v in column.iter_data().flatten() {
-                self.push(v);
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
+                for v in column.iter_data().flatten() {
+                    self.push(v);
+                }
             }
         }
         Ok(())
     }
 
     fn evaluate(&self) -> Result<Value> {
-        let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
-        let mean = values.clone().mean();
-        let std_dev = values.std_dev();
+        let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
+        let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
         if mean.is_nan() || std_dev.is_nan() {
             Ok(Value::Null)
         } else {
@@ -198,7 +192,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(ScipyStatsNormCdf::<$S>::default()))
+                Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(
@@ -230,7 +224,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::{Float64Vector, Int32Vector};
 
     use super::*;
     #[test]
@@ -244,12 +238,8 @@ mod test {
         // test update no null-value batch
         let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 Some(2.0_f64),
                 Some(2.0_f64),
@@ -264,13 +254,8 @@ mod test {
         // test update null-value batch
         let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-2i32),
-                None,
-                Some(3),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 None,
                 Some(2.0_f64),
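The remainder of `evaluate` lies outside this hunk; judging by the `statrs::statistics::Statistics` import, it presumably feeds `mean` and `std_dev` into a normal distribution. A standalone sketch with the statrs crate — the `Normal`/`ContinuousCDF` names follow recent statrs releases and are an assumption here, not code from this diff:

use statrs::distribution::{ContinuousCDF, Normal};
use statrs::statistics::Statistics;

/// Sketch: fit a normal distribution to the sample and evaluate its CDF at x,
/// mirroring the NaN-to-Null guard in the accumulator above.
fn norm_cdf_of_sample(sample: &[f64], x: f64) -> Option<f64> {
    let mean = sample.iter().copied().mean();
    let std_dev = sample.iter().copied().std_dev();
    if mean.is_nan() || std_dev.is_nan() {
        return None; // corresponds to the Ok(Value::Null) branch
    }
    let dist = Normal::new(mean, std_dev).ok()?;
    Some(dist.cdf(x)) // the PDF variant below would use the density instead
}

The SCIPYSTATSNORMPDF accumulator that follows is structurally identical, differing only in evaluating the density rather than the cumulative distribution.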
@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
 use datatypes::value::{ListValue, OrderedFloat};
-use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
 // https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
 
 #[derive(Debug, Default)]
-pub struct ScipyStatsNormPdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+pub struct ScipyStatsNormPdf<T> {
     values: Vec<T>,
     x: Option<f64>,
 }
 
-impl<T> ScipyStatsNormPdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+impl<T> ScipyStatsNormPdf<T> {
     fn push(&mut self, value: T) {
         self.values.push(value);
     }
@@ -52,8 +46,8 @@ where
 
 impl<T> Accumulator for ScipyStatsNormPdf<T>
 where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType,
+    T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self
@@ -64,7 +58,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.x.into(),
         ])
@@ -86,14 +80,14 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
 
         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
            err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be float64",
        })?;
        let first = x.get(0);
@@ -160,19 +154,20 @@ where
             ),
         })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
-            for v in column.iter_data().flatten() {
-                self.push(v);
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
+                for v in column.iter_data().flatten() {
+                    self.push(v);
+                }
             }
         }
         Ok(())
     }
 
     fn evaluate(&self) -> Result<Value> {
-        let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
-        let mean = values.clone().mean();
-        let std_dev = values.std_dev();
+        let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
+        let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
 
         if mean.is_nan() || std_dev.is_nan() {
             Ok(Value::Null)
         } else {
@@ -198,7 +193,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(ScipyStatsNormPdf::<$S>::default()))
+                Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(
@@ -230,7 +225,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::{Float64Vector, Int32Vector};
 
     use super::*;
     #[test]
@@ -244,12 +239,8 @@ mod test {
         // test update no null-value batch
         let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 Some(2.0_f64),
                 Some(2.0_f64),
@@ -264,13 +255,8 @@ mod test {
         // test update null-value batch
         let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-2i32),
-                None,
-                Some(3),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 None,
                 Some(2.0_f64),
@@ -14,10 +14,10 @@
 
 use std::iter;
 
+use common_query::error::Result;
 use datatypes::prelude::*;
-use datatypes::vectors::ConstantVector;
+use datatypes::vectors::{ConstantVector, Helper};
 
-use crate::error::Result;
 use crate::scalars::expression::ctx::EvalContext;
 
 pub fn scalar_binary_op<L: Scalar, R: Scalar, O: Scalar, F>(
@@ -36,10 +36,9 @@ where
 
     let result = match (l.is_const(), r.is_const()) {
         (false, true) => {
-            let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
-            let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
-            let right: &<R as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(right.inner()) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
+            let right: &ConstantVector = unsafe { Helper::static_cast(r) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
             let b = right.get_data(0);
 
             let it = left.iter_data().map(|a| f(a, b, ctx));
@@ -47,8 +46,8 @@ where
         }
 
         (false, false) => {
-            let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
-            let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
 
             let it = left
                 .iter_data()
@@ -58,25 +57,22 @@ where
         }
 
         (true, false) => {
-            let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
-            let left: &<L as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(left.inner()) };
+            let left: &ConstantVector = unsafe { Helper::static_cast(l) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
             let a = left.get_data(0);
 
-            let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
             let it = right.iter_data().map(|b| f(a, b, ctx));
             <O as Scalar>::VectorType::from_owned_iterator(it)
         }
 
         (true, true) => {
-            let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
-            let left: &<L as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(left.inner()) };
+            let left: &ConstantVector = unsafe { Helper::static_cast(l) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
            let a = left.get_data(0);
 
-            let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
-            let right: &<R as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(right.inner()) };
+            let right: &ConstantVector = unsafe { Helper::static_cast(r) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
             let b = right.get_data(0);
 
             let it = iter::repeat(a)
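A hypothetical call site for `scalar_binary_op`, showing the `f(a, b, ctx)` closure shape that the four const/non-const arms above feed; the `add_columns` name and the `i64` type choices are illustrative only, following the way `pow.rs` invokes this helper later in this diff:

use std::sync::Arc;

/// Sketch: element-wise addition of two numeric columns via scalar_binary_op.
fn add_columns(l: &VectorRef, r: &VectorRef) -> Result<VectorRef> {
    let col = scalar_binary_op::<i64, i64, i64, _>(
        l,
        r,
        // Both operands arrive as Options, so nulls propagate to the output.
        |a, b, _ctx| match (a, b) {
            (Some(a), Some(b)) => Some(a + b),
            _ => None,
        },
        &mut EvalContext::default(),
    )?;
    Ok(Arc::new(col))
}

Either input may be a ConstantVector; the match on `(l.is_const(), r.is_const())` above unwraps the inner vector so the closure never needs to care.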
@@ -13,8 +13,7 @@
 // limitations under the License.
 
 use chrono_tz::Tz;
-
-use crate::error::Error;
+use common_query::error::Error;
 
 pub struct EvalContext {
     _tz: Tz,
@@ -12,10 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use common_query::error::{self, Result};
 use datatypes::prelude::*;
+use datatypes::vectors::Helper;
 use snafu::ResultExt;
 
-use crate::error::{GetScalarVectorSnafu, Result};
 use crate::scalars::expression::ctx::EvalContext;
 
 /// TODO: remove the allow_unused when it's used.
@@ -28,7 +29,7 @@ pub fn scalar_unary_op<L: Scalar, O: Scalar, F>(
 where
     F: Fn(Option<L::RefType<'_>>, &mut EvalContext) -> Option<O>,
 {
-    let left = VectorHelper::check_get_scalar::<L>(l).context(GetScalarVectorSnafu)?;
+    let left = Helper::check_get_scalar::<L>(l).context(error::GetScalarVectorSnafu)?;
     let it = left.iter_data().map(|a| f(a, ctx));
     let result = <O as Scalar>::VectorType::from_owned_iterator(it);
 
@@ -16,12 +16,11 @@ use std::fmt;
 use std::sync::Arc;
 
 use chrono_tz::Tz;
+use common_query::error::Result;
 use common_query::prelude::Signature;
 use datatypes::data_type::ConcreteDataType;
 use datatypes::vectors::VectorRef;
 
-use crate::error::Result;
-
 #[derive(Clone)]
 pub struct FunctionContext {
     pub tz: Tz,
@@ -13,10 +13,12 @@
 // limitations under the License.
 
 mod pow;
+mod rate;
 
 use std::sync::Arc;
 
 pub use pow::PowFunction;
+pub use rate::RateFunction;
 
 use crate::scalars::function_registry::FunctionRegistry;
 
@@ -25,5 +27,6 @@ pub(crate) struct MathFunction;
 impl MathFunction {
     pub fn register(registry: &FunctionRegistry) {
         registry.register(Arc::new(PowFunction::default()));
+        registry.register(Arc::new(RateFunction::default()))
     }
 }
@@ -15,15 +15,16 @@
 use std::fmt;
 use std::sync::Arc;
 
+use common_query::error::Result;
 use common_query::prelude::{Signature, Volatility};
 use datatypes::data_type::DataType;
 use datatypes::prelude::ConcreteDataType;
+use datatypes::types::LogicalPrimitiveType;
 use datatypes::vectors::VectorRef;
 use datatypes::with_match_primitive_type_id;
 use num::traits::Pow;
 use num_traits::AsPrimitive;
 
-use crate::error::Result;
 use crate::scalars::expression::{scalar_binary_op, EvalContext};
 use crate::scalars::function::{Function, FunctionContext};
 
@@ -46,7 +47,7 @@ impl Function for PowFunction {
     fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
         with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
             with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
-                let col = scalar_binary_op::<$S, $T, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
+                let col = scalar_binary_op::<<$S as LogicalPrimitiveType>::Native, <$T as LogicalPrimitiveType>::Native, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
                 Ok(Arc::new(col))
             },{
                 unreachable!()
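`scalar_pow` itself sits outside this hunk. A hypothetical version consistent with the call above (two native numeric operands, an `f64` result, and the `num::traits::Pow` import already present) might look like this sketch — a guess, not the file's actual code:

/// Sketch of a scalar_pow closure target: compute base^exp in f64 so any
/// pair of numeric input types is accepted.
fn scalar_pow<S, T>(base: Option<S>, exp: Option<T>, _ctx: &mut EvalContext) -> Option<f64>
where
    S: AsPrimitive<f64>,
    T: AsPrimitive<f64>,
{
    match (base, exp) {
        (Some(b), Some(e)) => Some(b.as_().pow(e.as_())),
        _ => None, // a null on either side yields a null result
    }
}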
src/common/function/src/scalars/math/rate.rs (new file, 106 lines)
@@ -0,0 +1,106 @@
+// Copyright 2022 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fmt;
+
+use common_query::error::{self, Result};
+use common_query::prelude::{Signature, Volatility};
+use datatypes::arrow::compute::kernels::{arithmetic, cast};
+use datatypes::arrow::datatypes::DataType;
+use datatypes::prelude::*;
+use datatypes::vectors::{Helper, VectorRef};
+use snafu::ResultExt;
+
+use crate::scalars::function::{Function, FunctionContext};
+
+/// Generates rates from a sequence of adjacent data points.
+#[derive(Clone, Debug, Default)]
+pub struct RateFunction;
+
+impl fmt::Display for RateFunction {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "RATE")
+    }
+}
+
+impl Function for RateFunction {
+    fn name(&self) -> &str {
+        "prom_rate"
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::float64_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        let val = &columns[0].to_arrow_array();
+        let val_0 = val.slice(0, val.len() - 1);
+        let val_1 = val.slice(1, val.len() - 1);
+        let dv = arithmetic::subtract_dyn(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
+        let ts = &columns[1].to_arrow_array();
+        let ts_0 = ts.slice(0, ts.len() - 1);
+        let ts_1 = ts.slice(1, ts.len() - 1);
+        let dt = arithmetic::subtract_dyn(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;
+
+        let dv = cast::cast(&dv, &DataType::Float64).context(error::TypeCastSnafu {
+            typ: DataType::Float64,
+        })?;
+        let dt = cast::cast(&dt, &DataType::Float64).context(error::TypeCastSnafu {
+            typ: DataType::Float64,
+        })?;
+        let rate = arithmetic::divide_dyn(&dv, &dt).context(error::ArrowComputeSnafu)?;
+        let v = Helper::try_into_vector(&rate).context(error::FromArrowArraySnafu)?;
+
+        Ok(v)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use common_query::prelude::TypeSignature;
+    use datatypes::vectors::{Float32Vector, Float64Vector, Int64Vector};
+
+    use super::*;
+    #[test]
+    fn test_rate_function() {
+        let rate = RateFunction::default();
+        assert_eq!("prom_rate", rate.name());
+        assert_eq!(
+            ConcreteDataType::float64_datatype(),
+            rate.return_type(&[]).unwrap()
+        );
+        assert!(matches!(rate.signature(),
+            Signature {
+                type_signature: TypeSignature::Uniform(2, valid_types),
+                volatility: Volatility::Immutable
+            } if valid_types == ConcreteDataType::numerics()
+        ));
+        let values = vec![1.0, 3.0, 6.0];
+        let ts = vec![0, 1, 2];
+
+        let args: Vec<VectorRef> = vec![
+            Arc::new(Float32Vector::from_vec(values)),
+            Arc::new(Int64Vector::from_vec(ts)),
+        ];
+        let vector = rate.eval(FunctionContext::default(), &args).unwrap();
+        let expect: VectorRef = Arc::new(Float64Vector::from_vec(vec![2.0, 3.0]));
+        assert_eq!(expect, vector);
+    }
+}
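Element-wise, `eval` above computes rate[i] = (v[i+1] - v[i]) / (t[i+1] - t[i]) using arrow kernels over offset slices. The same arithmetic as a plain-Rust sketch (illustrative names), reproducing the test's expected [2.0, 3.0]:

/// Scalar restatement of the slice-and-subtract pipeline in eval above.
fn rate(values: &[f64], ts: &[f64]) -> Vec<f64> {
    values
        .windows(2)
        .zip(ts.windows(2))
        .map(|(v, t)| (v[1] - v[0]) / (t[1] - t[0]))
        .collect()
}

// rate(&[1.0, 3.0, 6.0], &[0.0, 1.0, 2.0]) == [2.0, 3.0],
// matching test_rate_function above.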
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 mod clip;
-#[allow(unused)]
 mod interp;
 
 use std::sync::Arc;
@@ -15,14 +15,15 @@
 use std::fmt;
 use std::sync::Arc;
 
+use common_query::error::Result;
 use common_query::prelude::{Signature, Volatility};
-use datatypes::data_type::{ConcreteDataType, DataType};
-use datatypes::prelude::{Scalar, VectorRef};
-use datatypes::with_match_primitive_type_id;
-use num_traits::AsPrimitive;
+use datatypes::arrow::compute;
+use datatypes::arrow::datatypes::ArrowPrimitiveType;
+use datatypes::data_type::ConcreteDataType;
+use datatypes::prelude::*;
+use datatypes::vectors::PrimitiveVector;
+use datatypes::with_match_primitive_type_id;
 use paste::paste;
 
-use crate::error::Result;
 use crate::scalars::expression::{scalar_binary_op, EvalContext};
 use crate::scalars::function::{Function, FunctionContext};
 
@@ -34,25 +35,32 @@ macro_rules! define_eval {
     ($O: ident) => {
         paste! {
             fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
-                with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
-                    with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
-                        with_match_primitive_type_id!(columns[2].data_type().logical_type_id(), |$R| {
-                            // clip(a, min, max) is equivalent to min(max(a, min), max)
-                            let col: VectorRef = Arc::new(scalar_binary_op::<$S, $T, $O, _>(&columns[0], &columns[1], scalar_max, &mut EvalContext::default())?);
-                            let col = scalar_binary_op::<$O, $R, $O, _>(&col, &columns[2], scalar_min, &mut EvalContext::default())?;
-                            Ok(Arc::new(col))
-                        }, {
-                            unreachable!()
-                        })
-                    }, {
-                        unreachable!()
-                    })
-                }, {
-                    unreachable!()
-                })
+                fn cast_vector(input: &VectorRef) -> VectorRef {
+                    Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(
+                        compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap()
+                    ).unwrap()) as _
+                }
+                let operator_1 = cast_vector(&columns[0]);
+                let operator_2 = cast_vector(&columns[1]);
+                let operator_3 = cast_vector(&columns[2]);
+
+                // clip(a, min, max) is equivalent to min(max(a, min), max)
+                let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>(
+                    &operator_1,
+                    &operator_2,
+                    scalar_max,
+                    &mut EvalContext::default(),
+                )?);
+                let col = scalar_binary_op::<$O, $O, $O, _>(
+                    &col,
+                    &operator_3,
+                    scalar_min,
+                    &mut EvalContext::default(),
+                )?;
+                Ok(Arc::new(col))
             }
         }
    };
}
 
 define_eval!(i64);
@@ -108,27 +116,23 @@ pub fn max<T: PartialOrd>(input: T, max: T) -> T {
 }
 
 #[inline]
-fn scalar_min<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
+fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
 where
-    S: AsPrimitive<O>,
-    T: AsPrimitive<O>,
     O: Scalar + Copy + PartialOrd,
 {
     match (left, right) {
-        (Some(left), Some(right)) => Some(min(left.as_(), right.as_())),
+        (Some(left), Some(right)) => Some(min(left, right)),
         _ => None,
     }
 }
 
 #[inline]
-fn scalar_max<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
+fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
 where
-    S: AsPrimitive<O>,
-    T: AsPrimitive<O>,
     O: Scalar + Copy + PartialOrd,
 {
     match (left, right) {
-        (Some(left), Some(right)) => Some(max(left.as_(), right.as_())),
+        (Some(left), Some(right)) => Some(max(left, right)),
         _ => None,
     }
 }
@@ -143,11 +147,15 @@ impl fmt::Display for ClipFunction {
 mod tests {
     use common_query::prelude::TypeSignature;
     use datatypes::value::Value;
-    use datatypes::vectors::{ConstantVector, Float32Vector, Int32Vector, UInt32Vector};
+    use datatypes::vectors::{
+        ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
+        UInt32Vector, UInt8Vector,
+    };
 
     use super::*;
 
     #[test]
-    fn test_clip_function() {
+    fn test_clip_signature() {
         let clip = ClipFunction::default();
 
         assert_eq!("clip", clip.name());
@@ -190,16 +198,21 @@ mod tests {
             volatility: Volatility::Immutable
         } if valid_types == ConcreteDataType::numerics()
         ));
+    }
+
+    #[test]
+    fn test_clip_fn_signed() {
+        let clip = ClipFunction::default();
 
         // eval with signed integers
         let args: Vec<VectorRef> = vec![
             Arc::new(Int32Vector::from_values(0..10)),
             Arc::new(ConstantVector::new(
-                Arc::new(Int32Vector::from_vec(vec![3])),
+                Arc::new(Int8Vector::from_vec(vec![3])),
                 10,
             )),
             Arc::new(ConstantVector::new(
-                Arc::new(Int32Vector::from_vec(vec![6])),
+                Arc::new(Int16Vector::from_vec(vec![6])),
                 10,
             )),
         ];
@@ -217,16 +230,21 @@ mod tests {
             assert!(matches!(vector.get(i), Value::Int64(v) if v == 6));
         }
     }
+    }
+
+    #[test]
+    fn test_clip_fn_unsigned() {
+        let clip = ClipFunction::default();
 
         // eval with unsigned integers
         let args: Vec<VectorRef> = vec![
-            Arc::new(UInt32Vector::from_values(0..10)),
+            Arc::new(UInt8Vector::from_values(0..10)),
             Arc::new(ConstantVector::new(
                 Arc::new(UInt32Vector::from_vec(vec![3])),
                 10,
            )),
            Arc::new(ConstantVector::new(
-                Arc::new(UInt32Vector::from_vec(vec![6])),
+                Arc::new(UInt16Vector::from_vec(vec![6])),
                10,
            )),
        ];
@@ -244,12 +262,17 @@ mod tests {
             assert!(matches!(vector.get(i), Value::UInt64(v) if v == 6));
         }
     }
+
+    #[test]
+    fn test_clip_fn_float() {
+        let clip = ClipFunction::default();
 
         // eval with floats
         let args: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from_values(0..10)),
+            Arc::new(Int8Vector::from_values(0..10)),
             Arc::new(ConstantVector::new(
-                Arc::new(Int32Vector::from_vec(vec![3])),
+                Arc::new(UInt32Vector::from_vec(vec![3])),
                 10,
             )),
             Arc::new(ConstantVector::new(
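The comment in the macro above leans on the identity clip(a, min, max) = min(max(a, min), max), which is why two chained scalar_binary_op calls suffice. As a tiny scalar sketch (illustrative name):

/// clip as composed min/max, the identity the define_eval! macro exploits.
fn clip_scalar(a: i64, lo: i64, hi: i64) -> i64 {
    a.max(lo).min(hi)
}

// clip_scalar(1, 3, 6) == 3 and clip_scalar(8, 3, 6) == 6, matching the tests
// above where 0..10 is clamped into [3, 6].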
@@ -14,41 +14,18 @@
 
 use std::sync::Arc;
 
-use arrow::array::PrimitiveArray;
-use arrow::compute::cast::primitive_to_primitive;
-use arrow::datatypes::DataType::Float64;
+use common_query::error::{self, Result};
+use datatypes::arrow::compute::cast;
+use datatypes::arrow::datatypes::DataType as ArrowDataType;
 use datatypes::data_type::DataType;
 use datatypes::prelude::ScalarVector;
 use datatypes::type_id::LogicalTypeId;
 use datatypes::value::Value;
-use datatypes::vectors::{Float64Vector, PrimitiveVector, Vector, VectorRef};
-use datatypes::{arrow, with_match_primitive_type_id};
-use snafu::{ensure, Snafu};
-
-#[derive(Debug, Snafu)]
-pub enum Error {
-    #[snafu(display(
-        "The length of the args is not enough, expect at least: {}, have: {}",
-        expect,
-        actual,
-    ))]
-    ArgsLenNotEnough { expect: usize, actual: usize },
-
-    #[snafu(display("The sample {} is empty", name))]
-    SampleEmpty { name: String },
-
-    #[snafu(display(
-        "The length of the len1: {} doesn't match the length of the len2: {}",
-        len1,
-        len2,
-    ))]
-    LenNotEquals { len1: usize, len2: usize },
-}
-
-pub type Result<T> = std::result::Result<T, Error>;
+use datatypes::vectors::{Float64Vector, Vector, VectorRef};
+use datatypes::with_match_primitive_type_id;
+use snafu::{ensure, ResultExt};
 
 /* Search the biggest number that is smaller than x in xp. */
-fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize {
+fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
     for i in 0..xp.len() {
         if x < xp.get(i) {
             return i - 1;
@@ -58,7 +35,7 @@ fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize
 }
 
 /* Search the biggest number that is smaller than x in xp. */
-fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usize {
+fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
     let mut left = 0;
     let mut right = xp.len();
     /* If len <= 4 use linear search. */
@@ -77,27 +54,33 @@ fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usiz
     left - 1
 }
 
-fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<PrimitiveVector<f64>> {
+fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
     with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
         let tmp = arg.to_arrow_array();
-        let from = tmp.as_any().downcast_ref::<PrimitiveArray<$S>>().expect("cast failed");
-        let array = primitive_to_primitive(from, &Float64);
-        Ok(PrimitiveVector::new(array))
+        let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
+            typ: ArrowDataType::Float64,
+        })?;
+        // Safety: array has been cast to Float64Array.
+        Ok(Float64Vector::try_from_arrow_array(array).unwrap())
    },{
        unreachable!()
    })
 }
 
 /// https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491
 #[allow(unused)]
 pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
     let mut left = None;
     let mut right = None;
 
     ensure!(
         args.len() >= 3,
-        ArgsLenNotEnoughSnafu {
-            expect: 3_usize,
-            actual: args.len()
+        error::InvalidFuncArgsSnafu {
+            err_msg: format!(
+                "The length of the args is not enough, expect at least: {}, have: {}",
+                3,
+                args.len()
+            ),
         }
     );
 
@@ -109,9 +92,12 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
     if args.len() > 3 {
         ensure!(
             args.len() == 5,
-            ArgsLenNotEnoughSnafu {
-                expect: 5_usize,
-                actual: args.len()
+            error::InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not enough, expect exactly: {}, have: {}",
+                    5,
+                    args.len()
+                ),
             }
         );
 
@@ -123,14 +109,32 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
             .get_data(0);
     }
 
-    ensure!(x.len() != 0, SampleEmptySnafu { name: "x" });
-    ensure!(xp.len() != 0, SampleEmptySnafu { name: "xp" });
-    ensure!(fp.len() != 0, SampleEmptySnafu { name: "fp" });
+    ensure!(
+        x.len() != 0,
+        error::InvalidFuncArgsSnafu {
+            err_msg: "The sample x is empty",
+        }
+    );
+    ensure!(
+        xp.len() != 0,
+        error::InvalidFuncArgsSnafu {
+            err_msg: "The sample xp is empty",
+        }
+    );
+    ensure!(
+        fp.len() != 0,
+        error::InvalidFuncArgsSnafu {
+            err_msg: "The sample fp is empty",
+        }
+    );
     ensure!(
         xp.len() == fp.len(),
-        LenNotEqualsSnafu {
-            len1: xp.len(),
-            len2: fp.len(),
+        error::InvalidFuncArgsSnafu {
+            err_msg: format!(
+                "The length of xp: {} doesn't match the length of fp: {}",
+                xp.len(),
+                fp.len()
            ),
        }
    );
 
@@ -147,7 +151,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
 
     let res;
     if xp.len() == 1 {
-        res = x
+        let datas = x
             .iter_data()
             .map(|x| {
                 if Value::from(x) < xp.get(0) {
@@ -158,7 +162,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
                     fp.get_data(0)
                 }
             })
-            .collect::<Float64Vector>();
+            .collect::<Vec<_>>();
+        res = Float64Vector::from(datas);
     } else {
         let mut j = 0;
         /* only pre-calculate slopes if there are relatively few of them. */
@@ -185,7 +190,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
             }
             slopes = Some(slopes_tmp);
         }
-        res = x
+        let datas = x
             .iter_data()
             .map(|x| match x {
                 Some(xi) => {
@@ -248,7 +253,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
                 }
                 _ => None,
             })
-            .collect::<Float64Vector>();
+            .collect::<Vec<_>>();
+        res = Float64Vector::from(datas);
     }
     Ok(Arc::new(res) as _)
 }
@@ -257,8 +263,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
 mod tests {
     use std::sync::Arc;
 
-    use datatypes::prelude::ScalarVectorBuilder;
-    use datatypes::vectors::{Int32Vector, Int64Vector, PrimitiveVectorBuilder};
+    use datatypes::vectors::{Int32Vector, Int64Vector};
 
     use super::*;
     #[test]
@@ -341,12 +346,8 @@ mod tests {
         assert!(matches!(vector.get(0), Value::Float64(v) if v==x[0] as f64));
 
         // x=None output:Null
-        let input = [None, Some(0.0), Some(0.3)];
-        let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
-        for v in input {
-            builder.push(v);
-        }
-        let x = builder.finish();
+        let input = vec![None, Some(0.0), Some(0.3)];
+        let x = Float64Vector::from(input);
         let args: Vec<VectorRef> = vec![
             Arc::new(x),
             Arc::new(Int64Vector::from_vec(xp)),
@@ -15,11 +15,11 @@
 use std::fmt;
 use std::sync::Arc;

-use common_query::error::Result;
 use common_query::prelude::{Signature, Volatility};
 use datatypes::data_type::ConcreteDataType;
 use datatypes::prelude::VectorRef;

+use crate::error::Result;
 use crate::scalars::expression::{scalar_binary_op, EvalContext};
 use crate::scalars::function::{Function, FunctionContext};
@@ -17,16 +17,17 @@
 use std::fmt;
 use std::sync::Arc;

-use arrow::compute::arithmetics;
-use arrow::datatypes::DataType as ArrowDatatype;
-use arrow::scalar::PrimitiveScalar;
-use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
+use common_query::error::{
+    ArrowComputeSnafu, IntoVectorSnafu, Result, TypeCastSnafu, UnsupportedInputDataTypeSnafu,
+};
 use common_query::prelude::{Signature, Volatility};
+use datatypes::arrow::compute;
+use datatypes::arrow::datatypes::{DataType as ArrowDatatype, Int64Type};
+use datatypes::data_type::DataType;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::vectors::{TimestampVector, VectorRef};
+use datatypes::vectors::{TimestampMillisecondVector, VectorRef};
 use snafu::ResultExt;

-use crate::error::Result;
 use crate::scalars::function::{Function, FunctionContext};

 #[derive(Clone, Debug, Default)]
@@ -40,7 +41,7 @@ impl Function for FromUnixtimeFunction {
     }

     fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
-        Ok(ConcreteDataType::timestamp_millis_datatype())
+        Ok(ConcreteDataType::timestamp_millisecond_datatype())
     }

     fn signature(&self) -> Signature {
@@ -56,14 +57,18 @@ impl Function for FromUnixtimeFunction {
             ConcreteDataType::Int64(_) => {
                 let array = columns[0].to_arrow_array();
                 // Our timestamp vector's time unit is millisecond
-                let array = arithmetics::mul_scalar(
-                    &*array,
-                    &PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
-                );
+                let array = compute::multiply_scalar_dyn::<Int64Type>(&array, 1000i64)
+                    .context(ArrowComputeSnafu)?;

+                let arrow_datatype = &self.return_type(&[]).unwrap().as_arrow_type();
                 Ok(Arc::new(
-                    TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
-                        data_type: ArrowDatatype::Int64,
+                    TimestampMillisecondVector::try_from_arrow_array(
+                        compute::cast(&array, arrow_datatype).context(TypeCastSnafu {
+                            typ: ArrowDatatype::Int64,
+                        })?,
+                    )
+                    .context(IntoVectorSnafu {
+                        data_type: arrow_datatype.clone(),
                     })?,
                 ))
             }
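The rewritten body still performs the same arithmetic: Unix seconds are scaled to milliseconds before being wrapped in a millisecond-precision timestamp vector. A plain-Rust sketch of that scaling step, with no arrow dependency (names here are illustrative only):

```rust
// Models the multiply_scalar step in the hunk above: seconds -> milliseconds,
// nulls passed through untouched.
fn seconds_to_millis(seconds: &[Option<i64>]) -> Vec<Option<i64>> {
    seconds.iter().map(|s| s.map(|v| v * 1000)).collect()
}

fn main() {
    let input = [Some(1_667_446_797_i64), None];
    assert_eq!(
        seconds_to_millis(&input),
        vec![Some(1_667_446_797_000), None]
    );
}
```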
@@ -71,8 +76,7 @@ impl Function for FromUnixtimeFunction {
                 function: NAME,
                 datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
             }
-            .fail()
-            .map_err(|e| e.into()),
+            .fail(),
         }
     }
 }
@@ -96,7 +100,7 @@ mod tests {
     let f = FromUnixtimeFunction::default();
     assert_eq!("from_unixtime", f.name());
     assert_eq!(
-        ConcreteDataType::timestamp_millis_datatype(),
+        ConcreteDataType::timestamp_millisecond_datatype(),
         f.return_type(&[]).unwrap()
     );
@@ -19,7 +19,8 @@ use common_query::prelude::{
     ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
 };
 use datatypes::error::Error as DataTypeError;
-use datatypes::prelude::{ConcreteDataType, VectorHelper};
+use datatypes::prelude::*;
+use datatypes::vectors::Helper;
 use snafu::ResultExt;

 use crate::scalars::function::{FunctionContext, FunctionRef};
@@ -47,7 +48,7 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
     let args: Result<Vec<_>, DataTypeError> = args
         .iter()
         .map(|arg| match arg {
-            ColumnarValue::Scalar(v) => VectorHelper::try_from_scalar_value(v.clone(), rows),
+            ColumnarValue::Scalar(v) => Helper::try_from_scalar_value(v.clone(), rows),
             ColumnarValue::Vector(v) => Ok(v.clone()),
         })
         .collect();
@@ -1,5 +1,5 @@
 [package]
-name = "common-insert"
+name = "common-grpc-expr"
 version = "0.1.0"
 edition = "2021"
 license = "Apache-2.0"
@@ -8,10 +8,12 @@ license = "Apache-2.0"
 api = { path = "../../api" }
 async-trait = "0.1"
 common-base = { path = "../base" }
+common-catalog = { path = "../catalog" }
 common-error = { path = "../error" }
 common-grpc = { path = "../grpc" }
+common-query = { path = "../query" }
 common-telemetry = { path = "../telemetry" }
 common-time = { path = "../time" }
-common-query = { path = "../query" }
 datatypes = { path = "../../datatypes" }
 snafu = { version = "0.7", features = ["backtraces"] }
 table = { path = "../../table" }
src/common/grpc-expr/src/alter.rs (new file, 234 lines)
@@ -0,0 +1,234 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::v1::alter_expr::Kind;
use api::v1::{AlterExpr, CreateExpr, DropColumns};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, CreateTableRequest};

use crate::error::{
    ColumnNotFoundSnafu, CreateSchemaSnafu, InvalidColumnDefSnafu, MissingFieldSnafu,
    MissingTimestampColumnSnafu, Result,
};
/// Convert an [`AlterExpr`] to an optional [`AlterTableRequest`]
pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest>> {
    match expr.kind {
        Some(Kind::AddColumns(add_columns)) => {
            let add_column_requests = add_columns
                .add_columns
                .into_iter()
                .map(|ac| {
                    let column_def = ac.column_def.context(MissingFieldSnafu {
                        field: "column_def",
                    })?;

                    let schema =
                        column_def
                            .try_as_column_schema()
                            .context(InvalidColumnDefSnafu {
                                column: &column_def.name,
                            })?;
                    Ok(AddColumnRequest {
                        column_schema: schema,
                        is_key: ac.is_key,
                    })
                })
                .collect::<Result<Vec<_>>>()?;

            let alter_kind = AlterKind::AddColumns {
                columns: add_column_requests,
            };

            let request = AlterTableRequest {
                catalog_name: expr.catalog_name,
                schema_name: expr.schema_name,
                table_name: expr.table_name,
                alter_kind,
            };
            Ok(Some(request))
        }
        Some(Kind::DropColumns(DropColumns { drop_columns })) => {
            let alter_kind = AlterKind::DropColumns {
                names: drop_columns.into_iter().map(|c| c.name).collect(),
            };

            let request = AlterTableRequest {
                catalog_name: expr.catalog_name,
                schema_name: expr.schema_name,
                table_name: expr.table_name,
                alter_kind,
            };
            Ok(Some(request))
        }
        None => Ok(None),
    }
}
pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
    let column_schemas = expr
        .column_defs
        .iter()
        .map(|x| {
            x.try_as_column_schema()
                .context(InvalidColumnDefSnafu { column: &x.name })
        })
        .collect::<Result<Vec<ColumnSchema>>>()?;

    ensure!(
        column_schemas
            .iter()
            .any(|column| column.name == expr.time_index),
        MissingTimestampColumnSnafu {
            msg: format!("CreateExpr: {:?}", expr)
        }
    );

    let column_schemas = column_schemas
        .into_iter()
        .map(|column_schema| {
            if column_schema.name == expr.time_index {
                column_schema.with_time_index(true)
            } else {
                column_schema
            }
        })
        .collect::<Vec<_>>();

    Ok(Arc::new(
        SchemaBuilder::try_from(column_schemas)
            .context(CreateSchemaSnafu)?
            .build()
            .context(CreateSchemaSnafu)?,
    ))
}
pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<CreateTableRequest> {
    let schema = create_table_schema(&expr)?;
    let primary_key_indices = expr
        .primary_keys
        .iter()
        .map(|key| {
            schema
                .column_index_by_name(key)
                .context(ColumnNotFoundSnafu {
                    column_name: key,
                    table_name: &expr.table_name,
                })
        })
        .collect::<Result<Vec<usize>>>()?;

    let catalog_name = expr
        .catalog_name
        .unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
    let schema_name = expr
        .schema_name
        .unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());

    let region_ids = if expr.region_ids.is_empty() {
        vec![0]
    } else {
        expr.region_ids
    };

    Ok(CreateTableRequest {
        id: table_id,
        catalog_name,
        schema_name,
        table_name: expr.table_name,
        desc: expr.desc,
        schema,
        region_numbers: region_ids,
        primary_key_indices,
        create_if_not_exists: expr.create_if_not_exists,
        table_options: expr.table_options,
    })
}
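A hypothetical usage sketch for `create_expr_to_request`, which the tests below do not cover. It would live inside this crate; the field shapes follow the prost-generated types used above, and `..Default::default()` relies on prost's derived `Default`:

```rust
use api::v1::{ColumnDataType, ColumnDef, CreateExpr};

use crate::alter::create_expr_to_request;
use crate::error::Result;

// Hypothetical helper, not in the diff: exercise the conversion end to end
// with a single timestamp column.
fn demo() -> Result<()> {
    let expr = CreateExpr {
        table_name: "demo".to_string(),
        column_defs: vec![ColumnDef {
            name: "ts".to_string(),
            datatype: ColumnDataType::TimestampMillisecond as i32,
            is_nullable: false,
            default_constraint: None,
        }],
        time_index: "ts".to_string(),
        create_if_not_exists: true,
        ..Default::default()
    };

    // 42 is an arbitrary table id for the sketch.
    let request = create_expr_to_request(42, expr)?;
    assert_eq!("demo", request.table_name);
    assert_eq!(vec![0], request.region_numbers); // default region when none given
    Ok(())
}
```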
#[cfg(test)]
mod tests {
    use api::v1::{AddColumn, AddColumns, ColumnDataType, ColumnDef, DropColumn};
    use datatypes::prelude::ConcreteDataType;

    use super::*;

    #[test]
    fn test_alter_expr_to_request() {
        let expr = AlterExpr {
            catalog_name: None,
            schema_name: None,
            table_name: "monitor".to_string(),

            kind: Some(Kind::AddColumns(AddColumns {
                add_columns: vec![AddColumn {
                    column_def: Some(ColumnDef {
                        name: "mem_usage".to_string(),
                        datatype: ColumnDataType::Float64 as i32,
                        is_nullable: false,
                        default_constraint: None,
                    }),
                    is_key: false,
                }],
            })),
        };

        let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
        assert_eq!(None, alter_request.catalog_name);
        assert_eq!(None, alter_request.schema_name);
        assert_eq!("monitor".to_string(), alter_request.table_name);
        let add_column = match alter_request.alter_kind {
            AlterKind::AddColumns { mut columns } => columns.pop().unwrap(),
            _ => unreachable!(),
        };

        assert!(!add_column.is_key);
        assert_eq!("mem_usage", add_column.column_schema.name);
        assert_eq!(
            ConcreteDataType::float64_datatype(),
            add_column.column_schema.data_type
        );
    }

    #[test]
    fn test_drop_column_expr() {
        let expr = AlterExpr {
            catalog_name: Some("test_catalog".to_string()),
            schema_name: Some("test_schema".to_string()),
            table_name: "monitor".to_string(),

            kind: Some(Kind::DropColumns(DropColumns {
                drop_columns: vec![DropColumn {
                    name: "mem_usage".to_string(),
                }],
            })),
        };

        let alter_request = alter_expr_to_request(expr).unwrap().unwrap();
        assert_eq!(Some("test_catalog".to_string()), alter_request.catalog_name);
        assert_eq!(Some("test_schema".to_string()), alter_request.schema_name);
        assert_eq!("monitor".to_string(), alter_request.table_name);

        let mut drop_names = match alter_request.alter_kind {
            AlterKind::DropColumns { names } => names,
            _ => unreachable!(),
        };
        assert_eq!(1, drop_names.len());
        assert_eq!("mem_usage".to_string(), drop_names.pop().unwrap());
    }
}
@@ -22,7 +22,7 @@ use snafu::{Backtrace, ErrorCompat};
 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
 pub enum Error {
-    #[snafu(display("Column {} not found in table {}", column_name, table_name))]
+    #[snafu(display("Column `{}` not found in table `{}`", column_name, table_name))]
     ColumnNotFound {
         column_name: String,
         table_name: String,
@@ -57,8 +57,8 @@ pub enum Error {
         backtrace: Backtrace,
     },

-    #[snafu(display("Missing timestamp column in request"))]
-    MissingTimestampColumn { backtrace: Backtrace },
+    #[snafu(display("Missing timestamp column, msg: {}", msg))]
+    MissingTimestampColumn { msg: String, backtrace: Backtrace },

     #[snafu(display("Invalid column proto: {}", err_msg))]
     InvalidColumnProto {
@@ -70,6 +70,26 @@ pub enum Error {
         #[snafu(backtrace)]
         source: datatypes::error::Error,
     },

+    #[snafu(display("Missing required field in protobuf, field: {}", field))]
+    MissingField { field: String, backtrace: Backtrace },
+
+    #[snafu(display("Invalid column default constraint, source: {}", source))]
+    ColumnDefaultConstraint {
+        #[snafu(backtrace)]
+        source: datatypes::error::Error,
+    },
+
+    #[snafu(display(
+        "Invalid column proto definition, column: {}, source: {}",
+        column,
+        source
+    ))]
+    InvalidColumnDef {
+        column: String,
+        #[snafu(backtrace)]
+        source: api::error::Error,
+    },
 }

 pub type Result<T> = std::result::Result<T, Error>;
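The new `MissingField` variant is raised through snafu's `OptionExt::context`, as `alter.rs` does with `ac.column_def.context(MissingFieldSnafu { field: "column_def" })?`. A minimal standalone sketch of that pattern (backtrace field omitted so the example runs with plain snafu):

```rust
use snafu::{OptionExt, Snafu};

#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Missing required field in protobuf, field: {}", field))]
    MissingField { field: String },
}

// Unwrap an optional protobuf field into a typed error.
fn require<T>(value: Option<T>, field: &str) -> Result<T, Error> {
    value.context(MissingFieldSnafu { field })
}

fn main() {
    let missing: Option<i32> = None;
    let err = require(missing, "column_def").unwrap_err();
    assert_eq!(
        "Missing required field in protobuf, field: column_def",
        err.to_string()
    );
}
```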
@@ -87,6 +107,9 @@ impl ErrorExt for Error {
             | Error::MissingTimestampColumn { .. } => StatusCode::InvalidArguments,
             Error::InvalidColumnProto { .. } => StatusCode::InvalidArguments,
             Error::CreateVector { .. } => StatusCode::InvalidArguments,
+            Error::MissingField { .. } => StatusCode::InvalidArguments,
+            Error::ColumnDefaultConstraint { source, .. } => source.status_code(),
+            Error::InvalidColumnDef { source, .. } => source.status_code(),
         }
     }
     fn backtrace_opt(&self) -> Option<&Backtrace> {
@@ -14,30 +14,27 @@

 use std::collections::hash_map::Entry;
 use std::collections::{HashMap, HashSet};
-use std::ops::Deref;
 use std::sync::Arc;

 use api::helper::ColumnDataTypeWrapper;
-use api::v1::codec::InsertBatch;
 use api::v1::column::{SemanticType, Values};
 use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateExpr};
 use common_base::BitVec;
 use common_time::timestamp::Timestamp;
 use common_time::{Date, DateTime};
-use datatypes::data_type::ConcreteDataType;
+use datatypes::data_type::{ConcreteDataType, DataType};
 use datatypes::prelude::{ValueRef, VectorRef};
 use datatypes::schema::SchemaRef;
 use datatypes::value::Value;
-use datatypes::vectors::VectorBuilder;
+use datatypes::vectors::MutableVector;
 use snafu::{ensure, OptionExt, ResultExt};
 use table::metadata::TableId;
 use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
 use table::Table;

 use crate::error::{
-    ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DecodeInsertSnafu,
-    DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu, InvalidColumnProtoSnafu,
-    MissingTimestampColumnSnafu, Result,
+    ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DuplicatedTimestampColumnSnafu,
+    IllegalInsertDataSnafu, InvalidColumnProtoSnafu, MissingTimestampColumnSnafu, Result,
 };
 const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
 const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
@@ -52,35 +49,25 @@ fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnDef {
     }
 }

-pub fn find_new_columns(
-    schema: &SchemaRef,
-    insert_batches: &[InsertBatch],
-) -> Result<Option<AddColumns>> {
+pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option<AddColumns>> {
     let mut columns_to_add = Vec::default();
     let mut new_columns: HashSet<String> = HashSet::default();

-    for InsertBatch { columns, row_count } in insert_batches {
-        if *row_count == 0 || columns.is_empty() {
-            continue;
-        }
-
-        for Column {
-            column_name,
-            semantic_type,
-            datatype,
-            ..
-        } in columns
-        {
-            if schema.column_schema_by_name(column_name).is_none() && !new_columns.contains(column_name)
-            {
-                let column_def = Some(build_column_def(column_name, *datatype, true));
-                columns_to_add.push(AddColumn {
-                    column_def,
-                    is_key: *semantic_type == TAG_SEMANTIC_TYPE,
-                });
-                new_columns.insert(column_name.to_string());
-            }
+    for Column {
+        column_name,
+        semantic_type,
+        datatype,
+        ..
+    } in columns
+    {
+        if schema.column_schema_by_name(column_name).is_none()
+            && !new_columns.contains(column_name)
+        {
+            let column_def = Some(build_column_def(column_name, *datatype, true));
+            columns_to_add.push(AddColumn {
+                column_def,
+                is_key: *semantic_type == TAG_SEMANTIC_TYPE,
+            });
+            new_columns.insert(column_name.to_string());
+        }
     }
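After this change `find_new_columns` scans one flat column list instead of nested insert batches. A simplified model (not the real API) of what it computes: collect each incoming name the schema does not know yet, de-duplicated across the scan:

```rust
use std::collections::HashSet;

fn new_column_names<'a>(known: &HashSet<&str>, incoming: &[&'a str]) -> Vec<&'a str> {
    let mut seen = HashSet::new();
    incoming
        .iter()
        // Keep a name only if the schema lacks it and we haven't kept it yet.
        .filter(|name| !known.contains(**name) && seen.insert(**name))
        .copied()
        .collect()
}

fn main() {
    let known: HashSet<&str> = ["host", "ts"].into_iter().collect();
    let incoming = ["host", "cpu", "memory", "cpu", "ts"];
    assert_eq!(vec!["cpu", "memory"], new_column_names(&known, &incoming));
}
```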
@@ -112,7 +99,7 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
     let column_datatype = wrapper.datatype();

     let rows = rows as usize;
-    let mut vector = VectorBuilder::with_capacity(wrapper.into(), rows);
+    let mut vector = ConcreteDataType::from(wrapper).create_mutable_vector(rows);

     if let Some(values) = &column.values {
         let values = collect_column_values(column_datatype, values);
@@ -123,21 +110,31 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {

         for i in 0..rows {
             if let Some(true) = nulls_iter.next() {
-                vector.push_null();
+                vector
+                    .push_value_ref(ValueRef::Null)
+                    .context(CreateVectorSnafu)?;
             } else {
-                let value_ref = values_iter.next().context(InvalidColumnProtoSnafu {
-                    err_msg: format!(
-                        "value not found at position {} of column {}",
-                        i, &column.column_name
-                    ),
-                })?;
-                vector.try_push_ref(value_ref).context(CreateVectorSnafu)?;
+                let value_ref = values_iter
+                    .next()
+                    .with_context(|| InvalidColumnProtoSnafu {
+                        err_msg: format!(
+                            "value not found at position {} of column {}",
+                            i, &column.column_name
+                        ),
+                    })?;
+                vector
+                    .push_value_ref(value_ref)
+                    .context(CreateVectorSnafu)?;
             }
         }
     } else {
-        (0..rows).for_each(|_| vector.push_null());
+        (0..rows).try_for_each(|_| {
+            vector
+                .push_value_ref(ValueRef::Null)
+                .context(CreateVectorSnafu)
+        })?;
     }
-    Ok(vector.finish())
+    Ok(vector.to_vector())
 }

 fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Vec<ValueRef> {
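A hypothetical driver for `column_to_vector`, not part of the diff. The `Column` field shapes follow the mocks later in this file; `f64_values` is assumed to be the `Values` field carrying Float64 payloads in the api proto:

```rust
use api::v1::column::Values;
use api::v1::{Column, ColumnDataType};

use crate::error::Result;
use crate::insert::column_to_vector;

fn demo() -> Result<()> {
    let column = Column {
        column_name: "cpu".to_string(),
        values: Some(Values {
            f64_values: vec![0.31, 0.41], // field name assumed from the proto
            ..Default::default()
        }),
        null_mask: vec![0], // all bits clear: no nulls
        datatype: ColumnDataType::Float64 as i32,
        ..Default::default()
    };

    let vector = column_to_vector(&column, 2)?;
    assert_eq!(2, vector.len());
    Ok(())
}
```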
@@ -187,9 +184,24 @@ fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Vec<ValueRef> {
                 DateTime::new(*v)
             ))
         }
-        ColumnDataType::Timestamp => {
-            collect_values!(values.ts_millis_values, |v| ValueRef::Timestamp(
-                Timestamp::from_millis(*v)
+        ColumnDataType::TimestampSecond => {
+            collect_values!(values.ts_second_values, |v| ValueRef::Timestamp(
+                Timestamp::new_second(*v)
+            ))
+        }
+        ColumnDataType::TimestampMillisecond => {
+            collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
+                Timestamp::new_millisecond(*v)
+            ))
+        }
+        ColumnDataType::TimestampMicrosecond => {
+            collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
+                Timestamp::new_microsecond(*v)
+            ))
+        }
+        ColumnDataType::TimestampNanosecond => {
+            collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
+                Timestamp::new_nanosecond(*v)
             ))
         }
     }
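The four new match arms map each proto time unit onto the matching `common_time::Timestamp` constructor. A plain-Rust sketch of the unit arithmetic they imply (1 s = 10^3 ms = 10^6 us = 10^9 ns; names here are illustrative):

```rust
fn to_nanos(value: i64, unit: &str) -> i64 {
    match unit {
        "second" => value * 1_000_000_000,
        "millisecond" => value * 1_000_000,
        "microsecond" => value * 1_000,
        "nanosecond" => value,
        _ => unreachable!("unknown unit"),
    }
}

fn main() {
    // Equal instants expressed at different precisions agree in nanoseconds.
    assert_eq!(to_nanos(1, "second"), to_nanos(1_000, "millisecond"));
    assert_eq!(to_nanos(1, "millisecond"), to_nanos(1_000, "microsecond"));
}
```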
@@ -201,89 +213,84 @@ pub fn build_create_expr_from_insertion(
     schema_name: &str,
     table_id: Option<TableId>,
     table_name: &str,
-    insert_batches: &[InsertBatch],
+    columns: &[Column],
 ) -> Result<CreateExpr> {
     let mut new_columns: HashSet<String> = HashSet::default();
     let mut column_defs = Vec::default();
     let mut primary_key_indices = Vec::default();
     let mut timestamp_index = usize::MAX;

-    for InsertBatch { columns, row_count } in insert_batches {
-        if *row_count == 0 || columns.is_empty() {
-            continue;
-        }
-
-        for Column {
-            column_name,
-            semantic_type,
-            datatype,
-            ..
-        } in columns
-        {
-            if !new_columns.contains(column_name) {
-                let mut is_nullable = true;
-                match *semantic_type {
-                    TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
-                    TIMESTAMP_SEMANTIC_TYPE => {
-                        ensure!(
-                            timestamp_index == usize::MAX,
-                            DuplicatedTimestampColumnSnafu {
-                                exists: &columns[timestamp_index].column_name,
-                                duplicated: column_name,
-                            }
-                        );
-                        timestamp_index = column_defs.len();
-                        // Timestamp column must not be null.
-                        is_nullable = false;
-                    }
-                    _ => {}
+    for Column {
+        column_name,
+        semantic_type,
+        datatype,
+        ..
+    } in columns
+    {
+        if !new_columns.contains(column_name) {
+            let mut is_nullable = true;
+            match *semantic_type {
+                TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
+                TIMESTAMP_SEMANTIC_TYPE => {
+                    ensure!(
+                        timestamp_index == usize::MAX,
+                        DuplicatedTimestampColumnSnafu {
+                            exists: &columns[timestamp_index].column_name,
+                            duplicated: column_name,
+                        }
+                    );
+                    timestamp_index = column_defs.len();
+                    // Timestamp column must not be null.
+                    is_nullable = false;
                 }
-
-                let column_def = build_column_def(column_name, *datatype, is_nullable);
-                column_defs.push(column_def);
-                new_columns.insert(column_name.to_string());
+                _ => {}
             }
-        }

-        ensure!(timestamp_index != usize::MAX, MissingTimestampColumnSnafu);
-        let timestamp_field_name = columns[timestamp_index].column_name.clone();
-
-        let primary_keys = primary_key_indices
-            .iter()
-            .map(|idx| columns[*idx].column_name.clone())
-            .collect::<Vec<_>>();
-
-        let expr = CreateExpr {
-            catalog_name: Some(catalog_name.to_string()),
-            schema_name: Some(schema_name.to_string()),
-            table_name: table_name.to_string(),
-            desc: Some("Created on insertion".to_string()),
-            column_defs,
-            time_index: timestamp_field_name,
-            primary_keys,
-            create_if_not_exists: true,
-            table_options: Default::default(),
-            table_id,
-            region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
-        };
-
-        return Ok(expr);
-    }
-
-    IllegalInsertDataSnafu.fail()
+            let column_def = build_column_def(column_name, *datatype, is_nullable);
+            column_defs.push(column_def);
+            new_columns.insert(column_name.to_string());
+        }
+    }
+
+    ensure!(
+        timestamp_index != usize::MAX,
+        MissingTimestampColumnSnafu { msg: table_name }
+    );
+    let timestamp_field_name = columns[timestamp_index].column_name.clone();
+
+    let primary_keys = primary_key_indices
+        .iter()
+        .map(|idx| columns[*idx].column_name.clone())
+        .collect::<Vec<_>>();
+
+    let expr = CreateExpr {
+        catalog_name: Some(catalog_name.to_string()),
+        schema_name: Some(schema_name.to_string()),
+        table_name: table_name.to_string(),
+        desc: Some("Created on insertion".to_string()),
+        column_defs,
+        time_index: timestamp_field_name,
+        primary_keys,
+        create_if_not_exists: true,
+        table_options: Default::default(),
+        table_id,
+        region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
+    };
+
+    Ok(expr)
 }
 pub fn insertion_expr_to_request(
     catalog_name: &str,
     schema_name: &str,
     table_name: &str,
-    insert_batches: Vec<InsertBatch>,
+    insert_batches: Vec<(Vec<Column>, u32)>,
     table: Arc<dyn Table>,
 ) -> Result<InsertRequest> {
     let schema = table.schema();
     let mut columns_builders = HashMap::with_capacity(schema.column_schemas().len());

-    for InsertBatch { columns, row_count } in insert_batches {
+    for (columns, row_count) in insert_batches {
         for Column {
             column_name,
             values,
@@ -307,10 +314,7 @@ pub fn insertion_expr_to_request(
                 },
             )?;
             let data_type = &column_schema.data_type;
-            entry.insert(VectorBuilder::with_capacity(
-                data_type.clone(),
-                row_count as usize,
-            ))
+            entry.insert(data_type.create_mutable_vector(row_count as usize))
         }
     };
     add_values_to_builder(vector_builder, values, row_count as usize, null_mask)?;
@@ -318,7 +322,7 @@ pub fn insertion_expr_to_request(
     }
     let columns_values = columns_builders
         .into_iter()
-        .map(|(column_name, mut vector_builder)| (column_name, vector_builder.finish()))
+        .map(|(column_name, mut vector_builder)| (column_name, vector_builder.to_vector()))
         .collect();

     Ok(InsertRequest {
@@ -329,16 +333,8 @@ pub fn insertion_expr_to_request(
     })
 }

-#[inline]
-pub fn insert_batches(bytes_vec: &[Vec<u8>]) -> Result<Vec<InsertBatch>> {
-    bytes_vec
-        .iter()
-        .map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
-        .collect()
-}
-
 fn add_values_to_builder(
-    builder: &mut VectorBuilder,
+    builder: &mut Box<dyn MutableVector>,
     values: Values,
     row_count: usize,
     null_mask: Vec<u8>,
@@ -349,9 +345,11 @@ fn add_values_to_builder(
     if null_mask.is_empty() {
         ensure!(values.len() == row_count, IllegalInsertDataSnafu);

-        values.iter().for_each(|value| {
-            builder.push(value);
-        });
+        values.iter().try_for_each(|value| {
+            builder
+                .push_value_ref(value.as_value_ref())
+                .context(CreateVectorSnafu)
+        })?;
     } else {
         let null_mask = BitVec::from_vec(null_mask);
         ensure!(
@@ -362,9 +360,13 @@ fn add_values_to_builder(
         let mut idx_of_values = 0;
         for idx in 0..row_count {
             match is_null(&null_mask, idx) {
-                Some(true) => builder.push(&Value::Null),
+                Some(true) => builder
+                    .push_value_ref(ValueRef::Null)
+                    .context(CreateVectorSnafu)?,
                 _ => {
-                    builder.push(&values[idx_of_values]);
+                    builder
+                        .push_value_ref(values[idx_of_values].as_value_ref())
+                        .context(CreateVectorSnafu)?;
                     idx_of_values += 1
                 }
             }
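A hedged sketch of the null-mask walk above: each bit of the mask marks a null row; set bits consume no value, clear bits consume the next value in order. The least-significant-bit-first order is an assumption from the `BitVec` usage here:

```rust
fn expand(values: &[i64], null_mask: &[u8], row_count: usize) -> Vec<Option<i64>> {
    let mut out = Vec::with_capacity(row_count);
    let mut idx_of_values = 0;
    for idx in 0..row_count {
        // Bit idx of the mask, least-significant bit = first row (assumed).
        let is_null = (null_mask[idx / 8] >> (idx % 8)) & 1 == 1;
        if is_null {
            out.push(None);
        } else {
            out.push(Some(values[idx_of_values]));
            idx_of_values += 1;
        }
    }
    out
}

fn main() {
    // Mask 0b0000_0010 marks row 1 (the second row) as null.
    assert_eq!(
        vec![Some(7), None, Some(9)],
        expand(&[7, 9], &[0b0000_0010], 3)
    );
}
```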
@@ -444,9 +446,9 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
             .map(|v| Value::Date(v.into()))
             .collect(),
         ConcreteDataType::Timestamp(_) => values
-            .ts_millis_values
+            .ts_millisecond_values
             .into_iter()
-            .map(|v| Value::Timestamp(Timestamp::from_millis(v)))
+            .map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
             .collect(),
         ConcreteDataType::Null(_) => unreachable!(),
         ConcreteDataType::List(_) => unreachable!(),
@@ -463,9 +465,8 @@ mod tests {
     use std::sync::Arc;

     use api::helper::ColumnDataTypeWrapper;
-    use api::v1::codec::InsertBatch;
     use api::v1::column::{self, SemanticType, Values};
-    use api::v1::{insert_expr, Column, ColumnDataType};
+    use api::v1::{Column, ColumnDataType};
     use common_base::BitVec;
     use common_query::physical_plan::PhysicalPlanRef;
     use common_query::prelude::Expr;
@@ -479,11 +480,12 @@ mod tests {
     use table::Table;

     use super::{
-        build_create_expr_from_insertion, convert_values, find_new_columns, insert_batches,
-        insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
+        build_create_expr_from_insertion, convert_values, insertion_expr_to_request, is_null,
+        TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
     };
     use crate::error;
     use crate::error::ColumnDataTypeSnafu;
+    use crate::insert::find_new_columns;

     #[inline]
     fn build_column_schema(
@@ -508,11 +510,10 @@ mod tests {

     assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());

-    let mock_batch_bytes = mock_insert_batches();
-    let insert_batches = insert_batches(&mock_batch_bytes).unwrap();
+    let insert_batch = mock_insert_batch();

     let create_expr =
-        build_create_expr_from_insertion("", "", table_id, table_name, &insert_batches)
+        build_create_expr_from_insertion("", "", table_id, table_name, &insert_batch.0)
             .unwrap();

     assert_eq!(table_id, create_expr.table_id);
@@ -570,7 +571,7 @@ mod tests {
     );

     assert_eq!(
-        ConcreteDataType::timestamp_millis_datatype(),
+        ConcreteDataType::timestamp_millisecond_datatype(),
         ConcreteDataType::from(
             ColumnDataTypeWrapper::try_new(
                 column_defs
@@ -598,9 +599,9 @@ mod tests {

     assert!(find_new_columns(&schema, &[]).unwrap().is_none());

-    let mock_insert_bytes = mock_insert_batches();
-    let insert_batches = insert_batches(&mock_insert_bytes).unwrap();
-    let add_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
+    let insert_batch = mock_insert_batch();
+
+    let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap();

     assert_eq!(2, add_columns.add_columns.len());
     let host_column = &add_columns.add_columns[0];
@@ -630,10 +631,7 @@ mod tests {
 fn test_insertion_expr_to_request() {
     let table: Arc<dyn Table> = Arc::new(DemoTable {});

-    let values = insert_expr::Values {
-        values: mock_insert_batches(),
-    };
-    let insert_batches = insert_batches(&values.values).unwrap();
+    let insert_batches = vec![mock_insert_batch()];
     let insert_req =
         insertion_expr_to_request("greptime", "public", "demo", insert_batches, table).unwrap();
@@ -654,8 +652,8 @@ mod tests {
     assert_eq!(Value::Float64(0.1.into()), memory.get(1));

     let ts = insert_req.columns_values.get("ts").unwrap();
-    assert_eq!(Value::Timestamp(Timestamp::from_millis(100)), ts.get(0));
-    assert_eq!(Value::Timestamp(Timestamp::from_millis(101)), ts.get(1));
+    assert_eq!(Value::Timestamp(Timestamp::new_millisecond(100)), ts.get(0));
+    assert_eq!(Value::Timestamp(Timestamp::new_millisecond(101)), ts.get(1));
 }

 #[test]
@@ -705,8 +703,12 @@ mod tests {
     ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
     ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
     ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
-    ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), true)
-        .with_time_index(true),
+    ColumnSchema::new(
+        "ts",
+        ConcreteDataType::timestamp_millisecond_datatype(),
+        true,
+    )
+    .with_time_index(true),
 ];

 Arc::new(
@@ -731,7 +733,7 @@ mod tests {
     }
 }

-fn mock_insert_batches() -> Vec<Vec<u8>> {
+fn mock_insert_batch() -> (Vec<Column>, u32) {
     let row_count = 2;

     let host_vals = column::Values {
@@ -771,7 +773,7 @@ mod tests {
     };

     let ts_vals = column::Values {
-        ts_millis_values: vec![100, 101],
+        ts_millisecond_values: vec![100, 101],
         ..Default::default()
     };
     let ts_column = Column {
@@ -779,13 +781,12 @@ mod tests {
         semantic_type: TIMESTAMP_SEMANTIC_TYPE,
         values: Some(ts_vals),
         null_mask: vec![0],
-        datatype: ColumnDataType::Timestamp as i32,
+        datatype: ColumnDataType::TimestampMillisecond as i32,
     };

-    let insert_batch = InsertBatch {
-        columns: vec![host_column, cpu_column, mem_column, ts_column],
+    (
+        vec![host_column, cpu_column, mem_column, ts_column],
         row_count,
-    };
-    vec![insert_batch.into()]
+    )
 }
 }
@@ -1,3 +1,4 @@
+#![feature(assert_matches)]
 // Copyright 2022 Greptime Team
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,9 +13,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+mod alter;
 pub mod error;
 mod insert;

+pub use alter::{alter_expr_to_request, create_expr_to_request, create_table_schema};
 pub use insert::{
     build_alter_table_request, build_create_expr_from_insertion, column_to_vector,
-    find_new_columns, insert_batches, insertion_expr_to_request,
+    find_new_columns, insertion_expr_to_request,
 };
Some files were not shown because too many files have changed in this diff.