mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-25 23:49:58 +00:00
Compare commits
106 Commits
v0.1.0-alp
...
replace-ar
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8630cdb38 | ||
|
|
0f3dcc1b38 | ||
|
|
7c696dae08 | ||
|
|
61d8bc2ea1 | ||
|
|
142dee41d6 | ||
|
|
e3785fca70 | ||
|
|
ce6d1cb7d1 | ||
|
|
dbb3034ecb | ||
|
|
fda9e80cbf | ||
|
|
756c068166 | ||
|
|
652d59a643 | ||
|
|
fa971c6513 | ||
|
|
36c929e1a7 | ||
|
|
6a4e2e5975 | ||
|
|
a712382fba | ||
|
|
4b644aa482 | ||
|
|
9ad6ddb26e | ||
|
|
4defde055c | ||
|
|
c5661ee362 | ||
|
|
9b093463cc | ||
|
|
61e0f1a11c | ||
|
|
95b2d8654f | ||
|
|
249ebc6937 | ||
|
|
42fdc7251a | ||
|
|
c1b8981f61 | ||
|
|
949cd3e3af | ||
|
|
b26982c5d7 | ||
|
|
d0892bf0b7 | ||
|
|
fff530cb50 | ||
|
|
b936d8b18a | ||
|
|
1bde1ba399 | ||
|
|
3687bc7346 | ||
|
|
4fdf26810c | ||
|
|
587bdc9800 | ||
|
|
7f59758e69 | ||
|
|
58c26def6b | ||
|
|
6f3baf96b0 | ||
|
|
a898f846d1 | ||
|
|
a562199455 | ||
|
|
fb0b4eb826 | ||
|
|
a521ab5041 | ||
|
|
833216d317 | ||
|
|
2ba99259e1 | ||
|
|
551cde23b1 | ||
|
|
90c832b33d | ||
|
|
8959dbcef8 | ||
|
|
653906d4fa | ||
|
|
829ff491c4 | ||
|
|
b32438e78c | ||
|
|
0ccb8b4302 | ||
|
|
b48ae21b71 | ||
|
|
2034b40f33 | ||
|
|
3c0adb00f3 | ||
|
|
8c66b7d000 | ||
|
|
99371fd31b | ||
|
|
fe505fecfd | ||
|
|
55e6be7af1 | ||
|
|
f9bfb121db | ||
|
|
cc1ec26416 | ||
|
|
504059a699 | ||
|
|
7151deb4ed | ||
|
|
6fb413ae50 | ||
|
|
beb07fc895 | ||
|
|
4275e47bdb | ||
|
|
6720bc5f7c | ||
|
|
4052563248 | ||
|
|
952e1bd626 | ||
|
|
8232015998 | ||
|
|
d82a3a7d58 | ||
|
|
0599465685 | ||
|
|
13d51250ba | ||
|
|
6127706b5b | ||
|
|
2e17e9c4b5 | ||
|
|
b0cbfa7ffb | ||
|
|
20172338e8 | ||
|
|
9c53f9b24c | ||
|
|
6d24f7ebb6 | ||
|
|
68c2de8e45 | ||
|
|
a17dcbc511 | ||
|
|
53ab19ea5a | ||
|
|
84c44cf540 | ||
|
|
020b9936cd | ||
|
|
75dcf2467b | ||
|
|
eea5393f96 | ||
|
|
3d312d389d | ||
|
|
fdc73fb52f | ||
|
|
2a36e26d19 | ||
|
|
baef640fe3 | ||
|
|
5fddb799f7 | ||
|
|
f372229b18 | ||
|
|
4085fc7899 | ||
|
|
30940e692a | ||
|
|
b371ce0f48 | ||
|
|
ac7f52d303 | ||
|
|
051768b735 | ||
|
|
c5b0d2431f | ||
|
|
4038dd4067 | ||
|
|
8be0f05570 | ||
|
|
69f06eec8b | ||
|
|
7b37e99a45 | ||
|
|
d0686f9c19 | ||
|
|
221f3e9d2e | ||
|
|
61c4a3691a | ||
|
|
d7626fd6af | ||
|
|
e3201a4705 | ||
|
|
571a84d91b |
4
.env.example
Normal file
4
.env.example
Normal file
@@ -0,0 +1,4 @@
|
||||
# Settings for s3 test
|
||||
GT_S3_BUCKET=S3 bucket
|
||||
GT_S3_ACCESS_KEY_ID=S3 access key id
|
||||
GT_S3_ACCESS_KEY=S3 secret access key
|
||||
4
.github/pull_request_template.md
vendored
4
.github/pull_request_template.md
vendored
@@ -13,7 +13,7 @@ Please explain IN DETAIL what the changes are in this PR and why they are needed
|
||||
|
||||
## Checklist
|
||||
|
||||
- [] I have written the necessary rustdoc comments.
|
||||
- [] I have added the necessary unit tests and integration tests.
|
||||
- [ ] I have written the necessary rustdoc comments.
|
||||
- [ ] I have added the necessary unit tests and integration tests.
|
||||
|
||||
## Refer to a related PR or issue link (optional)
|
||||
|
||||
24
.github/workflows/develop.yml
vendored
24
.github/workflows/develop.yml
vendored
@@ -26,6 +26,13 @@ env:
|
||||
RUST_TOOLCHAIN: nightly-2022-07-14
|
||||
|
||||
jobs:
|
||||
typos:
|
||||
name: Spell Check with Typos
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: crate-ci/typos@v1.0.4
|
||||
|
||||
check:
|
||||
name: Check
|
||||
if: github.event.pull_request.draft == false
|
||||
@@ -42,6 +49,23 @@ jobs:
|
||||
- name: Run cargo check
|
||||
run: cargo check --workspace --all-targets
|
||||
|
||||
toml:
|
||||
name: Toml Check
|
||||
if: github.event.pull_request.draft == false
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 60
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ env.RUST_TOOLCHAIN }}
|
||||
- name: Rust Cache
|
||||
uses: Swatinem/rust-cache@v2
|
||||
- name: Install taplo
|
||||
run: cargo install taplo-cli --version ^0.8 --locked
|
||||
- name: Run taplo
|
||||
run: taplo format --check --option "indent_string= "
|
||||
|
||||
# Use coverage to run test.
|
||||
# test:
|
||||
# name: Test Suite
|
||||
|
||||
25
.github/workflows/doc-issue.yml
vendored
Normal file
25
.github/workflows/doc-issue.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
name: Create Issue in docs repo on doc related changes
|
||||
|
||||
on:
|
||||
issues:
|
||||
types:
|
||||
- labeled
|
||||
pull_request_target:
|
||||
types:
|
||||
- labeled
|
||||
|
||||
jobs:
|
||||
doc_issue:
|
||||
if: github.event.label.name == 'doc update required'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: create an issue in doc repo
|
||||
uses: dacbd/create-issue-action@main
|
||||
with:
|
||||
owner: GreptimeTeam
|
||||
repo: docs
|
||||
token: ${{ secrets.DOCS_REPO_TOKEN }}
|
||||
title: Update docs for ${{ github.event.issue.title || github.event.pull_request.title }}
|
||||
body: |
|
||||
A document change request is generated from
|
||||
${{ github.event.issue.html_url || github.event.pull_request.html_url }}
|
||||
32
.github/workflows/release.yml
vendored
32
.github/workflows/release.yml
vendored
@@ -3,9 +3,8 @@ on:
|
||||
tags:
|
||||
- "v*.*.*"
|
||||
schedule:
|
||||
# At 00:00 Everyday
|
||||
# https://crontab.guru/every-day-at-midnight
|
||||
- cron: '0 0 * * *'
|
||||
# At 00:00 on Monday.
|
||||
- cron: '0 0 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
name: Release
|
||||
@@ -14,7 +13,10 @@ env:
|
||||
RUST_TOOLCHAIN: nightly-2022-07-14
|
||||
|
||||
# FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
|
||||
NIGHTLY_BUILD_VERSION_PREFIX: v0.1.0-alpha
|
||||
SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha
|
||||
|
||||
# In the future, we can change SCHEDULED_PERIOD to nightly.
|
||||
SCHEDULED_PERIOD: weekly
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -113,25 +115,25 @@ jobs:
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v3
|
||||
|
||||
- name: Configure nightly build version # the version would be ${NIGHTLY_BUILD_VERSION_PREFIX}-YYYYMMDD-nightly, like v0.1.0-alpha-20221119-nightly.
|
||||
- name: Configure scheduled build version # the version would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}, like v0.1.0-alpha-20221119-weekly.
|
||||
shell: bash
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
buildTime=`date "+%Y%m%d"`
|
||||
NIGHTLY_VERSION=${{ env.NIGHTLY_BUILD_VERSION_PREFIX }}-$buildTime-nightly
|
||||
echo "NIGHTLY_VERSION=${NIGHTLY_VERSION}" >> $GITHUB_ENV
|
||||
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
|
||||
echo "SCHEDULED_BUILD_VERSION=${SCHEDULED_BUILD_VERSION}" >> $GITHUB_ENV
|
||||
|
||||
- name: Create nightly git tag
|
||||
- name: Create scheduled build git tag
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
git tag ${{ env.NIGHTLY_VERSION }}
|
||||
git tag ${{ env.SCHEDULED_BUILD_VERSION }}
|
||||
|
||||
- name: Publish nightly release # configure the different release title and tags.
|
||||
- name: Publish scheduled release # configure the different release title and tags.
|
||||
uses: softprops/action-gh-release@v1
|
||||
if: github.event_name == 'schedule'
|
||||
with:
|
||||
name: "Release ${{ env.NIGHTLY_VERSION }}"
|
||||
tag_name: ${{ env.NIGHTLY_VERSION }}
|
||||
name: "Release ${{ env.SCHEDULED_BUILD_VERSION }}"
|
||||
tag_name: ${{ env.SCHEDULED_BUILD_VERSION }}
|
||||
generate_release_notes: true
|
||||
files: |
|
||||
**/greptime-*
|
||||
@@ -189,13 +191,13 @@ jobs:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Configure nightly build image tag # the tag would be ${NIGHTLY_BUILD_VERSION_PREFIX}-YYYYMMDD-nightly
|
||||
- name: Configure scheduled build image tag # the tag would be ${SCHEDULED_BUILD_VERSION_PREFIX}-YYYYMMDD-${SCHEDULED_PERIOD}
|
||||
shell: bash
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
buildTime=`date "+%Y%m%d"`
|
||||
NIGHTLY_VERSION=${{ env.NIGHTLY_BUILD_VERSION_PREFIX }}-$buildTime-nightly
|
||||
echo "IMAGE_TAG=${NIGHTLY_VERSION:1}" >> $GITHUB_ENV
|
||||
SCHEDULED_BUILD_VERSION=${{ env.SCHEDULED_BUILD_VERSION_PREFIX }}-$buildTime-${{ env.SCHEDULED_PERIOD }}
|
||||
echo "IMAGE_TAG=${SCHEDULED_BUILD_VERSION:1}" >> $GITHUB_ENV
|
||||
|
||||
- name: Configure tag # If the release tag is v0.1.0, then the image version tag will be 0.1.0.
|
||||
shell: bash
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -32,3 +32,6 @@ logs/
|
||||
|
||||
# Benchmark dataset
|
||||
benchmarks/data
|
||||
|
||||
# dotenv
|
||||
.env
|
||||
|
||||
@@ -9,7 +9,7 @@ repos:
|
||||
rev: e6a795bc6b2c0958f9ef52af4863bbd7cc17238f
|
||||
hooks:
|
||||
- id: cargo-sort
|
||||
args: ["--workspace", "--print"]
|
||||
args: ["--workspace"]
|
||||
|
||||
- repo: https://github.com/doublify/pre-commit-rust
|
||||
rev: v1.0
|
||||
|
||||
2695
Cargo.lock
generated
2695
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -11,11 +11,11 @@ members = [
|
||||
"src/common/function",
|
||||
"src/common/function-macro",
|
||||
"src/common/grpc",
|
||||
"src/common/grpc-expr",
|
||||
"src/common/query",
|
||||
"src/common/recordbatch",
|
||||
"src/common/runtime",
|
||||
"src/common/substrait",
|
||||
"src/common/grpc-expr",
|
||||
"src/common/telemetry",
|
||||
"src/common/time",
|
||||
"src/datanode",
|
||||
@@ -24,15 +24,18 @@ members = [
|
||||
"src/log-store",
|
||||
"src/meta-client",
|
||||
"src/meta-srv",
|
||||
"src/mito",
|
||||
"src/object-store",
|
||||
"src/promql",
|
||||
"src/query",
|
||||
"src/script",
|
||||
"src/servers",
|
||||
"src/session",
|
||||
"src/sql",
|
||||
"src/storage",
|
||||
"src/store-api",
|
||||
"src/table",
|
||||
"src/mito",
|
||||
"tests-integration",
|
||||
"tests/runner",
|
||||
]
|
||||
|
||||
|
||||
67
Makefile
Normal file
67
Makefile
Normal file
@@ -0,0 +1,67 @@
|
||||
IMAGE_REGISTRY ?= greptimedb
|
||||
IMAGE_TAG ?= latest
|
||||
|
||||
##@ Build
|
||||
|
||||
.PHONY: build
|
||||
build: ## Build debug version greptime.
|
||||
cargo build
|
||||
|
||||
.PHONY: release
|
||||
release: ## Build release version greptime.
|
||||
cargo build --release
|
||||
|
||||
.PHONY: clean
|
||||
clean: ## Clean the project.
|
||||
cargo clean
|
||||
|
||||
.PHONY: fmt
|
||||
fmt: ## Format all the Rust code.
|
||||
cargo fmt --all
|
||||
|
||||
.PHONY: docker-image
|
||||
docker-image: ## Build docker image.
|
||||
docker build --network host -f docker/Dockerfile -t ${IMAGE_REGISTRY}:${IMAGE_TAG} .
|
||||
|
||||
##@ Test
|
||||
|
||||
.PHONY: unit-test
|
||||
unit-test: ## Run unit test.
|
||||
cargo test --workspace
|
||||
|
||||
.PHONY: integration-test
|
||||
integration-test: ## Run integation test.
|
||||
cargo test integration
|
||||
|
||||
.PHONY: sqlness-test
|
||||
sqlness-test: ## Run sqlness test.
|
||||
cargo run --bin sqlness-runner
|
||||
|
||||
.PHONY: check
|
||||
check: ## Cargo check all the targets.
|
||||
cargo check --workspace --all-targets
|
||||
|
||||
.PHONY: clippy
|
||||
clippy: ## Check clippy rules.
|
||||
cargo clippy --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr
|
||||
|
||||
.PHONY: fmt-check
|
||||
fmt-check: ## Check code format.
|
||||
cargo fmt --all -- --check
|
||||
|
||||
##@ General
|
||||
|
||||
# The help target prints out all targets with their descriptions organized
|
||||
# beneath their categories. The categories are represented by '##@' and the
|
||||
# target descriptions by '##'. The awk commands is responsible for reading the
|
||||
# entire set of makefiles included in this invocation, looking for lines of the
|
||||
# file as xyz: ## something, and then pretty-format the target and help. Then,
|
||||
# if there's a line with ##@ something, that gets pretty-printed as a category.
|
||||
# More info on the usage of ANSI control characters for terminal formatting:
|
||||
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
|
||||
# More info on the awk command:
|
||||
# https://linuxcommand.org/lc3_adv_awk.php
|
||||
|
||||
.PHONY: help
|
||||
help: ## Display help messages.
|
||||
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
|
||||
10
README.md
10
README.md
@@ -1,7 +1,12 @@
|
||||
<p align="center">
|
||||
<img src="/docs/logo-text-padding.png" alt="GreptimeDB Logo" width="400px"></img>
|
||||
<picture>
|
||||
<source media="(prefers-color-scheme: light)" srcset="/docs/logo-text-padding.png">
|
||||
<source media="(prefers-color-scheme: dark)" srcset="/docs/logo-text-padding-dark.png">
|
||||
<img alt="GreptimeDB Logo" src="/docs/logo-text-padding.png" width="400px">
|
||||
</picture>
|
||||
</p>
|
||||
|
||||
|
||||
<h3 align="center">
|
||||
The next-generation hybrid timeseries/analytics processing database in the cloud
|
||||
</h3>
|
||||
@@ -56,7 +61,6 @@ To compile GreptimeDB from source, you'll need:
|
||||
find an installation instructions [here](https://grpc.io/docs/protoc-installation/).
|
||||
**Note that `protoc` version needs to be >= 3.15** because we have used the `optional`
|
||||
keyword. You can check it with `protoc --version`.
|
||||
|
||||
|
||||
#### Build with Docker
|
||||
|
||||
@@ -161,6 +165,8 @@ break things. Benchmark on development branch may not represent its potential
|
||||
performance. We release pre-built binaries constantly for functional
|
||||
evaluation. Do not use it in production at the moment.
|
||||
|
||||
For future plans, check out [GreptimeDB roadmap](https://github.com/GreptimeTeam/greptimedb/issues/669).
|
||||
|
||||
## Community
|
||||
|
||||
Our core team is thrilled too see you participate in any ways you like. When you are stuck, try to
|
||||
|
||||
@@ -5,10 +5,10 @@ edition = "2021"
|
||||
license = "Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
arrow = "10"
|
||||
arrow = "26.0.0"
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
client = { path = "../src/client" }
|
||||
indicatif = "0.17.1"
|
||||
itertools = "0.10.5"
|
||||
parquet = { version = "*" }
|
||||
parquet = "26.0.0"
|
||||
tokio = { version = "1.21", features = ["full"] }
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use arrow::array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray};
|
||||
@@ -28,14 +27,11 @@ use arrow::datatypes::{DataType, Float64Type, Int64Type};
|
||||
use arrow::record_batch::RecordBatch;
|
||||
use clap::Parser;
|
||||
use client::admin::Admin;
|
||||
use client::api::v1::codec::InsertBatch;
|
||||
use client::api::v1::column::Values;
|
||||
use client::api::v1::{insert_expr, Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
|
||||
use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
|
||||
use client::{Client, Database, Select};
|
||||
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
|
||||
use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
|
||||
use parquet::file::reader::FileReader;
|
||||
use parquet::file::serialized_reader::SerializedFileReader;
|
||||
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
|
||||
use tokio::task::JoinSet;
|
||||
|
||||
const DATABASE_NAME: &str = "greptime";
|
||||
@@ -87,10 +83,14 @@ async fn write_data(
|
||||
pb_style: ProgressStyle,
|
||||
) -> u128 {
|
||||
let file = std::fs::File::open(&path).unwrap();
|
||||
let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
|
||||
let row_num = file_reader.metadata().file_metadata().num_rows();
|
||||
let record_batch_reader = ParquetFileArrowReader::new(file_reader)
|
||||
.get_record_reader(batch_size)
|
||||
let record_batch_reader_builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
|
||||
let row_num = record_batch_reader_builder
|
||||
.metadata()
|
||||
.file_metadata()
|
||||
.num_rows();
|
||||
let record_batch_reader = record_batch_reader_builder
|
||||
.with_batch_size(batch_size)
|
||||
.build()
|
||||
.unwrap();
|
||||
let progress_bar = mpb.add(ProgressBar::new(row_num as _));
|
||||
progress_bar.set_style(pb_style);
|
||||
@@ -100,16 +100,13 @@ async fn write_data(
|
||||
|
||||
for record_batch in record_batch_reader {
|
||||
let record_batch = record_batch.unwrap();
|
||||
let row_count = record_batch.num_rows();
|
||||
let insert_batch = convert_record_batch(record_batch).into();
|
||||
let (columns, row_count) = convert_record_batch(record_batch);
|
||||
let insert_expr = InsertExpr {
|
||||
schema_name: "public".to_string(),
|
||||
table_name: TABLE_NAME.to_string(),
|
||||
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
|
||||
values: vec![insert_batch],
|
||||
})),
|
||||
options: HashMap::default(),
|
||||
region_number: 0,
|
||||
columns,
|
||||
row_count,
|
||||
};
|
||||
let now = Instant::now();
|
||||
db.insert(insert_expr).await.unwrap();
|
||||
@@ -125,7 +122,7 @@ async fn write_data(
|
||||
total_rpc_elapsed_ms
|
||||
}
|
||||
|
||||
fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
|
||||
fn convert_record_batch(record_batch: RecordBatch) -> (Vec<Column>, u32) {
|
||||
let schema = record_batch.schema();
|
||||
let fields = schema.fields();
|
||||
let row_count = record_batch.num_rows();
|
||||
@@ -143,10 +140,7 @@ fn convert_record_batch(record_batch: RecordBatch) -> InsertBatch {
|
||||
columns.push(column);
|
||||
}
|
||||
|
||||
InsertBatch {
|
||||
columns,
|
||||
row_count: row_count as _,
|
||||
}
|
||||
(columns, row_count as _)
|
||||
}
|
||||
|
||||
fn build_values(column: &ArrayRef) -> Values {
|
||||
@@ -217,9 +211,10 @@ fn build_values(column: &ArrayRef) -> Values {
|
||||
| DataType::FixedSizeList(_, _)
|
||||
| DataType::LargeList(_)
|
||||
| DataType::Struct(_)
|
||||
| DataType::Union(_, _)
|
||||
| DataType::Union(_, _, _)
|
||||
| DataType::Dictionary(_, _)
|
||||
| DataType::Decimal(_, _)
|
||||
| DataType::Decimal128(_, _)
|
||||
| DataType::Decimal256(_, _)
|
||||
| DataType::Map(_, _) => todo!(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ wal_dir = '/tmp/greptimedb/wal'
|
||||
rpc_runtime_size = 8
|
||||
mysql_addr = '127.0.0.1:4406'
|
||||
mysql_runtime_size = 4
|
||||
enable_memory_catalog = false
|
||||
|
||||
[storage]
|
||||
type = 'File'
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
mode = 'distributed'
|
||||
datanode_rpc_addr = '127.0.0.1:3001'
|
||||
http_addr = '127.0.0.1:4000'
|
||||
|
||||
[http_options]
|
||||
addr = '127.0.0.1:4000'
|
||||
timeout = "30s"
|
||||
|
||||
[meta_client_opts]
|
||||
metasrv_addrs = ['127.0.0.1:3002']
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
node_id = 0
|
||||
mode = 'standalone'
|
||||
http_addr = '127.0.0.1:4000'
|
||||
wal_dir = '/tmp/greptimedb/wal/'
|
||||
enable_memory_catalog = false
|
||||
|
||||
[http_options]
|
||||
addr = '127.0.0.1:4000'
|
||||
timeout = "30s"
|
||||
|
||||
[storage]
|
||||
type = 'File'
|
||||
|
||||
BIN
docs/logo-text-padding-dark.png
Normal file
BIN
docs/logo-text-padding-dark.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 25 KiB |
@@ -7,8 +7,8 @@ license = "Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
common-base = { path = "../common/base" }
|
||||
common-time = { path = "../common/time" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-time = { path = "../common/time" }
|
||||
datatypes = { path = "../datatypes" }
|
||||
prost = "0.11"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
|
||||
@@ -20,9 +20,7 @@ fn main() {
|
||||
.file_descriptor_set_path(default_out_dir.join("greptime_fd.bin"))
|
||||
.compile(
|
||||
&[
|
||||
"greptime/v1/insert.proto",
|
||||
"greptime/v1/select.proto",
|
||||
"greptime/v1/physical_plan.proto",
|
||||
"greptime/v1/greptime.proto",
|
||||
"greptime/v1/meta/common.proto",
|
||||
"greptime/v1/meta/heartbeat.proto",
|
||||
|
||||
@@ -20,6 +20,7 @@ message AdminExpr {
|
||||
CreateExpr create = 2;
|
||||
AlterExpr alter = 3;
|
||||
CreateDatabaseExpr create_database = 4;
|
||||
DropTableExpr drop_table = 5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,6 +56,12 @@ message AlterExpr {
|
||||
}
|
||||
}
|
||||
|
||||
message DropTableExpr {
|
||||
string catalog_name = 1;
|
||||
string schema_name = 2;
|
||||
string table_name = 3;
|
||||
}
|
||||
|
||||
message AddColumns {
|
||||
repeated AddColumn add_columns = 1;
|
||||
}
|
||||
|
||||
@@ -32,7 +32,10 @@ message Column {
|
||||
|
||||
repeated int32 date_values = 14;
|
||||
repeated int64 datetime_values = 15;
|
||||
repeated int64 ts_millis_values = 16;
|
||||
repeated int64 ts_second_values = 16;
|
||||
repeated int64 ts_millisecond_values = 17;
|
||||
repeated int64 ts_microsecond_values = 18;
|
||||
repeated int64 ts_nanosecond_values = 19;
|
||||
}
|
||||
// The array of non-null values in this column.
|
||||
//
|
||||
@@ -75,5 +78,8 @@ enum ColumnDataType {
|
||||
STRING = 12;
|
||||
DATE = 13;
|
||||
DATETIME = 14;
|
||||
TIMESTAMP = 15;
|
||||
TIMESTAMP_SECOND = 15;
|
||||
TIMESTAMP_MILLISECOND = 16;
|
||||
TIMESTAMP_MICROSECOND = 17;
|
||||
TIMESTAMP_NANOSECOND = 18;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ syntax = "proto3";
|
||||
|
||||
package greptime.v1;
|
||||
|
||||
import "greptime/v1/column.proto";
|
||||
import "greptime/v1/common.proto";
|
||||
|
||||
message DatabaseRequest {
|
||||
@@ -28,39 +29,23 @@ message SelectExpr {
|
||||
oneof expr {
|
||||
string sql = 1;
|
||||
bytes logical_plan = 2;
|
||||
PhysicalPlan physical_plan = 15;
|
||||
}
|
||||
}
|
||||
|
||||
message PhysicalPlan {
|
||||
bytes original_ql = 1;
|
||||
bytes plan = 2;
|
||||
}
|
||||
|
||||
message InsertExpr {
|
||||
string schema_name = 1;
|
||||
string table_name = 2;
|
||||
|
||||
message Values {
|
||||
repeated bytes values = 1;
|
||||
}
|
||||
// Data is represented here.
|
||||
repeated Column columns = 3;
|
||||
|
||||
oneof expr {
|
||||
Values values = 3;
|
||||
// The row_count of all columns, which include null and non-null values.
|
||||
//
|
||||
// Note: the row_count of all columns in a InsertExpr must be same.
|
||||
uint32 row_count = 4;
|
||||
|
||||
// TODO(LFC): Remove field "sql" in InsertExpr.
|
||||
// When Frontend instance received an insertion SQL (`insert into ...`), it's anticipated to parse the SQL and
|
||||
// assemble the values to insert to feed Datanode. In other words, inserting data through Datanode instance's GRPC
|
||||
// interface shouldn't use SQL directly.
|
||||
// Then why the "sql" field exists here? It's because the Frontend needs table schema to create the values to insert,
|
||||
// which is currently not able to find anywhere. (Maybe the table schema is suppose to be fetched from Meta?)
|
||||
// The "sql" field is meant to be removed in the future.
|
||||
string sql = 4;
|
||||
}
|
||||
|
||||
/// The region number of current insert request.
|
||||
// The region number of current insert request.
|
||||
uint32 region_number = 5;
|
||||
map<string, bytes> options = 6;
|
||||
}
|
||||
|
||||
// TODO(jiachun)
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package greptime.v1.codec;
|
||||
|
||||
import "greptime/v1/column.proto";
|
||||
|
||||
message InsertBatch {
|
||||
repeated Column columns = 1;
|
||||
uint32 row_count = 2;
|
||||
}
|
||||
|
||||
message RegionNumber {
|
||||
uint32 id = 1;
|
||||
}
|
||||
@@ -5,6 +5,8 @@ package greptime.v1.meta;
|
||||
import "greptime/v1/meta/common.proto";
|
||||
|
||||
service Router {
|
||||
rpc Create(CreateRequest) returns (RouteResponse) {}
|
||||
|
||||
// Fetch routing information for tables. The smallest unit is the complete
|
||||
// routing information(all regions) of a table.
|
||||
//
|
||||
@@ -26,7 +28,14 @@ service Router {
|
||||
//
|
||||
rpc Route(RouteRequest) returns (RouteResponse) {}
|
||||
|
||||
rpc Create(CreateRequest) returns (RouteResponse) {}
|
||||
rpc Delete(DeleteRequest) returns (RouteResponse) {}
|
||||
}
|
||||
|
||||
message CreateRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
TableName table_name = 2;
|
||||
repeated Partition partitions = 3;
|
||||
}
|
||||
|
||||
message RouteRequest {
|
||||
@@ -35,6 +44,12 @@ message RouteRequest {
|
||||
repeated TableName table_names = 2;
|
||||
}
|
||||
|
||||
message DeleteRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
TableName table_name = 2;
|
||||
}
|
||||
|
||||
message RouteResponse {
|
||||
ResponseHeader header = 1;
|
||||
|
||||
@@ -42,13 +57,6 @@ message RouteResponse {
|
||||
repeated TableRoute table_routes = 3;
|
||||
}
|
||||
|
||||
message CreateRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
TableName table_name = 2;
|
||||
repeated Partition partitions = 3;
|
||||
}
|
||||
|
||||
message TableRoute {
|
||||
Table table = 1;
|
||||
repeated RegionRoute region_routes = 2;
|
||||
|
||||
@@ -20,6 +20,9 @@ service Store {
|
||||
|
||||
// DeleteRange deletes the given range from the key-value store.
|
||||
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse);
|
||||
|
||||
// MoveValue atomically renames the key to the given updated key.
|
||||
rpc MoveValue(MoveValueRequest) returns (MoveValueResponse);
|
||||
}
|
||||
|
||||
message RangeRequest {
|
||||
@@ -136,3 +139,21 @@ message DeleteRangeResponse {
|
||||
// returned.
|
||||
repeated KeyValue prev_kvs = 3;
|
||||
}
|
||||
|
||||
message MoveValueRequest {
|
||||
RequestHeader header = 1;
|
||||
|
||||
// If from_key dose not exist, return the value of to_key (if it exists).
|
||||
// If from_key exists, move the value of from_key to to_key (i.e. rename),
|
||||
// and return the value.
|
||||
bytes from_key = 2;
|
||||
bytes to_key = 3;
|
||||
}
|
||||
|
||||
message MoveValueResponse {
|
||||
ResponseHeader header = 1;
|
||||
|
||||
// If from_key dose not exist, return the value of to_key (if it exists).
|
||||
// If from_key exists, return the value of from_key.
|
||||
KeyValue kv = 2;
|
||||
}
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package greptime.v1.codec;
|
||||
|
||||
message PhysicalPlanNode {
|
||||
oneof PhysicalPlanType {
|
||||
ProjectionExecNode projection = 1;
|
||||
MockInputExecNode mock = 99;
|
||||
// TODO(fys): impl other physical plan node
|
||||
}
|
||||
}
|
||||
|
||||
message ProjectionExecNode {
|
||||
PhysicalPlanNode input = 1;
|
||||
repeated PhysicalExprNode expr = 2;
|
||||
repeated string expr_name = 3;
|
||||
}
|
||||
|
||||
message PhysicalExprNode {
|
||||
oneof ExprType {
|
||||
PhysicalColumn column = 1;
|
||||
// TODO(fys): impl other physical expr node
|
||||
}
|
||||
}
|
||||
|
||||
message PhysicalColumn {
|
||||
string name = 1;
|
||||
uint64 index = 2;
|
||||
}
|
||||
|
||||
message MockInputExecNode {
|
||||
string name = 1;
|
||||
}
|
||||
@@ -15,6 +15,7 @@
|
||||
use common_base::BitVec;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::types::TimestampType;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use snafu::prelude::*;
|
||||
@@ -56,7 +57,16 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
|
||||
ColumnDataType::String => ConcreteDataType::string_datatype(),
|
||||
ColumnDataType::Date => ConcreteDataType::date_datatype(),
|
||||
ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
|
||||
ColumnDataType::Timestamp => ConcreteDataType::timestamp_millis_datatype(),
|
||||
ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
|
||||
ColumnDataType::TimestampMillisecond => {
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
}
|
||||
ColumnDataType::TimestampMicrosecond => {
|
||||
ConcreteDataType::timestamp_microsecond_datatype()
|
||||
}
|
||||
ColumnDataType::TimestampNanosecond => {
|
||||
ConcreteDataType::timestamp_nanosecond_datatype()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -81,7 +91,12 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
|
||||
ConcreteDataType::String(_) => ColumnDataType::String,
|
||||
ConcreteDataType::Date(_) => ColumnDataType::Date,
|
||||
ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
|
||||
ConcreteDataType::Timestamp(_) => ColumnDataType::Timestamp,
|
||||
ConcreteDataType::Timestamp(unit) => match unit {
|
||||
TimestampType::Second(_) => ColumnDataType::TimestampSecond,
|
||||
TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
|
||||
TimestampType::Microsecond(_) => ColumnDataType::TimestampMicrosecond,
|
||||
TimestampType::Nanosecond(_) => ColumnDataType::TimestampNanosecond,
|
||||
},
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) => {
|
||||
return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
|
||||
}
|
||||
@@ -153,8 +168,20 @@ impl Values {
|
||||
datetime_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDataType::Timestamp => Values {
|
||||
ts_millis_values: Vec::with_capacity(capacity),
|
||||
ColumnDataType::TimestampSecond => Values {
|
||||
ts_second_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDataType::TimestampMillisecond => Values {
|
||||
ts_millisecond_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDataType::TimestampMicrosecond => Values {
|
||||
ts_microsecond_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
ColumnDataType::TimestampNanosecond => Values {
|
||||
ts_nanosecond_values: Vec::with_capacity(capacity),
|
||||
..Default::default()
|
||||
},
|
||||
}
|
||||
@@ -187,9 +214,12 @@ impl Column {
|
||||
Value::Binary(val) => values.binary_values.push(val.to_vec()),
|
||||
Value::Date(val) => values.date_values.push(val.val()),
|
||||
Value::DateTime(val) => values.datetime_values.push(val.val()),
|
||||
Value::Timestamp(val) => values
|
||||
.ts_millis_values
|
||||
.push(val.convert_to(TimeUnit::Millisecond)),
|
||||
Value::Timestamp(val) => match val.unit() {
|
||||
TimeUnit::Second => values.ts_second_values.push(val.value()),
|
||||
TimeUnit::Millisecond => values.ts_millisecond_values.push(val.value()),
|
||||
TimeUnit::Microsecond => values.ts_microsecond_values.push(val.value()),
|
||||
TimeUnit::Nanosecond => values.ts_nanosecond_values.push(val.value()),
|
||||
},
|
||||
Value::List(_) => unreachable!(),
|
||||
});
|
||||
self.null_mask = null_mask.into_vec();
|
||||
@@ -200,7 +230,10 @@ impl Column {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::vectors::BooleanVector;
|
||||
use datatypes::vectors::{
|
||||
BooleanVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -258,8 +291,8 @@ mod tests {
|
||||
let values = values.datetime_values;
|
||||
assert_eq!(2, values.capacity());
|
||||
|
||||
let values = Values::with_capacity(ColumnDataType::Timestamp, 2);
|
||||
let values = values.ts_millis_values;
|
||||
let values = Values::with_capacity(ColumnDataType::TimestampMillisecond, 2);
|
||||
let values = values.ts_millisecond_values;
|
||||
assert_eq!(2, values.capacity());
|
||||
}
|
||||
|
||||
@@ -326,8 +359,8 @@ mod tests {
|
||||
ColumnDataTypeWrapper(ColumnDataType::Datetime).into()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::Timestamp).into()
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond).into()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -394,8 +427,8 @@ mod tests {
|
||||
ConcreteDataType::datetime_datatype().try_into().unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ColumnDataTypeWrapper(ColumnDataType::Timestamp),
|
||||
ConcreteDataType::timestamp_millis_datatype()
|
||||
ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond),
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
@@ -412,7 +445,48 @@ mod tests {
|
||||
assert!(result.is_err());
|
||||
assert_eq!(
|
||||
result.unwrap_err().to_string(),
|
||||
"Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
|
||||
"Failed to create column datatype from List(ListType { item_type: Boolean(BooleanType) })"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_put_timestamp_values() {
|
||||
let mut column = Column {
|
||||
column_name: "test".to_string(),
|
||||
semantic_type: 0,
|
||||
values: Some(Values {
|
||||
..Default::default()
|
||||
}),
|
||||
null_mask: vec![],
|
||||
datatype: 0,
|
||||
};
|
||||
|
||||
let vector = Arc::new(TimestampNanosecondVector::from_vec(vec![1, 2, 3]));
|
||||
column.push_vals(3, vector);
|
||||
assert_eq!(
|
||||
vec![1, 2, 3],
|
||||
column.values.as_ref().unwrap().ts_nanosecond_values
|
||||
);
|
||||
|
||||
let vector = Arc::new(TimestampMillisecondVector::from_vec(vec![4, 5, 6]));
|
||||
column.push_vals(3, vector);
|
||||
assert_eq!(
|
||||
vec![4, 5, 6],
|
||||
column.values.as_ref().unwrap().ts_millisecond_values
|
||||
);
|
||||
|
||||
let vector = Arc::new(TimestampMicrosecondVector::from_vec(vec![7, 8, 9]));
|
||||
column.push_vals(3, vector);
|
||||
assert_eq!(
|
||||
vec![7, 8, 9],
|
||||
column.values.as_ref().unwrap().ts_microsecond_values
|
||||
);
|
||||
|
||||
let vector = Arc::new(TimestampSecondVector::from_vec(vec![10, 11, 12]));
|
||||
column.push_vals(3, vector);
|
||||
assert_eq!(
|
||||
vec![10, 11, 12],
|
||||
column.values.as_ref().unwrap().ts_second_values
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
pub use prost::DecodeError;
|
||||
use prost::Message;
|
||||
|
||||
use crate::v1::codec::{InsertBatch, PhysicalPlanNode, RegionNumber, SelectResult};
|
||||
use crate::v1::codec::SelectResult;
|
||||
use crate::v1::meta::TableRouteValue;
|
||||
|
||||
macro_rules! impl_convert_with_bytes {
|
||||
@@ -36,10 +36,7 @@ macro_rules! impl_convert_with_bytes {
|
||||
};
|
||||
}
|
||||
|
||||
impl_convert_with_bytes!(InsertBatch);
|
||||
impl_convert_with_bytes!(SelectResult);
|
||||
impl_convert_with_bytes!(PhysicalPlanNode);
|
||||
impl_convert_with_bytes!(RegionNumber);
|
||||
impl_convert_with_bytes!(TableRouteValue);
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -51,52 +48,6 @@ mod tests {
|
||||
|
||||
const SEMANTIC_TAG: i32 = 0;
|
||||
|
||||
#[test]
|
||||
fn test_convert_insert_batch() {
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let bytes: Vec<u8> = insert_batch.into();
|
||||
let insert: InsertBatch = bytes.deref().try_into().unwrap();
|
||||
|
||||
assert_eq!(8, insert.row_count);
|
||||
assert_eq!(1, insert.columns.len());
|
||||
|
||||
let column = &insert.columns[0];
|
||||
assert_eq!("foo", column.column_name);
|
||||
assert_eq!(SEMANTIC_TAG, column.semantic_type);
|
||||
assert_eq!(vec![1], column.null_mask);
|
||||
assert_eq!(
|
||||
vec![2, 3, 4, 5, 6, 7, 8],
|
||||
column.values.as_ref().unwrap().i32_values
|
||||
);
|
||||
}
|
||||
|
||||
#[should_panic]
|
||||
#[test]
|
||||
fn test_convert_insert_batch_wrong() {
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let mut bytes: Vec<u8> = insert_batch.into();
|
||||
|
||||
// modify some bytes
|
||||
bytes[0] = 0b1;
|
||||
bytes[1] = 0b1;
|
||||
|
||||
let insert: InsertBatch = bytes.deref().try_into().unwrap();
|
||||
|
||||
assert_eq!(8, insert.row_count);
|
||||
assert_eq!(1, insert.columns.len());
|
||||
|
||||
let column = &insert.columns[0];
|
||||
assert_eq!("foo", column.column_name);
|
||||
assert_eq!(SEMANTIC_TAG, column.semantic_type);
|
||||
assert_eq!(vec![1], column.null_mask);
|
||||
assert_eq!(
|
||||
vec![2, 3, 4, 5, 6, 7, 8],
|
||||
column.values.as_ref().unwrap().i32_values
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_select_result() {
|
||||
let select_result = mock_select_result();
|
||||
@@ -143,35 +94,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_region_id() {
|
||||
let region_id = RegionNumber { id: 12 };
|
||||
|
||||
let bytes: Vec<u8> = region_id.into();
|
||||
let region_id: RegionNumber = bytes.deref().try_into().unwrap();
|
||||
|
||||
assert_eq!(12, region_id.id);
|
||||
}
|
||||
|
||||
fn mock_insert_batch() -> InsertBatch {
|
||||
let values = column::Values {
|
||||
i32_values: vec![2, 3, 4, 5, 6, 7, 8],
|
||||
..Default::default()
|
||||
};
|
||||
let null_mask = vec![1];
|
||||
let column = Column {
|
||||
column_name: "foo".to_string(),
|
||||
semantic_type: SEMANTIC_TAG,
|
||||
values: Some(values),
|
||||
null_mask,
|
||||
..Default::default()
|
||||
};
|
||||
InsertBatch {
|
||||
columns: vec![column],
|
||||
row_count: 8,
|
||||
}
|
||||
}
|
||||
|
||||
fn mock_select_result() -> SelectResult {
|
||||
let values = column::Values {
|
||||
i32_values: vec![2, 3, 4, 5, 6, 7, 8],
|
||||
|
||||
@@ -145,10 +145,12 @@ gen_set_header!(HeartbeatRequest);
|
||||
gen_set_header!(RouteRequest);
|
||||
gen_set_header!(CreateRequest);
|
||||
gen_set_header!(RangeRequest);
|
||||
gen_set_header!(DeleteRequest);
|
||||
gen_set_header!(PutRequest);
|
||||
gen_set_header!(BatchPutRequest);
|
||||
gen_set_header!(CompareAndPutRequest);
|
||||
gen_set_header!(DeleteRangeRequest);
|
||||
gen_set_header!(MoveValueRequest);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
@@ -19,15 +19,12 @@ common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion = "14.0.0"
|
||||
datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
lazy_static = "1.4"
|
||||
meta-client = { path = "../meta-client" }
|
||||
opendal = "0.20"
|
||||
regex = "1.6"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
@@ -39,9 +36,8 @@ tokio = { version = "1.18", features = ["full"] }
|
||||
[dev-dependencies]
|
||||
chrono = "0.4"
|
||||
log-store = { path = "../log-store" }
|
||||
mito = { path = "../mito", features = ["test"] }
|
||||
object-store = { path = "../object-store" }
|
||||
opendal = "0.20"
|
||||
storage = { path = "../storage" }
|
||||
mito = { path = "../mito" }
|
||||
tempdir = "0.3"
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
|
||||
@@ -17,7 +17,7 @@ use std::any::Any;
|
||||
use common_error::ext::{BoxedError, ErrorExt};
|
||||
use common_error::prelude::{Snafu, StatusCode};
|
||||
use datafusion::error::DataFusionError;
|
||||
use datatypes::arrow;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::RawSchema;
|
||||
use snafu::{Backtrace, ErrorCompat};
|
||||
|
||||
@@ -51,14 +51,12 @@ pub enum Error {
|
||||
SystemCatalog { msg: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display(
|
||||
"System catalog table type mismatch, expected: binary, found: {:?} source: {}",
|
||||
"System catalog table type mismatch, expected: binary, found: {:?}",
|
||||
data_type,
|
||||
source
|
||||
))]
|
||||
SystemCatalogTypeMismatch {
|
||||
data_type: arrow::datatypes::DataType,
|
||||
#[snafu(backtrace)]
|
||||
source: datatypes::error::Error,
|
||||
data_type: ConcreteDataType,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
|
||||
@@ -94,7 +92,7 @@ pub enum Error {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Table {} already exists", table))]
|
||||
#[snafu(display("Table `{}` already exists", table))]
|
||||
TableExists { table: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Schema {} already exists", schema))]
|
||||
@@ -109,6 +107,12 @@ pub enum Error {
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Operation {} not implemented yet", operation))]
|
||||
Unimplemented {
|
||||
operation: String,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to open table, table info: {}, source: {}", table_info, source))]
|
||||
OpenTable {
|
||||
table_info: String,
|
||||
@@ -216,11 +220,13 @@ impl ErrorExt for Error {
|
||||
| Error::ValueDeserialize { .. }
|
||||
| Error::Io { .. } => StatusCode::StorageUnavailable,
|
||||
|
||||
Error::RegisterTable { .. } | Error::SystemCatalogTypeMismatch { .. } => {
|
||||
StatusCode::Internal
|
||||
}
|
||||
|
||||
Error::ReadSystemCatalog { source, .. } => source.status_code(),
|
||||
Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
|
||||
Error::InvalidCatalogValue { source, .. } => source.status_code(),
|
||||
|
||||
Error::RegisterTable { .. } => StatusCode::Internal,
|
||||
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
|
||||
Error::SchemaExists { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
@@ -235,6 +241,8 @@ impl ErrorExt for Error {
|
||||
Error::InvalidTableSchema { source, .. } => source.status_code(),
|
||||
Error::InvalidTableInfoInCatalog { .. } => StatusCode::Unexpected,
|
||||
Error::Internal { source, .. } => source.status_code(),
|
||||
|
||||
Error::Unimplemented { .. } => StatusCode::Unsupported,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -256,7 +264,6 @@ impl From<Error> for DataFusionError {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_error::mock::MockError;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use snafu::GenerateImplicitData;
|
||||
|
||||
use super::*;
|
||||
@@ -305,11 +312,8 @@ mod tests {
|
||||
assert_eq!(
|
||||
StatusCode::Internal,
|
||||
Error::SystemCatalogTypeMismatch {
|
||||
data_type: DataType::Boolean,
|
||||
source: datatypes::error::Error::UnsupportedArrowType {
|
||||
arrow_type: DataType::Boolean,
|
||||
backtrace: Backtrace::generate()
|
||||
}
|
||||
data_type: ConcreteDataType::binary_datatype(),
|
||||
backtrace: Backtrace::generate(),
|
||||
}
|
||||
.status_code()
|
||||
);
|
||||
|
||||
@@ -15,44 +15,56 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
use common_catalog::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::{RawTableInfo, TableId, TableVersion};
|
||||
|
||||
use crate::consts::{
|
||||
CATALOG_KEY_PREFIX, SCHEMA_KEY_PREFIX, TABLE_GLOBAL_KEY_PREFIX, TABLE_REGIONAL_KEY_PREFIX,
|
||||
};
|
||||
use crate::error::{
|
||||
DeserializeCatalogEntryValueSnafu, Error, InvalidCatalogSnafu, SerializeCatalogEntryValueSnafu,
|
||||
};
|
||||
const CATALOG_KEY_PREFIX: &str = "__c";
|
||||
const SCHEMA_KEY_PREFIX: &str = "__s";
|
||||
const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
|
||||
const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
|
||||
|
||||
const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
|
||||
|
||||
lazy_static! {
|
||||
static ref CATALOG_KEY_PATTERN: Regex =
|
||||
Regex::new(&format!("^{}-([a-zA-Z_]+)$", CATALOG_KEY_PREFIX)).unwrap();
|
||||
static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-({})$",
|
||||
CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)$",
|
||||
SCHEMA_KEY_PREFIX
|
||||
"^{}-({})-({})$",
|
||||
SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)$",
|
||||
TABLE_GLOBAL_KEY_PREFIX
|
||||
"^{}-({})-({})-({})$",
|
||||
TABLE_GLOBAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
|
||||
"^{}-([a-zA-Z_]+)-([a-zA-Z_]+)-([a-zA-Z0-9_]+)-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX
|
||||
"^{}-({})-({})-({})-([0-9]+)$",
|
||||
TABLE_REGIONAL_KEY_PREFIX,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN,
|
||||
ALPHANUMERICS_NAME_PATTERN
|
||||
))
|
||||
.unwrap();
|
||||
}
|
||||
@@ -126,7 +138,7 @@ impl TableGlobalKey {
|
||||
|
||||
/// Table global info contains necessary info for a datanode to create table regions, including
|
||||
/// table id, table meta(schema...), region id allocation across datanodes.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct TableGlobalValue {
|
||||
/// Id of datanode that created the global table info kv. only for debugging.
|
||||
pub node_id: u64,
|
||||
@@ -261,6 +273,10 @@ macro_rules! define_catalog_value {
|
||||
.context(DeserializeCatalogEntryValueSnafu { raw: s.as_ref() })
|
||||
}
|
||||
|
||||
pub fn from_bytes(bytes: impl AsRef<[u8]>) -> Result<Self, Error> {
|
||||
Self::parse(&String::from_utf8_lossy(bytes.as_ref()))
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> Result<Vec<u8>, Error> {
|
||||
Ok(serde_json::to_string(self)
|
||||
.context(SerializeCatalogEntryValueSnafu)?
|
||||
@@ -15,6 +15,7 @@
|
||||
#![feature(assert_matches)]
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::info;
|
||||
@@ -28,6 +29,7 @@ use crate::error::{CreateTableSnafu, Result};
|
||||
pub use crate::schema::{SchemaProvider, SchemaProviderRef};
|
||||
|
||||
pub mod error;
|
||||
pub mod helper;
|
||||
pub mod local;
|
||||
pub mod remote;
|
||||
pub mod schema;
|
||||
@@ -83,12 +85,17 @@ pub trait CatalogManager: CatalogList {
|
||||
/// Starts a catalog manager.
|
||||
async fn start(&self) -> Result<()>;
|
||||
|
||||
/// Registers a table given given catalog/schema to catalog manager,
|
||||
/// returns table registered.
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize>;
|
||||
/// Registers a table within given catalog/schema to catalog manager,
|
||||
/// returns whether the table registered.
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool>;
|
||||
|
||||
/// Register a schema with catalog name and schema name.
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize>;
|
||||
/// Deregisters a table within given catalog/schema to catalog manager,
|
||||
/// returns whether the table deregistered.
|
||||
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool>;
|
||||
|
||||
/// Register a schema with catalog name and schema name. Retuens whether the
|
||||
/// schema registered.
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool>;
|
||||
|
||||
/// Register a system table, should be called before starting the manager.
|
||||
async fn register_system_table(&self, request: RegisterSystemTableRequest)
|
||||
@@ -123,6 +130,25 @@ pub struct RegisterTableRequest {
|
||||
pub table: TableRef,
|
||||
}
|
||||
|
||||
impl Debug for RegisterTableRequest {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("RegisterTableRequest")
|
||||
.field("catalog", &self.catalog)
|
||||
.field("schema", &self.schema)
|
||||
.field("table_name", &self.table_name)
|
||||
.field("table_id", &self.table_id)
|
||||
.field("table", &self.table.table_info())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DeregisterTableRequest {
|
||||
pub catalog: String,
|
||||
pub schema: String,
|
||||
pub table_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RegisterSchemaRequest {
|
||||
pub catalog: String,
|
||||
|
||||
@@ -21,7 +21,7 @@ use common_catalog::consts::{
|
||||
SYSTEM_CATALOG_NAME, SYSTEM_CATALOG_TABLE_NAME,
|
||||
};
|
||||
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
|
||||
use common_telemetry::info;
|
||||
use common_telemetry::{error, info};
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::vectors::{BinaryVector, UInt8Vector};
|
||||
use futures_util::lock::Mutex;
|
||||
@@ -36,7 +36,7 @@ use table::TableRef;
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, IllegalManagerStateSnafu, OpenTableSnafu, ReadSystemCatalogSnafu, Result,
|
||||
SchemaExistsSnafu, SchemaNotFoundSnafu, SystemCatalogSnafu, SystemCatalogTypeMismatchSnafu,
|
||||
TableExistsSnafu, TableNotFoundSnafu,
|
||||
TableExistsSnafu, TableNotFoundSnafu, UnimplementedSnafu,
|
||||
};
|
||||
use crate::local::memory::{MemoryCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
|
||||
use crate::system::{
|
||||
@@ -46,8 +46,8 @@ use crate::system::{
|
||||
use crate::tables::SystemCatalog;
|
||||
use crate::{
|
||||
format_full_table_name, handle_system_table_request, CatalogList, CatalogManager,
|
||||
CatalogProvider, CatalogProviderRef, RegisterSchemaRequest, RegisterSystemTableRequest,
|
||||
RegisterTableRequest, SchemaProvider, SchemaProviderRef,
|
||||
CatalogProvider, CatalogProviderRef, DeregisterTableRequest, RegisterSchemaRequest,
|
||||
RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider, SchemaProviderRef,
|
||||
};
|
||||
|
||||
/// A `CatalogManager` consists of a system catalog and a bunch of user catalogs.
|
||||
@@ -57,6 +57,7 @@ pub struct LocalCatalogManager {
|
||||
engine: TableEngineRef,
|
||||
next_table_id: AtomicU32,
|
||||
init_lock: Mutex<bool>,
|
||||
register_lock: Mutex<()>,
|
||||
system_table_requests: Mutex<Vec<RegisterSystemTableRequest>>,
|
||||
}
|
||||
|
||||
@@ -76,6 +77,7 @@ impl LocalCatalogManager {
|
||||
engine,
|
||||
next_table_id: AtomicU32::new(MIN_USER_TABLE_ID),
|
||||
init_lock: Mutex::new(false),
|
||||
register_lock: Mutex::new(()),
|
||||
system_table_requests: Mutex::new(Vec::default()),
|
||||
})
|
||||
}
|
||||
@@ -143,27 +145,34 @@ impl LocalCatalogManager {
|
||||
/// Convert `RecordBatch` to a vector of `Entry`.
|
||||
fn record_batch_to_entry(rb: RecordBatch) -> Result<Vec<Entry>> {
|
||||
ensure!(
|
||||
rb.df_recordbatch.columns().len() >= 6,
|
||||
rb.num_columns() >= 6,
|
||||
SystemCatalogSnafu {
|
||||
msg: format!("Length mismatch: {}", rb.df_recordbatch.columns().len())
|
||||
msg: format!("Length mismatch: {}", rb.num_columns())
|
||||
}
|
||||
);
|
||||
|
||||
let entry_type = UInt8Vector::try_from_arrow_array(&rb.df_recordbatch.columns()[0])
|
||||
.with_context(|_| SystemCatalogTypeMismatchSnafu {
|
||||
data_type: rb.df_recordbatch.columns()[ENTRY_TYPE_INDEX]
|
||||
.data_type()
|
||||
.clone(),
|
||||
let entry_type = rb
|
||||
.column(ENTRY_TYPE_INDEX)
|
||||
.as_any()
|
||||
.downcast_ref::<UInt8Vector>()
|
||||
.with_context(|| SystemCatalogTypeMismatchSnafu {
|
||||
data_type: rb.column(ENTRY_TYPE_INDEX).data_type(),
|
||||
})?;
|
||||
|
||||
let key = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[1])
|
||||
.with_context(|_| SystemCatalogTypeMismatchSnafu {
|
||||
data_type: rb.df_recordbatch.columns()[KEY_INDEX].data_type().clone(),
|
||||
let key = rb
|
||||
.column(KEY_INDEX)
|
||||
.as_any()
|
||||
.downcast_ref::<BinaryVector>()
|
||||
.with_context(|| SystemCatalogTypeMismatchSnafu {
|
||||
data_type: rb.column(KEY_INDEX).data_type(),
|
||||
})?;
|
||||
|
||||
let value = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[3])
|
||||
.with_context(|_| SystemCatalogTypeMismatchSnafu {
|
||||
data_type: rb.df_recordbatch.columns()[VALUE_INDEX].data_type().clone(),
|
||||
let value = rb
|
||||
.column(VALUE_INDEX)
|
||||
.as_any()
|
||||
.downcast_ref::<BinaryVector>()
|
||||
.with_context(|| SystemCatalogTypeMismatchSnafu {
|
||||
data_type: rb.column(VALUE_INDEX).data_type(),
|
||||
})?;
|
||||
|
||||
let mut res = Vec::with_capacity(rb.num_rows());
|
||||
@@ -309,7 +318,7 @@ impl CatalogManager for LocalCatalogManager {
|
||||
self.init().await
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let started = self.init_lock.lock().await;
|
||||
|
||||
ensure!(
|
||||
@@ -332,27 +341,50 @@ impl CatalogManager for LocalCatalogManager {
|
||||
schema_info: format!("{}.{}", catalog_name, schema_name),
|
||||
})?;
|
||||
|
||||
if schema.table_exist(&request.table_name)? {
|
||||
return TableExistsSnafu {
|
||||
table: format_full_table_name(catalog_name, schema_name, &request.table_name),
|
||||
{
|
||||
let _lock = self.register_lock.lock().await;
|
||||
if let Some(existing) = schema.table(&request.table_name)? {
|
||||
if existing.table_info().ident.table_id != request.table_id {
|
||||
error!(
|
||||
"Unexpected table register request: {:?}, existing: {:?}",
|
||||
request,
|
||||
existing.table_info()
|
||||
);
|
||||
return TableExistsSnafu {
|
||||
table: format_full_table_name(
|
||||
catalog_name,
|
||||
schema_name,
|
||||
&request.table_name,
|
||||
),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
// Try to register table with same table id, just ignore.
|
||||
Ok(false)
|
||||
} else {
|
||||
// table does not exist
|
||||
self.system
|
||||
.register_table(
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
request.table_name.clone(),
|
||||
request.table_id,
|
||||
)
|
||||
.await?;
|
||||
schema.register_table(request.table_name, request.table)?;
|
||||
Ok(true)
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
|
||||
self.system
|
||||
.register_table(
|
||||
catalog_name.clone(),
|
||||
schema_name.clone(),
|
||||
request.table_name.clone(),
|
||||
request.table_id,
|
||||
)
|
||||
.await?;
|
||||
|
||||
schema.register_table(request.table_name, request.table)?;
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
|
||||
async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
|
||||
UnimplementedSnafu {
|
||||
operation: "deregister table",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
let started = self.init_lock.lock().await;
|
||||
ensure!(
|
||||
*started,
|
||||
@@ -367,17 +399,21 @@ impl CatalogManager for LocalCatalogManager {
|
||||
.catalogs
|
||||
.catalog(catalog_name)?
|
||||
.context(CatalogNotFoundSnafu { catalog_name })?;
|
||||
if catalog.schema(schema_name)?.is_some() {
|
||||
return SchemaExistsSnafu {
|
||||
schema: schema_name,
|
||||
}
|
||||
.fail();
|
||||
|
||||
{
|
||||
let _lock = self.register_lock.lock().await;
|
||||
ensure!(
|
||||
catalog.schema(schema_name)?.is_none(),
|
||||
SchemaExistsSnafu {
|
||||
schema: schema_name,
|
||||
}
|
||||
);
|
||||
self.system
|
||||
.register_schema(request.catalog, schema_name.clone())
|
||||
.await?;
|
||||
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
|
||||
Ok(true)
|
||||
}
|
||||
self.system
|
||||
.register_schema(request.catalog, schema_name.clone())
|
||||
.await?;
|
||||
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
|
||||
Ok(1)
|
||||
}
|
||||
|
||||
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
|
||||
|
||||
@@ -19,6 +19,7 @@ use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use common_catalog::consts::MIN_USER_TABLE_ID;
|
||||
use common_telemetry::error;
|
||||
use snafu::OptionExt;
|
||||
use table::metadata::TableId;
|
||||
use table::table::TableIdProvider;
|
||||
@@ -27,8 +28,8 @@ use table::TableRef;
|
||||
use crate::error::{CatalogNotFoundSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu};
|
||||
use crate::schema::SchemaProvider;
|
||||
use crate::{
|
||||
CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, RegisterSchemaRequest,
|
||||
RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
|
||||
CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef, DeregisterTableRequest,
|
||||
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProviderRef,
|
||||
};
|
||||
|
||||
/// Simple in-memory list of catalogs
|
||||
@@ -69,7 +70,7 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let catalogs = self.catalogs.write().unwrap();
|
||||
let catalog = catalogs
|
||||
.get(&request.catalog)
|
||||
@@ -84,10 +85,28 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
})?;
|
||||
schema
|
||||
.register_table(request.table_name, request.table)
|
||||
.map(|v| if v.is_some() { 0 } else { 1 })
|
||||
.map(|v| v.is_none())
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
|
||||
async fn deregister_table(&self, request: DeregisterTableRequest) -> Result<bool> {
|
||||
let catalogs = self.catalogs.write().unwrap();
|
||||
let catalog = catalogs
|
||||
.get(&request.catalog)
|
||||
.context(CatalogNotFoundSnafu {
|
||||
catalog_name: &request.catalog,
|
||||
})?
|
||||
.clone();
|
||||
let schema = catalog
|
||||
.schema(&request.schema)?
|
||||
.with_context(|| SchemaNotFoundSnafu {
|
||||
schema_info: format!("{}.{}", &request.catalog, &request.schema),
|
||||
})?;
|
||||
schema
|
||||
.deregister_table(&request.table_name)
|
||||
.map(|v| v.is_some())
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
let catalogs = self.catalogs.write().unwrap();
|
||||
let catalog = catalogs
|
||||
.get(&request.catalog)
|
||||
@@ -95,11 +114,12 @@ impl CatalogManager for MemoryCatalogManager {
|
||||
catalog_name: &request.catalog,
|
||||
})?;
|
||||
catalog.register_schema(request.schema, Arc::new(MemorySchemaProvider::new()))?;
|
||||
Ok(1)
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn register_system_table(&self, _request: RegisterSystemTableRequest) -> Result<()> {
|
||||
unimplemented!()
|
||||
// TODO(ruihang): support register system table request
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn schema(&self, catalog: &str, schema: &str) -> Result<Option<SchemaProviderRef>> {
|
||||
@@ -251,11 +271,21 @@ impl SchemaProvider for MemorySchemaProvider {
|
||||
}
|
||||
|
||||
fn register_table(&self, name: String, table: TableRef) -> Result<Option<TableRef>> {
|
||||
if self.table_exist(name.as_str())? {
|
||||
return TableExistsSnafu { table: name }.fail()?;
|
||||
}
|
||||
let mut tables = self.tables.write().unwrap();
|
||||
Ok(tables.insert(name, table))
|
||||
if let Some(existing) = tables.get(name.as_str()) {
|
||||
// if table with the same name but different table id exists, then it's a fatal bug
|
||||
if existing.table_info().ident.table_id != table.table_info().ident.table_id {
|
||||
error!(
|
||||
"Unexpected table register: {:?}, existing: {:?}",
|
||||
table.table_info(),
|
||||
existing.table_info()
|
||||
);
|
||||
return TableExistsSnafu { table: name }.fail()?;
|
||||
}
|
||||
Ok(Some(existing.clone()))
|
||||
} else {
|
||||
Ok(tables.insert(name, table))
|
||||
}
|
||||
}
|
||||
|
||||
fn deregister_table(&self, name: &str) -> Result<Option<TableRef>> {
|
||||
@@ -315,7 +345,7 @@ mod tests {
|
||||
.unwrap()
|
||||
.is_none());
|
||||
assert!(provider.table_exist(table_name).unwrap());
|
||||
let other_table = NumbersTable::default();
|
||||
let other_table = NumbersTable::new(12);
|
||||
let result = provider.register_table(table_name.to_string(), Arc::new(other_table));
|
||||
let err = result.err().unwrap();
|
||||
assert!(err.backtrace_opt().is_some());
|
||||
@@ -340,4 +370,34 @@ mod tests {
|
||||
.downcast_ref::<MemoryCatalogManager>()
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
pub async fn test_catalog_deregister_table() {
|
||||
let catalog = MemoryCatalogManager::default();
|
||||
let schema = catalog
|
||||
.schema(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME)
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
|
||||
let register_table_req = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "numbers".to_string(),
|
||||
table_id: 2333,
|
||||
table: Arc::new(NumbersTable::default()),
|
||||
};
|
||||
catalog.register_table(register_table_req).await.unwrap();
|
||||
assert!(schema.table_exist("numbers").unwrap());
|
||||
|
||||
let deregister_table_req = DeregisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "numbers".to_string(),
|
||||
};
|
||||
catalog
|
||||
.deregister_table(deregister_table_req)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!schema.table_exist("numbers").unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,10 +20,6 @@ use std::sync::Arc;
|
||||
use arc_swap::ArcSwap;
|
||||
use async_stream::stream;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, MIN_USER_TABLE_ID};
|
||||
use common_catalog::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
use common_telemetry::{debug, info};
|
||||
use futures::Stream;
|
||||
use futures_util::StreamExt;
|
||||
@@ -37,13 +33,17 @@ use tokio::sync::Mutex;
|
||||
|
||||
use crate::error::{
|
||||
CatalogNotFoundSnafu, CreateTableSnafu, InvalidCatalogValueSnafu, InvalidTableSchemaSnafu,
|
||||
OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu,
|
||||
OpenTableSnafu, Result, SchemaNotFoundSnafu, TableExistsSnafu, UnimplementedSnafu,
|
||||
};
|
||||
use crate::helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix, CatalogKey, CatalogValue,
|
||||
SchemaKey, SchemaValue, TableGlobalKey, TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
use crate::remote::{Kv, KvBackendRef};
|
||||
use crate::{
|
||||
handle_system_table_request, CatalogList, CatalogManager, CatalogProvider, CatalogProviderRef,
|
||||
RegisterSchemaRequest, RegisterSystemTableRequest, RegisterTableRequest, SchemaProvider,
|
||||
SchemaProviderRef,
|
||||
DeregisterTableRequest, RegisterSchemaRequest, RegisterSystemTableRequest,
|
||||
RegisterTableRequest, SchemaProvider, SchemaProviderRef,
|
||||
};
|
||||
|
||||
/// Catalog manager based on metasrv.
|
||||
@@ -154,8 +154,8 @@ impl RemoteCatalogManager {
|
||||
}
|
||||
let table_key = TableGlobalKey::parse(&String::from_utf8_lossy(&k))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
let table_value = TableGlobalValue::parse(&String::from_utf8_lossy(&v))
|
||||
.context(InvalidCatalogValueSnafu)?;
|
||||
let table_value =
|
||||
TableGlobalValue::from_bytes(&v).context(InvalidCatalogValueSnafu)?;
|
||||
|
||||
info!(
|
||||
"Found catalog table entry, key: {}, value: {:?}",
|
||||
@@ -411,7 +411,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<usize> {
|
||||
async fn register_table(&self, request: RegisterTableRequest) -> Result<bool> {
|
||||
let catalog_name = request.catalog;
|
||||
let schema_name = request.schema;
|
||||
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
|
||||
@@ -430,10 +430,17 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
.fail();
|
||||
}
|
||||
schema_provider.register_table(request.table_name, request.table)?;
|
||||
Ok(1)
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<usize> {
|
||||
async fn deregister_table(&self, _request: DeregisterTableRequest) -> Result<bool> {
|
||||
UnimplementedSnafu {
|
||||
operation: "deregister table",
|
||||
}
|
||||
.fail()
|
||||
}
|
||||
|
||||
async fn register_schema(&self, request: RegisterSchemaRequest) -> Result<bool> {
|
||||
let catalog_name = request.catalog;
|
||||
let schema_name = request.schema;
|
||||
let catalog_provider = self.catalog(&catalog_name)?.context(CatalogNotFoundSnafu {
|
||||
@@ -441,7 +448,7 @@ impl CatalogManager for RemoteCatalogManager {
|
||||
})?;
|
||||
let schema_provider = self.new_schema_provider(&catalog_name, &schema_name);
|
||||
catalog_provider.register_schema(schema_name, schema_provider)?;
|
||||
Ok(1)
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn register_system_table(&self, request: RegisterSystemTableRequest) -> Result<()> {
|
||||
|
||||
@@ -21,14 +21,13 @@ use common_catalog::consts::{
|
||||
SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
|
||||
};
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::{PhysicalPlanRef, RuntimeEnv};
|
||||
use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use common_telemetry::debug;
|
||||
use common_time::timestamp::Timestamp;
|
||||
use common_time::util;
|
||||
use datatypes::prelude::{ConcreteDataType, ScalarVector};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder, SchemaRef};
|
||||
use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
|
||||
use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
@@ -43,7 +42,6 @@ use crate::error::{
|
||||
|
||||
pub const ENTRY_TYPE_INDEX: usize = 0;
|
||||
pub const KEY_INDEX: usize = 1;
|
||||
pub const TIMESTAMP_INDEX: usize = 2;
|
||||
pub const VALUE_INDEX: usize = 3;
|
||||
|
||||
pub struct SystemCatalogTable {
|
||||
@@ -111,7 +109,7 @@ impl SystemCatalogTable {
|
||||
desc: Some("System catalog table".to_string()),
|
||||
schema: schema.clone(),
|
||||
region_numbers: vec![0],
|
||||
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX, TIMESTAMP_INDEX],
|
||||
primary_key_indices: vec![ENTRY_TYPE_INDEX, KEY_INDEX],
|
||||
create_if_not_exists: true,
|
||||
table_options: HashMap::new(),
|
||||
};
|
||||
@@ -128,13 +126,14 @@ impl SystemCatalogTable {
|
||||
/// Create a stream of all entries inside system catalog table
|
||||
pub async fn records(&self) -> Result<SendableRecordBatchStream> {
|
||||
let full_projection = None;
|
||||
let ctx = SessionContext::new();
|
||||
let scan = self
|
||||
.table
|
||||
.scan(&full_projection, &[], None)
|
||||
.await
|
||||
.context(error::SystemCatalogTableScanSnafu)?;
|
||||
let stream = scan
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.execute(0, ctx.task_ctx())
|
||||
.context(error::SystemCatalogTableScanExecSnafu)?;
|
||||
Ok(stream)
|
||||
}
|
||||
@@ -162,7 +161,7 @@ fn build_system_catalog_schema() -> Schema {
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"timestamp".to_string(),
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
@@ -173,12 +172,12 @@ fn build_system_catalog_schema() -> Schema {
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"gmt_created".to_string(),
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
"gmt_modified".to_string(),
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
),
|
||||
];
|
||||
@@ -223,7 +222,7 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
|
||||
// Timestamp in key part is intentionally left to 0
|
||||
columns_values.insert(
|
||||
"timestamp".to_string(),
|
||||
Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(0)])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
|
||||
);
|
||||
|
||||
columns_values.insert(
|
||||
@@ -231,18 +230,15 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
|
||||
Arc::new(BinaryVector::from_slice(&[value])) as _,
|
||||
);
|
||||
|
||||
let now = util::current_time_millis();
|
||||
columns_values.insert(
|
||||
"gmt_created".to_string(),
|
||||
Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
|
||||
util::current_time_millis(),
|
||||
)])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
|
||||
);
|
||||
|
||||
columns_values.insert(
|
||||
"gmt_modified".to_string(),
|
||||
Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
|
||||
util::current_time_millis(),
|
||||
)])) as _,
|
||||
Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
|
||||
);
|
||||
|
||||
InsertRequest {
|
||||
@@ -456,7 +452,7 @@ mod tests {
|
||||
pub async fn prepare_table_engine() -> (TempDir, TableEngineRef) {
|
||||
let dir = TempDir::new("system-table-test").unwrap();
|
||||
let store_dir = dir.path().to_string_lossy();
|
||||
let accessor = opendal::services::fs::Builder::default()
|
||||
let accessor = object_store::backend::fs::Builder::default()
|
||||
.root(&store_dir)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
@@ -26,9 +26,9 @@ use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_recordbatch::error::Result as RecordBatchResult;
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStream};
|
||||
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
|
||||
use datatypes::prelude::{ConcreteDataType, DataType};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::value::ValueRef;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use futures::Stream;
|
||||
use snafu::ResultExt;
|
||||
@@ -149,26 +149,33 @@ fn tables_to_record_batch(
|
||||
engine: &str,
|
||||
) -> Vec<VectorRef> {
|
||||
let mut catalog_vec =
|
||||
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
|
||||
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
|
||||
let mut schema_vec =
|
||||
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
|
||||
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
|
||||
let mut table_name_vec =
|
||||
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
|
||||
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
|
||||
let mut engine_vec =
|
||||
VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
|
||||
ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
|
||||
|
||||
for table_name in table_names {
|
||||
catalog_vec.push(&Value::String(catalog_name.into()));
|
||||
schema_vec.push(&Value::String(schema_name.into()));
|
||||
table_name_vec.push(&Value::String(table_name.into()));
|
||||
engine_vec.push(&Value::String(engine.into()));
|
||||
// Safety: All these vectors are string type.
|
||||
catalog_vec
|
||||
.push_value_ref(ValueRef::String(catalog_name))
|
||||
.unwrap();
|
||||
schema_vec
|
||||
.push_value_ref(ValueRef::String(schema_name))
|
||||
.unwrap();
|
||||
table_name_vec
|
||||
.push_value_ref(ValueRef::String(&table_name))
|
||||
.unwrap();
|
||||
engine_vec.push_value_ref(ValueRef::String(engine)).unwrap();
|
||||
}
|
||||
|
||||
vec![
|
||||
catalog_vec.finish(),
|
||||
schema_vec.finish(),
|
||||
table_name_vec.finish(),
|
||||
engine_vec.finish(),
|
||||
catalog_vec.to_vector(),
|
||||
schema_vec.to_vector(),
|
||||
table_name_vec.to_vector(),
|
||||
engine_vec.to_vector(),
|
||||
]
|
||||
}
|
||||
|
||||
@@ -340,9 +347,7 @@ fn build_schema_for_tables() -> Schema {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_query::physical_plan::RuntimeEnv;
|
||||
use datatypes::arrow::array::Utf8Array;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use common_query::physical_plan::SessionContext;
|
||||
use futures_util::StreamExt;
|
||||
use table::table::numbers::NumbersTable;
|
||||
|
||||
@@ -366,56 +371,47 @@ mod tests {
|
||||
|
||||
let tables = Tables::new(catalog_list, "test_engine".to_string());
|
||||
let tables_stream = tables.scan(&None, &[], None).await.unwrap();
|
||||
let mut tables_stream = tables_stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.unwrap();
|
||||
let session_ctx = SessionContext::new();
|
||||
let mut tables_stream = tables_stream.execute(0, session_ctx.task_ctx()).unwrap();
|
||||
|
||||
if let Some(t) = tables_stream.next().await {
|
||||
let batch = t.unwrap().df_recordbatch;
|
||||
let batch = t.unwrap();
|
||||
assert_eq!(1, batch.num_rows());
|
||||
assert_eq!(4, batch.num_columns());
|
||||
assert_eq!(&DataType::Utf8, batch.column(0).data_type());
|
||||
assert_eq!(&DataType::Utf8, batch.column(1).data_type());
|
||||
assert_eq!(&DataType::Utf8, batch.column(2).data_type());
|
||||
assert_eq!(&DataType::Utf8, batch.column(3).data_type());
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
batch.column(0).data_type()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
batch.column(1).data_type()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
batch.column(2).data_type()
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::string_datatype(),
|
||||
batch.column(3).data_type()
|
||||
);
|
||||
assert_eq!(
|
||||
"greptime",
|
||||
batch
|
||||
.column(0)
|
||||
.as_any()
|
||||
.downcast_ref::<Utf8Array<i32>>()
|
||||
.unwrap()
|
||||
.value(0)
|
||||
batch.column(0).get_ref(0).as_string().unwrap().unwrap()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
"public",
|
||||
batch
|
||||
.column(1)
|
||||
.as_any()
|
||||
.downcast_ref::<Utf8Array<i32>>()
|
||||
.unwrap()
|
||||
.value(0)
|
||||
batch.column(1).get_ref(0).as_string().unwrap().unwrap()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
"test_table",
|
||||
batch
|
||||
.column(2)
|
||||
.as_any()
|
||||
.downcast_ref::<Utf8Array<i32>>()
|
||||
.unwrap()
|
||||
.value(0)
|
||||
batch.column(2).get_ref(0).as_string().unwrap().unwrap()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
"test_engine",
|
||||
batch
|
||||
.column(3)
|
||||
.as_any()
|
||||
.downcast_ref::<Utf8Array<i32>>()
|
||||
.unwrap()
|
||||
.value(0)
|
||||
batch.column(3).get_ref(0).as_string().unwrap().unwrap()
|
||||
);
|
||||
} else {
|
||||
panic!("Record batch should not be empty!")
|
||||
|
||||
132
src/catalog/tests/local_catalog_tests.rs
Normal file
132
src/catalog/tests/local_catalog_tests.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::local::LocalCatalogManager;
|
||||
use catalog::{CatalogManager, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_telemetry::{error, info};
|
||||
use mito::config::EngineConfig;
|
||||
use table::table::numbers::NumbersTable;
|
||||
use table::TableRef;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
async fn create_local_catalog_manager() -> Result<LocalCatalogManager, catalog::error::Error> {
|
||||
let (_dir, object_store) =
|
||||
mito::table::test_util::new_test_object_store("setup_mock_engine_and_table").await;
|
||||
let mock_engine = Arc::new(mito::table::test_util::MockMitoEngine::new(
|
||||
EngineConfig::default(),
|
||||
mito::table::test_util::MockEngine::default(),
|
||||
object_store,
|
||||
));
|
||||
let catalog_manager = LocalCatalogManager::try_new(mock_engine).await.unwrap();
|
||||
catalog_manager.start().await?;
|
||||
Ok(catalog_manager)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_duplicate_register() {
|
||||
let catalog_manager = create_local_catalog_manager().await.unwrap();
|
||||
let request = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "test_table".to_string(),
|
||||
table_id: 42,
|
||||
table: Arc::new(NumbersTable::new(42)),
|
||||
};
|
||||
assert!(catalog_manager
|
||||
.register_table(request.clone())
|
||||
.await
|
||||
.unwrap());
|
||||
|
||||
// register table with same table id will succeed with 0 as return val.
|
||||
assert!(!catalog_manager.register_table(request).await.unwrap());
|
||||
|
||||
let err = catalog_manager
|
||||
.register_table(RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "test_table".to_string(),
|
||||
table_id: 43,
|
||||
table: Arc::new(NumbersTable::new(43)),
|
||||
})
|
||||
.await
|
||||
.unwrap_err();
|
||||
assert!(
|
||||
err.to_string()
|
||||
.contains("Table `greptime.public.test_table` already exists"),
|
||||
"Actual error message: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_concurrent_register() {
|
||||
common_telemetry::init_default_ut_logging();
|
||||
let rt = Arc::new(tokio::runtime::Builder::new_multi_thread().build().unwrap());
|
||||
let catalog_manager =
|
||||
Arc::new(rt.block_on(async { create_local_catalog_manager().await.unwrap() }));
|
||||
|
||||
let succeed: Arc<Mutex<Option<TableRef>>> = Arc::new(Mutex::new(None));
|
||||
|
||||
let mut handles = Vec::with_capacity(8);
|
||||
for i in 0..8 {
|
||||
let catalog = catalog_manager.clone();
|
||||
let succeed = succeed.clone();
|
||||
let handle = rt.spawn(async move {
|
||||
let table_id = 42 + i;
|
||||
let table = Arc::new(NumbersTable::new(table_id));
|
||||
let req = RegisterTableRequest {
|
||||
catalog: DEFAULT_CATALOG_NAME.to_string(),
|
||||
schema: DEFAULT_SCHEMA_NAME.to_string(),
|
||||
table_name: "test_table".to_string(),
|
||||
table_id,
|
||||
table: table.clone(),
|
||||
};
|
||||
match catalog.register_table(req).await {
|
||||
Ok(res) => {
|
||||
if res {
|
||||
let mut succeed = succeed.lock().await;
|
||||
info!("Successfully registered table: {}", table_id);
|
||||
*succeed = Some(table);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
error!("Failed to register table {}", table_id);
|
||||
}
|
||||
}
|
||||
});
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
rt.block_on(async move {
|
||||
for handle in handles {
|
||||
handle.await.unwrap();
|
||||
}
|
||||
let guard = succeed.lock().await;
|
||||
let table = guard.as_ref().unwrap();
|
||||
let table_registered = catalog_manager
|
||||
.table(DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME, "test_table")
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
table_registered.table_info().ident.table_id,
|
||||
table.table_info().ident.table_id
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -217,7 +217,7 @@ impl TableEngine for MockTableEngine {
|
||||
&self,
|
||||
_ctx: &EngineContext,
|
||||
_request: DropTableRequest,
|
||||
) -> table::Result<()> {
|
||||
) -> table::Result<bool> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,12 +22,12 @@ mod tests {
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
use catalog::helper::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use catalog::remote::{
|
||||
KvBackend, KvBackendRef, RemoteCatalogManager, RemoteCatalogProvider, RemoteSchemaProvider,
|
||||
};
|
||||
use catalog::{CatalogList, CatalogManager, RegisterTableRequest};
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_catalog::{CatalogKey, CatalogValue, SchemaKey, SchemaValue};
|
||||
use datatypes::schema::Schema;
|
||||
use futures_util::StreamExt;
|
||||
use table::engine::{EngineContext, TableEngineRef};
|
||||
@@ -202,7 +202,7 @@ mod tests {
|
||||
table_id,
|
||||
table,
|
||||
};
|
||||
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
|
||||
assert!(catalog_manager.register_table(reg_req).await.unwrap());
|
||||
assert_eq!(
|
||||
HashSet::from([table_name, "numbers".to_string()]),
|
||||
default_schema
|
||||
@@ -287,7 +287,7 @@ mod tests {
|
||||
.register_schema(schema_name.clone(), schema.clone())
|
||||
.expect("Register schema should not fail");
|
||||
assert!(prev.is_none());
|
||||
assert_eq!(1, catalog_manager.register_table(reg_req).await.unwrap());
|
||||
assert!(catalog_manager.register_table(reg_req).await.unwrap());
|
||||
|
||||
assert_eq!(
|
||||
HashSet::from([schema_name.clone()]),
|
||||
|
||||
@@ -11,13 +11,11 @@ async-stream = "0.3"
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-grpc = { path = "../common/grpc" }
|
||||
common-grpc-expr = { path = "../common/grpc-expr" }
|
||||
common-query = { path = "../common/query" }
|
||||
common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-grpc-expr = { path = "../common/grpc-expr" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion = "14.0.0"
|
||||
datatypes = { path = "../datatypes" }
|
||||
enum_dispatch = "0.3"
|
||||
parking_lot = "0.12"
|
||||
|
||||
@@ -12,11 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::*;
|
||||
use client::{Client, Database};
|
||||
|
||||
fn main() {
|
||||
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
|
||||
.unwrap();
|
||||
@@ -29,19 +27,19 @@ async fn run() {
|
||||
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
|
||||
let db = Database::new("greptime", client);
|
||||
|
||||
let (columns, row_count) = insert_data();
|
||||
|
||||
let expr = InsertExpr {
|
||||
schema_name: "public".to_string(),
|
||||
table_name: "demo".to_string(),
|
||||
expr: Some(insert_expr::Expr::Values(insert_expr::Values {
|
||||
values: insert_batches(),
|
||||
})),
|
||||
options: HashMap::default(),
|
||||
region_number: 0,
|
||||
columns,
|
||||
row_count,
|
||||
};
|
||||
db.insert(expr).await.unwrap();
|
||||
}
|
||||
|
||||
fn insert_batches() -> Vec<Vec<u8>> {
|
||||
fn insert_data() -> (Vec<Column>, u32) {
|
||||
const SEMANTIC_TAG: i32 = 0;
|
||||
const SEMANTIC_FIELD: i32 = 1;
|
||||
const SEMANTIC_TS: i32 = 2;
|
||||
@@ -101,9 +99,8 @@ fn insert_batches() -> Vec<Vec<u8>> {
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let insert_batch = InsertBatch {
|
||||
columns: vec![host_column, cpu_column, mem_column, ts_column],
|
||||
(
|
||||
vec![host_column, cpu_column, mem_column, ts_column],
|
||||
row_count,
|
||||
};
|
||||
vec![insert_batch.into()]
|
||||
)
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ async fn run() {
|
||||
column_defs: vec![
|
||||
ColumnDef {
|
||||
name: "timestamp".to_string(),
|
||||
datatype: ColumnDataType::Timestamp as i32,
|
||||
datatype: ColumnDataType::TimestampMillisecond as i32,
|
||||
is_nullable: false,
|
||||
default_constraint: None,
|
||||
},
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use client::{Client, Database};
|
||||
use common_grpc::MockExecution;
|
||||
use datafusion::physical_plan::expressions::Column;
|
||||
use datafusion::physical_plan::projection::ProjectionExec;
|
||||
use datafusion::physical_plan::{ExecutionPlan, PhysicalExpr};
|
||||
use tracing::{event, Level};
|
||||
|
||||
fn main() {
|
||||
tracing::subscriber::set_global_default(tracing_subscriber::FmtSubscriber::builder().finish())
|
||||
.unwrap();
|
||||
|
||||
run();
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn run() {
|
||||
let client = Client::with_urls(vec!["127.0.0.1:3001"]);
|
||||
let db = Database::new("greptime", client);
|
||||
|
||||
let physical = mock_physical_plan();
|
||||
let result = db.physical_plan(physical, None).await;
|
||||
|
||||
event!(Level::INFO, "result: {:#?}", result);
|
||||
}
|
||||
|
||||
fn mock_physical_plan() -> Arc<dyn ExecutionPlan> {
|
||||
let id_expr = Arc::new(Column::new("id", 0)) as Arc<dyn PhysicalExpr>;
|
||||
let age_expr = Arc::new(Column::new("age", 2)) as Arc<dyn PhysicalExpr>;
|
||||
let expr = vec![(id_expr, "id".to_string()), (age_expr, "age".to_string())];
|
||||
|
||||
let input =
|
||||
Arc::new(MockExecution::new("mock_input_exec".to_string())) as Arc<dyn ExecutionPlan>;
|
||||
let projection = ProjectionExec::try_new(expr, input).unwrap();
|
||||
Arc::new(projection)
|
||||
}
|
||||
@@ -58,7 +58,19 @@ impl Admin {
|
||||
header: Some(header),
|
||||
expr: Some(admin_expr::Expr::Alter(expr)),
|
||||
};
|
||||
Ok(self.do_requests(vec![expr]).await?.remove(0))
|
||||
self.do_request(expr).await
|
||||
}
|
||||
|
||||
pub async fn drop_table(&self, expr: DropTableExpr) -> Result<AdminResult> {
|
||||
let header = ExprHeader {
|
||||
version: PROTOCOL_VERSION,
|
||||
};
|
||||
let expr = AdminExpr {
|
||||
header: Some(header),
|
||||
expr: Some(admin_expr::Expr::DropTable(expr)),
|
||||
};
|
||||
|
||||
self.do_request(expr).await
|
||||
}
|
||||
|
||||
/// Invariants: the lengths of input vec (`Vec<AdminExpr>`) and output vec (`Vec<AdminResult>`) are equal.
|
||||
|
||||
@@ -18,22 +18,17 @@ use api::v1::codec::SelectResult as GrpcSelectResult;
|
||||
use api::v1::column::SemanticType;
|
||||
use api::v1::{
|
||||
object_expr, object_result, select_expr, DatabaseRequest, ExprHeader, InsertExpr,
|
||||
MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, PhysicalPlan,
|
||||
SelectExpr,
|
||||
MutateResult as GrpcMutateResult, ObjectExpr, ObjectResult as GrpcObjectResult, SelectExpr,
|
||||
};
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_grpc::{AsExecutionPlan, DefaultAsPlanImpl};
|
||||
use common_grpc_expr::column_to_vector;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datafusion::physical_plan::ExecutionPlan;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu, EncodePhysicalSnafu,
|
||||
};
|
||||
use crate::error::{ColumnToVectorSnafu, ConvertSchemaSnafu, DatanodeSnafu, DecodeSelectSnafu};
|
||||
use crate::{error, Client, Result};
|
||||
|
||||
pub const PROTOCOL_VERSION: u32 = 1;
|
||||
@@ -94,24 +89,6 @@ impl Database {
|
||||
self.do_select(select_expr).await
|
||||
}
|
||||
|
||||
pub async fn physical_plan(
|
||||
&self,
|
||||
physical: Arc<dyn ExecutionPlan>,
|
||||
original_ql: Option<String>,
|
||||
) -> Result<ObjectResult> {
|
||||
let plan = DefaultAsPlanImpl::try_from_physical_plan(physical.clone())
|
||||
.context(EncodePhysicalSnafu { physical })?
|
||||
.bytes;
|
||||
let original_ql = original_ql.unwrap_or_default();
|
||||
let select_expr = SelectExpr {
|
||||
expr: Some(select_expr::Expr::PhysicalPlan(PhysicalPlan {
|
||||
original_ql: original_ql.into_bytes(),
|
||||
plan,
|
||||
})),
|
||||
};
|
||||
self.do_select(select_expr).await
|
||||
}
|
||||
|
||||
pub async fn logical_plan(&self, logical_plan: Vec<u8>) -> Result<ObjectResult> {
|
||||
let select_expr = SelectExpr {
|
||||
expr: Some(select_expr::Expr::LogicalPlan(logical_plan)),
|
||||
@@ -341,12 +318,11 @@ mod tests {
|
||||
|
||||
fn create_test_column(vector: VectorRef) -> Column {
|
||||
let wrapper: ColumnDataTypeWrapper = vector.data_type().try_into().unwrap();
|
||||
let array = vector.to_arrow_array();
|
||||
Column {
|
||||
column_name: "test".to_string(),
|
||||
semantic_type: 1,
|
||||
values: Some(values(&[array.clone()]).unwrap()),
|
||||
null_mask: null_mask(&vec![array], vector.len()),
|
||||
values: Some(values(&[vector.clone()]).unwrap()),
|
||||
null_mask: null_mask(&[vector.clone()], vector.len()),
|
||||
datatype: wrapper.datatype() as i32,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,18 +10,19 @@ name = "greptime"
|
||||
path = "src/bin/greptime.rs"
|
||||
|
||||
[dependencies]
|
||||
anymap = "1.0.0-beta.2"
|
||||
clap = { version = "3.1", features = ["derive"] }
|
||||
common-error = { path = "../common/error" }
|
||||
common-telemetry = { path = "../common/telemetry", features = [
|
||||
"deadlock_detection",
|
||||
] }
|
||||
meta-client = { path = "../meta-client" }
|
||||
datanode = { path = "../datanode" }
|
||||
frontend = { path = "../frontend" }
|
||||
futures = "0.3"
|
||||
meta-client = { path = "../meta-client" }
|
||||
meta-srv = { path = "../meta-srv" }
|
||||
serde = "1.0"
|
||||
servers = {path = "../servers"}
|
||||
servers = { path = "../servers" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
tokio = { version = "1.18", features = ["full"] }
|
||||
toml = "0.5"
|
||||
@@ -29,3 +30,6 @@ toml = "0.5"
|
||||
[dev-dependencies]
|
||||
serde = "1.0"
|
||||
tempdir = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
build-data = "0.1.3"
|
||||
|
||||
29
src/cmd/build.rs
Normal file
29
src/cmd/build.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
const DEFAULT_VALUE: &str = "unknown";
|
||||
fn main() {
|
||||
println!(
|
||||
"cargo:rustc-env=GIT_COMMIT={}",
|
||||
build_data::get_git_commit().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
|
||||
);
|
||||
println!(
|
||||
"cargo:rustc-env=GIT_BRANCH={}",
|
||||
build_data::get_git_branch().unwrap_or_else(|_| DEFAULT_VALUE.to_string())
|
||||
);
|
||||
println!(
|
||||
"cargo:rustc-env=GIT_DIRTY={}",
|
||||
build_data::get_git_dirty().map_or(DEFAULT_VALUE.to_string(), |v| v.to_string())
|
||||
);
|
||||
}
|
||||
@@ -20,7 +20,7 @@ use cmd::{datanode, frontend, metasrv, standalone};
|
||||
use common_telemetry::logging::{error, info};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[clap(name = "greptimedb")]
|
||||
#[clap(name = "greptimedb", version = print_version())]
|
||||
struct Command {
|
||||
#[clap(long, default_value = "/tmp/greptimedb/logs")]
|
||||
log_dir: String,
|
||||
@@ -70,6 +70,19 @@ impl fmt::Display for SubCommand {
|
||||
}
|
||||
}
|
||||
|
||||
fn print_version() -> &'static str {
|
||||
concat!(
|
||||
"\nbranch: ",
|
||||
env!("GIT_BRANCH"),
|
||||
"\ncommit: ",
|
||||
env!("GIT_COMMIT"),
|
||||
"\ndirty: ",
|
||||
env!("GIT_DIRTY"),
|
||||
"\nversion: ",
|
||||
env!("CARGO_PKG_VERSION")
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
let cmd = Command::parse();
|
||||
|
||||
@@ -170,6 +170,7 @@ mod tests {
|
||||
ObjectStoreConfig::File { data_dir } => {
|
||||
assert_eq!("/tmp/greptimedb/data/".to_string(), data_dir)
|
||||
}
|
||||
ObjectStoreConfig::S3 { .. } => unreachable!(),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -25,12 +25,6 @@ pub enum Error {
|
||||
source: datanode::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to build frontend, source: {}", source))]
|
||||
BuildFrontend {
|
||||
#[snafu(backtrace)]
|
||||
source: frontend::error::Error,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to start frontend, source: {}", source))]
|
||||
StartFrontend {
|
||||
#[snafu(backtrace)]
|
||||
@@ -61,6 +55,12 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Illegal config: {}", msg))]
|
||||
IllegalConfig { msg: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Illegal auth config: {}", source))]
|
||||
IllegalAuthConfig {
|
||||
#[snafu(backtrace)]
|
||||
source: servers::auth::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -75,7 +75,7 @@ impl ErrorExt for Error {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
Error::IllegalConfig { .. } => StatusCode::InvalidArguments,
|
||||
Error::BuildFrontend { source, .. } => source.status_code(),
|
||||
Error::IllegalAuthConfig { .. } => StatusCode::InvalidArguments,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use anymap::AnyMap;
|
||||
use clap::Parser;
|
||||
use frontend::frontend::{Frontend, FrontendOptions};
|
||||
use frontend::grpc::GrpcOptions;
|
||||
@@ -21,10 +22,13 @@ use frontend::mysql::MysqlOptions;
|
||||
use frontend::opentsdb::OpentsdbOptions;
|
||||
use frontend::postgres::PostgresOptions;
|
||||
use meta_client::MetaClientOpts;
|
||||
use servers::Mode;
|
||||
use servers::auth::UserProviderRef;
|
||||
use servers::http::HttpOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use servers::{auth, Mode};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::error::{self, IllegalAuthConfigSnafu, Result};
|
||||
use crate::toml_loader;
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -70,21 +74,41 @@ pub struct StartCommand {
|
||||
influxdb_enable: Option<bool>,
|
||||
#[clap(long)]
|
||||
metasrv_addr: Option<String>,
|
||||
#[clap(long)]
|
||||
tls_mode: Option<TlsMode>,
|
||||
#[clap(long)]
|
||||
tls_cert_path: Option<String>,
|
||||
#[clap(long)]
|
||||
tls_key_path: Option<String>,
|
||||
#[clap(long)]
|
||||
user_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
async fn run(self) -> Result<()> {
|
||||
let plugins = load_frontend_plugins(&self.user_provider)?;
|
||||
let opts: FrontendOptions = self.try_into()?;
|
||||
let mut frontend = Frontend::new(
|
||||
opts.clone(),
|
||||
Instance::try_new(&opts)
|
||||
Instance::try_new_distributed(&opts)
|
||||
.await
|
||||
.context(error::StartFrontendSnafu)?,
|
||||
plugins,
|
||||
);
|
||||
frontend.start().await.context(error::StartFrontendSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_frontend_plugins(user_provider: &Option<String>) -> Result<AnyMap> {
|
||||
let mut plugins = AnyMap::new();
|
||||
|
||||
if let Some(provider) = user_provider {
|
||||
let provider = auth::user_provider_from_option(provider).context(IllegalAuthConfigSnafu)?;
|
||||
plugins.insert::<UserProviderRef>(provider);
|
||||
}
|
||||
Ok(plugins)
|
||||
}
|
||||
|
||||
impl TryFrom<StartCommand> for FrontendOptions {
|
||||
type Error = error::Error;
|
||||
|
||||
@@ -95,8 +119,13 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
FrontendOptions::default()
|
||||
};
|
||||
|
||||
let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);
|
||||
|
||||
if let Some(addr) = cmd.http_addr {
|
||||
opts.http_addr = Some(addr);
|
||||
opts.http_options = Some(HttpOptions {
|
||||
addr,
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
if let Some(addr) = cmd.grpc_addr {
|
||||
opts.grpc_options = Some(GrpcOptions {
|
||||
@@ -107,12 +136,14 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
if let Some(addr) = cmd.mysql_addr {
|
||||
opts.mysql_options = Some(MysqlOptions {
|
||||
addr,
|
||||
tls: tls_option.clone(),
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
if let Some(addr) = cmd.postgres_addr {
|
||||
opts.postgres_options = Some(PostgresOptions {
|
||||
addr,
|
||||
tls: tls_option,
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
@@ -141,6 +172,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use servers::auth::{Identity, Password, UserProviderRef};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -154,10 +189,14 @@ mod tests {
|
||||
influxdb_enable: Some(false),
|
||||
config_file: None,
|
||||
metasrv_addr: None,
|
||||
tls_mode: None,
|
||||
tls_cert_path: None,
|
||||
tls_key_path: None,
|
||||
user_provider: None,
|
||||
};
|
||||
|
||||
let opts: FrontendOptions = command.try_into().unwrap();
|
||||
assert_eq!(opts.http_addr, Some("127.0.0.1:1234".to_string()));
|
||||
assert_eq!(opts.http_options.as_ref().unwrap().addr, "127.0.0.1:1234");
|
||||
assert_eq!(opts.mysql_options.as_ref().unwrap().addr, "127.0.0.1:5678");
|
||||
assert_eq!(
|
||||
opts.postgres_options.as_ref().unwrap().addr,
|
||||
@@ -188,4 +227,66 @@ mod tests {
|
||||
|
||||
assert!(!opts.influxdb_options.unwrap().enable);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_read_from_config_file() {
|
||||
let command = StartCommand {
|
||||
http_addr: None,
|
||||
grpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
influxdb_enable: None,
|
||||
config_file: Some(format!(
|
||||
"{}/../../config/frontend.example.toml",
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
metasrv_addr: None,
|
||||
tls_mode: None,
|
||||
tls_cert_path: None,
|
||||
tls_key_path: None,
|
||||
user_provider: None,
|
||||
};
|
||||
|
||||
let fe_opts = FrontendOptions::try_from(command).unwrap();
|
||||
assert_eq!(Mode::Distributed, fe_opts.mode);
|
||||
assert_eq!(
|
||||
"127.0.0.1:4000".to_string(),
|
||||
fe_opts.http_options.as_ref().unwrap().addr
|
||||
);
|
||||
assert_eq!(
|
||||
Duration::from_secs(30),
|
||||
fe_opts.http_options.as_ref().unwrap().timeout
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_try_from_start_command_to_anymap() {
|
||||
let command = StartCommand {
|
||||
http_addr: None,
|
||||
grpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
influxdb_enable: None,
|
||||
config_file: None,
|
||||
metasrv_addr: None,
|
||||
tls_mode: None,
|
||||
tls_cert_path: None,
|
||||
tls_key_path: None,
|
||||
user_provider: Some("static_user_provider:cmd:test=test".to_string()),
|
||||
};
|
||||
|
||||
let plugins = load_frontend_plugins(&command.user_provider);
|
||||
assert!(plugins.is_ok());
|
||||
let plugins = plugins.unwrap();
|
||||
let provider = plugins.get::<UserProviderRef>();
|
||||
assert!(provider.is_some());
|
||||
|
||||
let provider = provider.unwrap();
|
||||
let result = provider
|
||||
.auth(Identity::UserId("test", None), Password::PlainText("test"))
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use anymap::AnyMap;
|
||||
use clap::Parser;
|
||||
use common_telemetry::info;
|
||||
use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
|
||||
@@ -25,13 +26,13 @@ use frontend::opentsdb::OpentsdbOptions;
|
||||
use frontend::postgres::PostgresOptions;
|
||||
use frontend::prometheus::PrometheusOptions;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use servers::http::HttpOptions;
|
||||
use servers::tls::{TlsMode, TlsOption};
|
||||
use servers::Mode;
|
||||
use snafu::ResultExt;
|
||||
use tokio::try_join;
|
||||
|
||||
use crate::error::{
|
||||
BuildFrontendSnafu, Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu,
|
||||
};
|
||||
use crate::error::{Error, IllegalConfigSnafu, Result, StartDatanodeSnafu, StartFrontendSnafu};
|
||||
use crate::frontend::load_frontend_plugins;
|
||||
use crate::toml_loader;
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -61,7 +62,7 @@ impl SubCommand {
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct StandaloneOptions {
|
||||
pub http_addr: Option<String>,
|
||||
pub http_options: Option<HttpOptions>,
|
||||
pub grpc_options: Option<GrpcOptions>,
|
||||
pub mysql_options: Option<MysqlOptions>,
|
||||
pub postgres_options: Option<PostgresOptions>,
|
||||
@@ -71,12 +72,13 @@ pub struct StandaloneOptions {
|
||||
pub mode: Mode,
|
||||
pub wal_dir: String,
|
||||
pub storage: ObjectStoreConfig,
|
||||
pub enable_memory_catalog: bool,
|
||||
}
|
||||
|
||||
impl Default for StandaloneOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
http_addr: Some("127.0.0.1:4000".to_string()),
|
||||
http_options: Some(HttpOptions::default()),
|
||||
grpc_options: Some(GrpcOptions::default()),
|
||||
mysql_options: Some(MysqlOptions::default()),
|
||||
postgres_options: Some(PostgresOptions::default()),
|
||||
@@ -86,6 +88,7 @@ impl Default for StandaloneOptions {
|
||||
mode: Mode::Standalone,
|
||||
wal_dir: "/tmp/greptimedb/wal".to_string(),
|
||||
storage: ObjectStoreConfig::default(),
|
||||
enable_memory_catalog: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -93,7 +96,7 @@ impl Default for StandaloneOptions {
|
||||
impl StandaloneOptions {
|
||||
fn frontend_options(self) -> FrontendOptions {
|
||||
FrontendOptions {
|
||||
http_addr: self.http_addr,
|
||||
http_options: self.http_options,
|
||||
grpc_options: self.grpc_options,
|
||||
mysql_options: self.mysql_options,
|
||||
postgres_options: self.postgres_options,
|
||||
@@ -101,7 +104,6 @@ impl StandaloneOptions {
|
||||
influxdb_options: self.influxdb_options,
|
||||
prometheus_options: self.prometheus_options,
|
||||
mode: self.mode,
|
||||
datanode_rpc_addr: "127.0.0.1:3001".to_string(),
|
||||
meta_client_opts: None,
|
||||
}
|
||||
}
|
||||
@@ -110,6 +112,7 @@ impl StandaloneOptions {
|
||||
DatanodeOptions {
|
||||
wal_dir: self.wal_dir,
|
||||
storage: self.storage,
|
||||
enable_memory_catalog: self.enable_memory_catalog,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
@@ -131,18 +134,31 @@ struct StartCommand {
|
||||
influxdb_enable: bool,
|
||||
#[clap(short, long)]
|
||||
config_file: Option<String>,
|
||||
#[clap(short = 'm', long = "memory-catalog")]
|
||||
enable_memory_catalog: bool,
|
||||
#[clap(long)]
|
||||
tls_mode: Option<TlsMode>,
|
||||
#[clap(long)]
|
||||
tls_cert_path: Option<String>,
|
||||
#[clap(long)]
|
||||
tls_key_path: Option<String>,
|
||||
#[clap(long)]
|
||||
user_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl StartCommand {
|
||||
async fn run(self) -> Result<()> {
|
||||
let enable_memory_catalog = self.enable_memory_catalog;
|
||||
let config_file = self.config_file.clone();
|
||||
let plugins = load_frontend_plugins(&self.user_provider)?;
|
||||
let fe_opts = FrontendOptions::try_from(self)?;
|
||||
let dn_opts: DatanodeOptions = {
|
||||
let opts: StandaloneOptions = if let Some(path) = config_file {
|
||||
let mut opts: StandaloneOptions = if let Some(path) = config_file {
|
||||
toml_loader::from_file!(&path)?
|
||||
} else {
|
||||
StandaloneOptions::default()
|
||||
};
|
||||
opts.enable_memory_catalog = enable_memory_catalog;
|
||||
opts.datanode_options()
|
||||
};
|
||||
|
||||
@@ -154,13 +170,16 @@ impl StartCommand {
|
||||
let mut datanode = Datanode::new(dn_opts.clone())
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
let mut frontend = build_frontend(fe_opts, &dn_opts, datanode.get_instance()).await?;
|
||||
let mut frontend = build_frontend(fe_opts, plugins, datanode.get_instance()).await?;
|
||||
|
||||
try_join!(
|
||||
async { datanode.start().await.context(StartDatanodeSnafu) },
|
||||
async { frontend.start().await.context(StartFrontendSnafu) }
|
||||
)?;
|
||||
// Start datanode instance before starting services, to avoid requests come in before internal components are started.
|
||||
datanode
|
||||
.start_instance()
|
||||
.await
|
||||
.context(StartDatanodeSnafu)?;
|
||||
info!("Datanode instance started");
|
||||
|
||||
frontend.start().await.context(StartFrontendSnafu)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -168,20 +187,12 @@ impl StartCommand {
|
||||
/// Build frontend instance in standalone mode
|
||||
async fn build_frontend(
|
||||
fe_opts: FrontendOptions,
|
||||
dn_opts: &DatanodeOptions,
|
||||
plugins: AnyMap,
|
||||
datanode_instance: InstanceRef,
|
||||
) -> Result<Frontend<FeInstance>> {
|
||||
let grpc_server_addr = &dn_opts.rpc_addr;
|
||||
info!(
|
||||
"Build frontend with datanode gRPC addr: {}",
|
||||
grpc_server_addr
|
||||
);
|
||||
let mut frontend_instance = FeInstance::try_new(&fe_opts)
|
||||
.await
|
||||
.context(BuildFrontendSnafu)?;
|
||||
frontend_instance.set_catalog_manager(datanode_instance.catalog_manager().clone());
|
||||
let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
|
||||
frontend_instance.set_script_handler(datanode_instance);
|
||||
Ok(Frontend::new(fe_opts, frontend_instance))
|
||||
Ok(Frontend::new(fe_opts, frontend_instance, plugins))
|
||||
}
|
||||
|
||||
impl TryFrom<StartCommand> for FrontendOptions {
|
||||
@@ -199,7 +210,10 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
opts.mode = Mode::Standalone;
|
||||
|
||||
if let Some(addr) = cmd.http_addr {
|
||||
opts.http_addr = Some(addr);
|
||||
opts.http_options = Some(HttpOptions {
|
||||
addr,
|
||||
..Default::default()
|
||||
});
|
||||
}
|
||||
if let Some(addr) = cmd.rpc_addr {
|
||||
// frontend grpc addr conflict with datanode default grpc addr
|
||||
@@ -243,12 +257,28 @@ impl TryFrom<StartCommand> for FrontendOptions {
|
||||
opts.influxdb_options = Some(InfluxdbOptions { enable: true });
|
||||
}
|
||||
|
||||
let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);
|
||||
|
||||
if let Some(mut mysql_options) = opts.mysql_options {
|
||||
mysql_options.tls = tls_option.clone();
|
||||
opts.mysql_options = Some(mysql_options);
|
||||
}
|
||||
|
||||
if let Some(mut postgres_options) = opts.postgres_options {
|
||||
postgres_options.tls = tls_option;
|
||||
opts.postgres_options = Some(postgres_options);
|
||||
}
|
||||
|
||||
Ok(opts)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::Duration;
|
||||
|
||||
use servers::auth::{Identity, Password, UserProviderRef};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -264,12 +294,23 @@ mod tests {
|
||||
std::env::current_dir().unwrap().as_path().to_str().unwrap()
|
||||
)),
|
||||
influxdb_enable: false,
|
||||
enable_memory_catalog: false,
|
||||
tls_mode: None,
|
||||
tls_cert_path: None,
|
||||
tls_key_path: None,
|
||||
user_provider: None,
|
||||
};
|
||||
|
||||
let fe_opts = FrontendOptions::try_from(cmd).unwrap();
|
||||
assert_eq!(Mode::Standalone, fe_opts.mode);
|
||||
assert_eq!("127.0.0.1:3001".to_string(), fe_opts.datanode_rpc_addr);
|
||||
assert_eq!(Some("127.0.0.1:4000".to_string()), fe_opts.http_addr);
|
||||
assert_eq!(
|
||||
"127.0.0.1:4000".to_string(),
|
||||
fe_opts.http_options.as_ref().unwrap().addr
|
||||
);
|
||||
assert_eq!(
|
||||
Duration::from_secs(30),
|
||||
fe_opts.http_options.as_ref().unwrap().timeout
|
||||
);
|
||||
assert_eq!(
|
||||
"127.0.0.1:4001".to_string(),
|
||||
fe_opts.grpc_options.unwrap().addr
|
||||
@@ -281,4 +322,33 @@ mod tests {
|
||||
assert_eq!(2, fe_opts.mysql_options.as_ref().unwrap().runtime_size);
|
||||
assert!(fe_opts.influxdb_options.as_ref().unwrap().enable);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_try_from_start_command_to_anymap() {
|
||||
let command = StartCommand {
|
||||
http_addr: None,
|
||||
rpc_addr: None,
|
||||
mysql_addr: None,
|
||||
postgres_addr: None,
|
||||
opentsdb_addr: None,
|
||||
config_file: None,
|
||||
influxdb_enable: false,
|
||||
enable_memory_catalog: false,
|
||||
tls_mode: None,
|
||||
tls_cert_path: None,
|
||||
tls_key_path: None,
|
||||
user_provider: Some("static_user_provider:cmd:test=test".to_string()),
|
||||
};
|
||||
|
||||
let plugins = load_frontend_plugins(&command.user_provider);
|
||||
assert!(plugins.is_ok());
|
||||
let plugins = plugins.unwrap();
|
||||
let provider = plugins.get::<UserProviderRef>();
|
||||
assert!(provider.is_some());
|
||||
let provider = provider.unwrap();
|
||||
let result = provider
|
||||
.auth(Identity::UserId("test", None), Password::PlainText("test"))
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@ regex = "1.6"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
table = { path = "../../table" }
|
||||
|
||||
[dev-dependencies]
|
||||
chrono = "0.4"
|
||||
|
||||
@@ -25,9 +25,3 @@ pub const MIN_USER_TABLE_ID: u32 = 1024;
|
||||
pub const SYSTEM_CATALOG_TABLE_ID: u32 = 0;
|
||||
/// scripts table id
|
||||
pub const SCRIPTS_TABLE_ID: u32 = 1;
|
||||
|
||||
pub(crate) const CATALOG_KEY_PREFIX: &str = "__c";
|
||||
pub(crate) const SCHEMA_KEY_PREFIX: &str = "__s";
|
||||
pub(crate) const TABLE_GLOBAL_KEY_PREFIX: &str = "__tg";
|
||||
pub(crate) const TABLE_REGIONAL_KEY_PREFIX: &str = "__tr";
|
||||
pub const TABLE_ID_KEY_PREFIX: &str = "__tid";
|
||||
|
||||
@@ -14,10 +14,3 @@
|
||||
|
||||
pub mod consts;
|
||||
pub mod error;
|
||||
mod helper;
|
||||
|
||||
pub use helper::{
|
||||
build_catalog_prefix, build_schema_prefix, build_table_global_prefix,
|
||||
build_table_regional_prefix, CatalogKey, CatalogValue, SchemaKey, SchemaValue, TableGlobalKey,
|
||||
TableGlobalValue, TableRegionalKey, TableRegionalValue,
|
||||
};
|
||||
|
||||
@@ -62,6 +62,19 @@ pub enum StatusCode {
|
||||
/// Runtime resources exhausted, like creating threads failed.
|
||||
RuntimeResourcesExhausted = 6000,
|
||||
// ====== End of server related status code =======
|
||||
|
||||
// ====== Begin of auth related status code =====
|
||||
/// User not exist
|
||||
UserNotFound = 7000,
|
||||
/// Unsupported password type
|
||||
UnsupportedPasswordType = 7001,
|
||||
/// Username and password does not match
|
||||
UserPasswordMismatch = 7002,
|
||||
/// Not found http authorization header
|
||||
AuthHeaderNotFound = 7003,
|
||||
/// Invalid http authorization header
|
||||
InvalidAuthHeader = 7004,
|
||||
// ====== End of auth related status code =====
|
||||
}
|
||||
|
||||
impl StatusCode {
|
||||
|
||||
@@ -9,9 +9,9 @@ arc-swap = "1.0"
|
||||
chrono-tz = "0.6"
|
||||
common-error = { path = "../error" }
|
||||
common-function-macro = { path = "../function-macro" }
|
||||
common-time = { path = "../time" }
|
||||
common-query = { path = "../query" }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
common-time = { path = "../time" }
|
||||
datafusion-common = "14.0.0"
|
||||
datatypes = { path = "../../datatypes" }
|
||||
libc = "0.2"
|
||||
num = "0.4"
|
||||
|
||||
@@ -12,5 +12,4 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod error;
|
||||
pub mod scalars;
|
||||
|
||||
@@ -23,6 +23,5 @@ pub(crate) mod test;
|
||||
mod timestamp;
|
||||
pub mod udf;
|
||||
|
||||
pub use aggregate::MedianAccumulatorCreator;
|
||||
pub use function::{Function, FunctionRef};
|
||||
pub use function_registry::{FunctionRegistry, FUNCTION_REGISTRY};
|
||||
|
||||
@@ -16,7 +16,6 @@ mod argmax;
|
||||
mod argmin;
|
||||
mod diff;
|
||||
mod mean;
|
||||
mod median;
|
||||
mod percentile;
|
||||
mod polyval;
|
||||
mod scipy_stats_norm_cdf;
|
||||
@@ -29,7 +28,6 @@ pub use argmin::ArgminAccumulatorCreator;
|
||||
use common_query::logical_plan::AggregateFunctionCreatorRef;
|
||||
pub use diff::DiffAccumulatorCreator;
|
||||
pub use mean::MeanAccumulatorCreator;
|
||||
pub use median::MedianAccumulatorCreator;
|
||||
pub use percentile::PercentileAccumulatorCreator;
|
||||
pub use polyval::PolyvalAccumulatorCreator;
|
||||
pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
|
||||
@@ -88,7 +86,6 @@ impl AggregateFunctions {
|
||||
};
|
||||
}
|
||||
|
||||
register_aggr_func!("median", 1, MedianAccumulatorCreator);
|
||||
register_aggr_func!("diff", 1, DiffAccumulatorCreator);
|
||||
register_aggr_func!("mean", 1, MeanAccumulatorCreator);
|
||||
register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
|
||||
@@ -20,24 +20,22 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::ConstantVector;
|
||||
use datatypes::types::{LogicalPrimitiveType, WrapperType};
|
||||
use datatypes::vectors::{ConstantVector, Helper};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use snafu::ensure;
|
||||
|
||||
// https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
|
||||
// return the index of the max value
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Argmax<T>
|
||||
where
|
||||
T: Primitive + PartialOrd,
|
||||
{
|
||||
pub struct Argmax<T> {
|
||||
max: Option<T>,
|
||||
n: u64,
|
||||
}
|
||||
|
||||
impl<T> Argmax<T>
|
||||
where
|
||||
T: Primitive + PartialOrd,
|
||||
T: PartialOrd + Copy,
|
||||
{
|
||||
fn update(&mut self, value: T, index: u64) {
|
||||
if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
|
||||
@@ -49,8 +47,7 @@ where
|
||||
|
||||
impl<T> Accumulator for Argmax<T>
|
||||
where
|
||||
T: Primitive + PartialOrd,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
T: WrapperType + PartialOrd,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
match self.max {
|
||||
@@ -66,10 +63,10 @@ where
|
||||
|
||||
let column = &values[0];
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
for (i, v) in column.iter_data().enumerate() {
|
||||
if let Some(value) = v {
|
||||
@@ -93,8 +90,8 @@ where
|
||||
|
||||
let max = &states[0];
|
||||
let index = &states[1];
|
||||
let max: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(max) };
|
||||
let index: &<u64 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
|
||||
let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
|
||||
let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
|
||||
index
|
||||
.iter_data()
|
||||
.flatten()
|
||||
@@ -122,7 +119,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Argmax::<$S>::default()))
|
||||
Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -154,7 +151,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -166,21 +163,19 @@ mod test {
|
||||
|
||||
// test update one not-null value
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
assert!(argmax.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Option::<i32>::None,
|
||||
]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
assert!(argmax.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, argmax.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(3),
|
||||
@@ -190,7 +185,7 @@ mod test {
|
||||
|
||||
// test update null-value batch
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(4),
|
||||
@@ -201,7 +196,7 @@ mod test {
|
||||
// test update with constant vector
|
||||
let mut argmax = Argmax::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
10,
|
||||
))];
|
||||
assert!(argmax.update_batch(&v).is_ok());
|
||||
|
||||
@@ -20,23 +20,20 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::ConstantVector;
|
||||
use datatypes::vectors::{ConstantVector, Helper};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use snafu::ensure;
|
||||
|
||||
// // https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Argmin<T>
|
||||
where
|
||||
T: Primitive + PartialOrd,
|
||||
{
|
||||
pub struct Argmin<T> {
|
||||
min: Option<T>,
|
||||
n: u32,
|
||||
}
|
||||
|
||||
impl<T> Argmin<T>
|
||||
where
|
||||
T: Primitive + PartialOrd,
|
||||
T: Copy + PartialOrd,
|
||||
{
|
||||
fn update(&mut self, value: T, index: u32) {
|
||||
match self.min {
|
||||
@@ -56,8 +53,7 @@ where
|
||||
|
||||
impl<T> Accumulator for Argmin<T>
|
||||
where
|
||||
T: Primitive + PartialOrd,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
T: WrapperType + PartialOrd,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
match self.min {
|
||||
@@ -75,10 +71,10 @@ where
|
||||
|
||||
let column = &values[0];
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
for (i, v) in column.iter_data().enumerate() {
|
||||
if let Some(value) = v {
|
||||
@@ -102,8 +98,8 @@ where
|
||||
|
||||
let min = &states[0];
|
||||
let index = &states[1];
|
||||
let min: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(min) };
|
||||
let index: &<u32 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
|
||||
let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
|
||||
let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
|
||||
index
|
||||
.iter_data()
|
||||
.flatten()
|
||||
@@ -131,7 +127,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Argmin::<$S>::default()))
|
||||
Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -175,21 +171,19 @@ mod test {
|
||||
|
||||
// test update one not-null value
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
assert!(argmin.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Option::<i32>::None,
|
||||
]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
assert!(argmin.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, argmin.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(3),
|
||||
@@ -199,7 +193,7 @@ mod test {
|
||||
|
||||
// test update null-value batch
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(4),
|
||||
@@ -210,7 +204,7 @@ mod test {
|
||||
// test update with constant vector
|
||||
let mut argmin = Argmin::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
10,
|
||||
))];
|
||||
assert!(argmin.update_batch(&v).is_ok());
|
||||
|
||||
@@ -22,40 +22,32 @@ use common_query::error::{
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::PrimitiveType;
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{ConstantVector, ListVector};
|
||||
use datatypes::vectors::{ConstantVector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
// https://numpy.org/doc/stable/reference/generated/numpy.diff.html
|
||||
// I is the input type, O is the output type.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Diff<T, SubT>
|
||||
where
|
||||
T: Primitive + AsPrimitive<SubT>,
|
||||
SubT: Primitive + std::ops::Sub<Output = SubT>,
|
||||
{
|
||||
values: Vec<T>,
|
||||
_phantom: PhantomData<SubT>,
|
||||
pub struct Diff<I, O> {
|
||||
values: Vec<I>,
|
||||
_phantom: PhantomData<O>,
|
||||
}
|
||||
|
||||
impl<T, SubT> Diff<T, SubT>
|
||||
where
|
||||
T: Primitive + AsPrimitive<SubT>,
|
||||
SubT: Primitive + std::ops::Sub<Output = SubT>,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
impl<I, O> Diff<I, O> {
|
||||
fn push(&mut self, value: I) {
|
||||
self.values.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, SubT> Accumulator for Diff<T, SubT>
|
||||
impl<I, O> Accumulator for Diff<I, O>
|
||||
where
|
||||
T: Primitive + AsPrimitive<SubT>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
SubT: Primitive + std::ops::Sub<Output = SubT>,
|
||||
for<'a> SubT: Scalar<RefType<'a> = SubT>,
|
||||
I: WrapperType,
|
||||
O: WrapperType,
|
||||
I::Native: AsPrimitive<O::Native>,
|
||||
O::Native: std::ops::Sub<Output = O::Native>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
@@ -65,7 +57,7 @@ where
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::default().into().data_type(),
|
||||
I::LogicalType::build_data_type(),
|
||||
))])
|
||||
}
|
||||
|
||||
@@ -78,12 +70,12 @@ where
|
||||
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
let column: &<I as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
@@ -109,8 +101,9 @@ where
|
||||
),
|
||||
})?;
|
||||
for state in states.values_iter() {
|
||||
let state = state.context(FromScalarValueSnafu)?;
|
||||
self.update_batch(&[state])?
|
||||
if let Some(state) = state.context(FromScalarValueSnafu)? {
|
||||
self.update_batch(&[state])?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -122,11 +115,14 @@ where
|
||||
let diff = self
|
||||
.values
|
||||
.windows(2)
|
||||
.map(|x| (x[1].as_() - x[0].as_()).into())
|
||||
.map(|x| {
|
||||
let native = x[1].into_native().as_() - x[0].into_native().as_();
|
||||
O::from_native(native).into()
|
||||
})
|
||||
.collect::<Vec<Value>>();
|
||||
let diff = Value::List(ListValue::new(
|
||||
Some(Box::new(diff)),
|
||||
SubT::default().into().data_type(),
|
||||
O::LogicalType::build_data_type(),
|
||||
));
|
||||
Ok(diff)
|
||||
}
|
||||
@@ -143,7 +139,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Diff::<$S,<$S as Primitive>::LargestType>::default()))
|
||||
Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_types[0].logical_type_id(),
|
||||
|$S| {
|
||||
Ok(ConcreteDataType::list_datatype(PrimitiveType::<<$S as Primitive>::LargestType>::default().into()))
|
||||
Ok(ConcreteDataType::list_datatype($S::default().into()))
|
||||
},
|
||||
{
|
||||
unreachable!()
|
||||
@@ -177,7 +173,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_types[0].logical_type_id(),
|
||||
|$S| {
|
||||
Ok(vec![ConcreteDataType::list_datatype(PrimitiveType::<$S>::default().into())])
|
||||
Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
|
||||
},
|
||||
{
|
||||
unreachable!()
|
||||
@@ -188,9 +184,10 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
@@ -201,21 +198,19 @@ mod test {
|
||||
|
||||
// test update one not-null value
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
assert!(diff.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, diff.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Option::<i32>::None,
|
||||
]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
assert!(diff.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, diff.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
@@ -232,7 +227,7 @@ mod test {
|
||||
|
||||
// test update null-value batch
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
@@ -251,7 +246,7 @@ mod test {
|
||||
// test update with constant vector
|
||||
let mut diff = Diff::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
4,
|
||||
))];
|
||||
let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
|
||||
|
||||
@@ -22,16 +22,14 @@ use common_query::error::{
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, UInt64Vector};
|
||||
use datatypes::types::WrapperType;
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Mean<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64>,
|
||||
{
|
||||
pub struct Mean<T> {
|
||||
sum: f64,
|
||||
n: u64,
|
||||
_phantom: PhantomData<T>,
|
||||
@@ -39,11 +37,12 @@ where
|
||||
|
||||
impl<T> Mean<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64>,
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<f64>,
|
||||
{
|
||||
#[inline(always)]
|
||||
fn push(&mut self, value: T) {
|
||||
self.sum += value.as_();
|
||||
self.sum += value.into_native().as_();
|
||||
self.n += 1;
|
||||
}
|
||||
|
||||
@@ -56,8 +55,8 @@ where
|
||||
|
||||
impl<T> Accumulator for Mean<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<f64>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
Ok(vec![self.sum.into(), self.n.into()])
|
||||
@@ -73,10 +72,10 @@ where
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
@@ -150,7 +149,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Mean::<$S>::default()))
|
||||
Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -182,7 +181,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -194,21 +193,19 @@ mod test {
|
||||
|
||||
// test update one not-null value
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||
assert!(mean.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Option::<i32>::None,
|
||||
]))];
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||
assert!(mean.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, mean.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
@@ -218,7 +215,7 @@ mod test {
|
||||
|
||||
// test update null-value batch
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
@@ -230,7 +227,7 @@ mod test {
|
||||
// test update with constant vector
|
||||
let mut mean = Mean::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
10,
|
||||
))];
|
||||
assert!(mean.update_batch(&v).is_ok());
|
||||
|
||||
@@ -1,289 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_function_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::OrdPrimitive;
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{ConstantVector, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num::NumCast;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
// This median calculation algorithm's details can be found at
|
||||
// https://leetcode.cn/problems/find-median-from-data-stream/
|
||||
//
|
||||
// Basically, it uses two heaps, a maximum heap and a minimum. The maximum heap stores numbers that
|
||||
// are not greater than the median, and the minimum heap stores the greater. In a streaming of
|
||||
// numbers, when a number is arrived, we adjust the heaps' tops, so that either one top is the
|
||||
// median or both tops can be averaged to get the median.
|
||||
//
|
||||
// The time complexity to update the median is O(logn), O(1) to get the median; and the space
|
||||
// complexity is O(n). (Ignore the costs for heap expansion.)
|
||||
//
|
||||
// From the point of algorithm, [quick select](https://en.wikipedia.org/wiki/Quickselect) might be
|
||||
// better. But to use quick select here, we need a mutable self in the final calculation(`evaluate`)
|
||||
// to swap stored numbers in the states vector. Though we can make our `evaluate` received
|
||||
// `&mut self`, DataFusion calls our accumulator with `&self` (see `DfAccumulatorAdaptor`). That
|
||||
// means we have to introduce some kinds of interior mutability, and the overhead is not neglectable.
|
||||
//
|
||||
// TODO(LFC): Use quick select to get median when we can modify DataFusion's code, and benchmark with two-heap algorithm.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Median<T>
|
||||
where
|
||||
T: Primitive,
|
||||
{
|
||||
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
|
||||
not_greater: BinaryHeap<OrdPrimitive<T>>,
|
||||
}
|
||||
|
||||
impl<T> Median<T>
|
||||
where
|
||||
T: Primitive,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
let value = OrdPrimitive::<T>(value);
|
||||
|
||||
if self.not_greater.is_empty() {
|
||||
self.not_greater.push(value);
|
||||
return;
|
||||
}
|
||||
// The `unwrap`s below are safe because there are `push`s before them.
|
||||
if value <= *self.not_greater.peek().unwrap() {
|
||||
self.not_greater.push(value);
|
||||
if self.not_greater.len() > self.greater.len() + 1 {
|
||||
self.greater.push(Reverse(self.not_greater.pop().unwrap()));
|
||||
}
|
||||
} else {
|
||||
self.greater.push(Reverse(value));
|
||||
if self.greater.len() > self.not_greater.len() {
|
||||
self.not_greater.push(self.greater.pop().unwrap().0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// UDAFs are built using the trait `Accumulator`, that offers DataFusion the necessary functions
|
||||
// to use them.
|
||||
impl<T> Accumulator for Median<T>
|
||||
where
|
||||
T: Primitive,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
// This function serializes our state to `ScalarValue`, which DataFusion uses to pass this
|
||||
// state between execution stages. Note that this can be arbitrary data.
|
||||
//
|
||||
// The `ScalarValue`s returned here will be passed in as argument `states: &[VectorRef]` to
|
||||
// `merge_batch` function.
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
.greater
|
||||
.iter()
|
||||
.map(|x| &x.0)
|
||||
.chain(self.not_greater.iter())
|
||||
.map(|&n| n.into())
|
||||
.collect::<Vec<Value>>();
|
||||
Ok(vec![Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::default().into().data_type(),
|
||||
))])
|
||||
}
|
||||
|
||||
// DataFusion calls this function to update the accumulator's state for a batch of inputs rows.
|
||||
// It is expected this function to update the accumulator's state.
|
||||
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
|
||||
if values.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
ensure!(values.len() == 1, InvalidInputStateSnafu);
|
||||
|
||||
// This is a unary accumulator, so only one column is provided.
|
||||
let column = &values[0];
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
};
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
|
||||
// merge states from other accumulators (returned by `state()` method).
|
||||
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
|
||||
if states.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// The states here are returned by the `state` method. Since we only returned a vector
|
||||
// with one value in that method, `states[0]` is fine.
|
||||
let states = &states[0];
|
||||
let states = states
|
||||
.as_any()
|
||||
.downcast_ref::<ListVector>()
|
||||
.with_context(|| DowncastVectorSnafu {
|
||||
err_msg: format!(
|
||||
"expect ListVector, got vector type {}",
|
||||
states.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
for state in states.values_iter() {
|
||||
let state = state.context(FromScalarValueSnafu)?;
|
||||
// merging state is simply accumulate stored numbers from others', so just call update
|
||||
self.update_batch(&[state])?
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// DataFusion expects this function to return the final value of this aggregator.
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
if self.not_greater.is_empty() {
|
||||
assert!(
|
||||
self.greater.is_empty(),
|
||||
"not expected in two-heap median algorithm, there must be a bug when implementing it"
|
||||
);
|
||||
return Ok(Value::Null);
|
||||
}
|
||||
|
||||
// unwrap is safe because we checked not_greater heap's len above
|
||||
let not_greater = *self.not_greater.peek().unwrap();
|
||||
let median = if self.not_greater.len() > self.greater.len() {
|
||||
not_greater.into()
|
||||
} else {
|
||||
// unwrap is safe because greater heap len >= not_greater heap len, which is > 0
|
||||
let greater = self.greater.peek().unwrap();
|
||||
|
||||
// the following three NumCast's `unwrap`s are safe because T is primitive
|
||||
let not_greater_v: f64 = NumCast::from(not_greater.as_primitive()).unwrap();
|
||||
let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
|
||||
let median: T = NumCast::from((not_greater_v + greater_v) / 2.0).unwrap();
|
||||
median.into()
|
||||
};
|
||||
Ok(median)
|
||||
}
|
||||
}
|
||||
|
||||
#[as_aggr_func_creator]
|
||||
#[derive(Debug, Default, AggrFuncTypeStore)]
|
||||
pub struct MedianAccumulatorCreator {}
|
||||
|
||||
impl AggregateFunctionCreator for MedianAccumulatorCreator {
|
||||
fn creator(&self) -> AccumulatorCreatorFunction {
|
||||
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
|
||||
let input_type = &types[0];
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Median::<$S>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
"\"MEDIAN\" aggregate function not support data type {:?}",
|
||||
input_type.logical_type_id(),
|
||||
);
|
||||
CreateAccumulatorSnafu { err_msg }.fail()?
|
||||
}
|
||||
)
|
||||
});
|
||||
creator
|
||||
}
|
||||
|
||||
fn output_type(&self) -> Result<ConcreteDataType> {
|
||||
let input_types = self.input_types()?;
|
||||
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
|
||||
// unwrap is safe because we have checked input_types len must equals 1
|
||||
Ok(input_types.into_iter().next().unwrap())
|
||||
}
|
||||
|
||||
fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
|
||||
Ok(vec![ConcreteDataType::list_datatype(self.output_type()?)])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_update_batch() {
|
||||
// test update empty batch, expect not updating anything
|
||||
let mut median = Median::<i32>::default();
|
||||
assert!(median.update_batch(&[]).is_ok());
|
||||
assert!(median.not_greater.is_empty());
|
||||
assert!(median.greater.is_empty());
|
||||
assert_eq!(Value::Null, median.evaluate().unwrap());
|
||||
|
||||
// test update one not-null value
|
||||
let mut median = Median::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
|
||||
assert!(median.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Int32(42), median.evaluate().unwrap());
|
||||
|
||||
// test update one null value
|
||||
let mut median = Median::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Option::<i32>::None,
|
||||
]))];
|
||||
assert!(median.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, median.evaluate().unwrap());
|
||||
|
||||
// test update no null-value batch
|
||||
let mut median = Median::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
]))];
|
||||
assert!(median.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Int32(1), median.evaluate().unwrap());
|
||||
|
||||
// test update null-value batch
|
||||
let mut median = Median::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
Some(4),
|
||||
]))];
|
||||
assert!(median.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Int32(3), median.evaluate().unwrap());
|
||||
|
||||
// test update with constant vector
|
||||
let mut median = Median::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
||||
10,
|
||||
))];
|
||||
assert!(median.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Int32(4), median.evaluate().unwrap());
|
||||
}
|
||||
}
|
||||
@@ -26,7 +26,7 @@ use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::OrdPrimitive;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num::NumCast;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
@@ -44,15 +44,15 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
// This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
|
||||
// If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
|
||||
// i+g = (q-alpha)/(n-alpha-beta+1)
|
||||
// Below, ‘q’ is the quantile value, ‘n’ is the sample size and alpha and beta are constants. The following formula gives an interpolation “i + g” of where the quantile would be in the sorted sample.
|
||||
// With ‘i’ being the floor and ‘g’ the fractional part of the result.
|
||||
// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
|
||||
// With 'i' being the floor and 'g' the fractional part of the result.
|
||||
// the default method is linear where
|
||||
// alpha = 1
|
||||
// beta = 1
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Percentile<T>
|
||||
where
|
||||
T: Primitive,
|
||||
T: WrapperType,
|
||||
{
|
||||
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
|
||||
not_greater: BinaryHeap<OrdPrimitive<T>>,
|
||||
@@ -62,7 +62,7 @@ where
|
||||
|
||||
impl<T> Percentile<T>
|
||||
where
|
||||
T: Primitive,
|
||||
T: WrapperType,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
let value = OrdPrimitive::<T>(value);
|
||||
@@ -93,8 +93,7 @@ where
|
||||
|
||||
impl<T> Accumulator for Percentile<T>
|
||||
where
|
||||
T: Primitive,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
T: WrapperType,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
@@ -107,7 +106,7 @@ where
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::default().into().data_type(),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
self.p.into(),
|
||||
])
|
||||
@@ -129,14 +128,14 @@ where
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
|
||||
let x = &values[1];
|
||||
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
|
||||
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
|
||||
})?;
|
||||
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
|
||||
@@ -209,10 +208,11 @@ where
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
let value = value.context(FromScalarValueSnafu)?;
|
||||
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -259,7 +259,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Percentile::<$S>::default()))
|
||||
Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -292,7 +292,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::{Float64Vector, Int32Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -307,8 +307,8 @@ mod test {
|
||||
// test update one not-null value
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
|
||||
Arc::new(Int32Vector::from(vec![Some(42)])),
|
||||
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
|
||||
];
|
||||
assert!(percentile.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
|
||||
@@ -316,8 +316,8 @@ mod test {
|
||||
// test update one null value
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
|
||||
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
|
||||
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
|
||||
];
|
||||
assert!(percentile.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, percentile.evaluate().unwrap());
|
||||
@@ -325,12 +325,8 @@ mod test {
|
||||
// test update no null-value batch
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
@@ -342,13 +338,8 @@ mod test {
|
||||
// test update null-value batch
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
Some(4),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
@@ -362,13 +353,10 @@ mod test {
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
2,
|
||||
)),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
])),
|
||||
Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
|
||||
];
|
||||
assert!(percentile.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
|
||||
@@ -376,12 +364,8 @@ mod test {
|
||||
// test left border
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(0.0_f64),
|
||||
Some(0.0_f64),
|
||||
Some(0.0_f64),
|
||||
@@ -393,12 +377,8 @@ mod test {
|
||||
// test medium
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(50.0_f64),
|
||||
Some(50.0_f64),
|
||||
Some(50.0_f64),
|
||||
@@ -410,12 +390,8 @@ mod test {
|
||||
// test right border
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
Some(100.0_f64),
|
||||
@@ -431,12 +407,8 @@ mod test {
|
||||
// >> 6.400000000000
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(10i32),
|
||||
Some(7),
|
||||
Some(4),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(40.0_f64),
|
||||
Some(40.0_f64),
|
||||
Some(40.0_f64),
|
||||
@@ -451,12 +423,8 @@ mod test {
|
||||
// >> 9.7000000000000011
|
||||
let mut percentile = Percentile::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(10i32),
|
||||
Some(7),
|
||||
Some(4),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(95.0_f64),
|
||||
Some(95.0_f64),
|
||||
Some(95.0_f64),
|
||||
|
||||
@@ -23,9 +23,9 @@ use common_query::error::{
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::PrimitiveType;
|
||||
use datatypes::types::{LogicalPrimitiveType, WrapperType};
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{ConstantVector, Int64Vector, ListVector};
|
||||
use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
@@ -34,8 +34,10 @@ use snafu::{ensure, OptionExt, ResultExt};
|
||||
#[derive(Debug, Default)]
|
||||
pub struct Polyval<T, PolyT>
|
||||
where
|
||||
T: Primitive + AsPrimitive<PolyT>,
|
||||
PolyT: Primitive + std::ops::Mul<Output = PolyT>,
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<PolyT::Native>,
|
||||
PolyT: WrapperType,
|
||||
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
|
||||
{
|
||||
values: Vec<T>,
|
||||
// DataFusion casts constant in into i64 type.
|
||||
@@ -45,8 +47,10 @@ where
|
||||
|
||||
impl<T, PolyT> Polyval<T, PolyT>
|
||||
where
|
||||
T: Primitive + AsPrimitive<PolyT>,
|
||||
PolyT: Primitive + std::ops::Mul<Output = PolyT>,
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<PolyT::Native>,
|
||||
PolyT: WrapperType,
|
||||
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
self.values.push(value);
|
||||
@@ -55,11 +59,11 @@ where
|
||||
|
||||
impl<T, PolyT> Accumulator for Polyval<T, PolyT>
|
||||
where
|
||||
T: Primitive + AsPrimitive<PolyT>,
|
||||
PolyT: Primitive + std::ops::Mul<Output = PolyT> + std::iter::Sum<PolyT>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
for<'a> PolyT: Scalar<RefType<'a> = PolyT>,
|
||||
i64: AsPrimitive<PolyT>,
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<PolyT::Native>,
|
||||
PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
|
||||
PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
|
||||
i64: AsPrimitive<<PolyT as WrapperType>::Native>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
@@ -70,7 +74,7 @@ where
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::default().into().data_type(),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
self.x.into(),
|
||||
])
|
||||
@@ -91,10 +95,10 @@ where
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
(0..len).for_each(|_| {
|
||||
for v in column.iter_data().flatten() {
|
||||
@@ -103,7 +107,7 @@ where
|
||||
});
|
||||
|
||||
let x = &values[1];
|
||||
let x = VectorHelper::check_get_scalar::<i64>(x).context(error::InvalidInputsSnafu {
|
||||
let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
|
||||
})?;
|
||||
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
|
||||
@@ -172,12 +176,14 @@ where
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
let value = value.context(FromScalarValueSnafu)?;
|
||||
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -196,7 +202,7 @@ where
|
||||
.values
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, &value)| value.as_() * (x.pow((len - 1 - i) as u32)).as_())
|
||||
.map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
|
||||
.sum();
|
||||
Ok(polyval.into())
|
||||
}
|
||||
@@ -213,7 +219,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(Polyval::<$S,<$S as Primitive>::LargestType>::default()))
|
||||
Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -234,7 +240,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type,
|
||||
|$S| {
|
||||
Ok(PrimitiveType::<<$S as Primitive>::LargestType>::default().into())
|
||||
Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
|
||||
},
|
||||
{
|
||||
unreachable!()
|
||||
@@ -254,7 +260,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::Int32Vector;
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -268,8 +274,8 @@ mod test {
|
||||
// test update one not-null value
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![Some(3)])),
|
||||
Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
|
||||
Arc::new(Int32Vector::from(vec![Some(3)])),
|
||||
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
|
||||
];
|
||||
assert!(polyval.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());
|
||||
@@ -277,8 +283,8 @@ mod test {
|
||||
// test update one null value
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
|
||||
Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
|
||||
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
|
||||
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
|
||||
];
|
||||
assert!(polyval.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Null, polyval.evaluate().unwrap());
|
||||
@@ -286,12 +292,8 @@ mod test {
|
||||
// test update no null-value batch
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(3),
|
||||
Some(0),
|
||||
Some(1),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<i64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
|
||||
Arc::new(Int64Vector::from(vec![
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
@@ -303,13 +305,8 @@ mod test {
|
||||
// test update null-value batch
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(3),
|
||||
Some(0),
|
||||
None,
|
||||
Some(1),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<i64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
|
||||
Arc::new(Int64Vector::from(vec![
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
Some(2_i64),
|
||||
@@ -323,10 +320,10 @@ mod test {
|
||||
let mut polyval = Polyval::<i32, i64>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
||||
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||
2,
|
||||
)),
|
||||
Arc::new(PrimitiveVector::<i64>::from(vec![Some(5_i64), Some(5_i64)])),
|
||||
Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
|
||||
];
|
||||
assert!(polyval.update_batch(&v).is_ok());
|
||||
assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());
|
||||
|
||||
@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
|
||||
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ScipyStatsNormCdf<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
{
|
||||
pub struct ScipyStatsNormCdf<T> {
|
||||
values: Vec<T>,
|
||||
x: Option<f64>,
|
||||
}
|
||||
|
||||
impl<T> ScipyStatsNormCdf<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
{
|
||||
impl<T> ScipyStatsNormCdf<T> {
|
||||
fn push(&mut self, value: T) {
|
||||
self.values.push(value);
|
||||
}
|
||||
@@ -52,8 +46,8 @@ where
|
||||
|
||||
impl<T> Accumulator for ScipyStatsNormCdf<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
T: WrapperType + std::iter::Sum<T>,
|
||||
T::Native: AsPrimitive<f64>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
@@ -64,7 +58,7 @@ where
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::default().into().data_type(),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
self.x.into(),
|
||||
])
|
||||
@@ -86,14 +80,14 @@ where
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
|
||||
let x = &values[1];
|
||||
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
|
||||
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a positive integer",
|
||||
})?;
|
||||
let first = x.get(0);
|
||||
@@ -160,19 +154,19 @@ where
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
let value = value.context(FromScalarValueSnafu)?;
|
||||
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
|
||||
let mean = values.clone().mean();
|
||||
let std_dev = values.std_dev();
|
||||
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
|
||||
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
|
||||
if mean.is_nan() || std_dev.is_nan() {
|
||||
Ok(Value::Null)
|
||||
} else {
|
||||
@@ -198,7 +192,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(ScipyStatsNormCdf::<$S>::default()))
|
||||
Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -230,7 +224,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::{Float64Vector, Int32Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -244,12 +238,8 @@ mod test {
|
||||
// test update no null-value batch
|
||||
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
@@ -264,13 +254,8 @@ mod test {
|
||||
// test update null-value batch
|
||||
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
Some(4),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
None,
|
||||
Some(2.0_f64),
|
||||
|
||||
@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
|
||||
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
|
||||
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct ScipyStatsNormPdf<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
{
|
||||
pub struct ScipyStatsNormPdf<T> {
|
||||
values: Vec<T>,
|
||||
x: Option<f64>,
|
||||
}
|
||||
|
||||
impl<T> ScipyStatsNormPdf<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
{
|
||||
impl<T> ScipyStatsNormPdf<T> {
|
||||
fn push(&mut self, value: T) {
|
||||
self.values.push(value);
|
||||
}
|
||||
@@ -52,8 +46,8 @@ where
|
||||
|
||||
impl<T> Accumulator for ScipyStatsNormPdf<T>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
T: WrapperType,
|
||||
T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
|
||||
{
|
||||
fn state(&self) -> Result<Vec<Value>> {
|
||||
let nums = self
|
||||
@@ -64,7 +58,7 @@ where
|
||||
Ok(vec![
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(nums)),
|
||||
T::default().into().data_type(),
|
||||
T::LogicalType::build_data_type(),
|
||||
)),
|
||||
self.x.into(),
|
||||
])
|
||||
@@ -86,14 +80,14 @@ where
|
||||
let mut len = 1;
|
||||
let column: &<T as Scalar>::VectorType = if column.is_const() {
|
||||
len = column.len();
|
||||
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
|
||||
unsafe { VectorHelper::static_cast(column.inner()) }
|
||||
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
|
||||
unsafe { Helper::static_cast(column.inner()) }
|
||||
} else {
|
||||
unsafe { VectorHelper::static_cast(column) }
|
||||
unsafe { Helper::static_cast(column) }
|
||||
};
|
||||
|
||||
let x = &values[1];
|
||||
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
|
||||
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
|
||||
err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a positive integer",
|
||||
})?;
|
||||
let first = x.get(0);
|
||||
@@ -160,19 +154,20 @@ where
|
||||
),
|
||||
})?;
|
||||
for value in values.values_iter() {
|
||||
let value = value.context(FromScalarValueSnafu)?;
|
||||
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
if let Some(value) = value.context(FromScalarValueSnafu)? {
|
||||
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
|
||||
for v in column.iter_data().flatten() {
|
||||
self.push(v);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> Result<Value> {
|
||||
let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
|
||||
let mean = values.clone().mean();
|
||||
let std_dev = values.std_dev();
|
||||
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
|
||||
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
|
||||
|
||||
if mean.is_nan() || std_dev.is_nan() {
|
||||
Ok(Value::Null)
|
||||
} else {
|
||||
@@ -198,7 +193,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
|
||||
with_match_primitive_type_id!(
|
||||
input_type.logical_type_id(),
|
||||
|$S| {
|
||||
Ok(Box::new(ScipyStatsNormPdf::<$S>::default()))
|
||||
Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
|
||||
},
|
||||
{
|
||||
let err_msg = format!(
|
||||
@@ -230,7 +225,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::vectors::{Float64Vector, Int32Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -244,12 +239,8 @@ mod test {
|
||||
// test update no null-value batch
|
||||
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-1i32),
|
||||
Some(1),
|
||||
Some(2),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
Some(2.0_f64),
|
||||
@@ -264,13 +255,8 @@ mod test {
|
||||
// test update null-value batch
|
||||
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
|
||||
let v: Vec<VectorRef> = vec![
|
||||
Arc::new(PrimitiveVector::<i32>::from(vec![
|
||||
Some(-2i32),
|
||||
None,
|
||||
Some(3),
|
||||
Some(4),
|
||||
])),
|
||||
Arc::new(PrimitiveVector::<f64>::from(vec![
|
||||
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
|
||||
Arc::new(Float64Vector::from(vec![
|
||||
Some(2.0_f64),
|
||||
None,
|
||||
Some(2.0_f64),
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
|
||||
use std::iter;
|
||||
|
||||
use common_query::error::Result;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::ConstantVector;
|
||||
use datatypes::vectors::{ConstantVector, Helper};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::scalars::expression::ctx::EvalContext;
|
||||
|
||||
pub fn scalar_binary_op<L: Scalar, R: Scalar, O: Scalar, F>(
|
||||
@@ -36,10 +36,9 @@ where
|
||||
|
||||
let result = match (l.is_const(), r.is_const()) {
|
||||
(false, true) => {
|
||||
let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
|
||||
let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
|
||||
let right: &<R as Scalar>::VectorType =
|
||||
unsafe { VectorHelper::static_cast(right.inner()) };
|
||||
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
|
||||
let right: &ConstantVector = unsafe { Helper::static_cast(r) };
|
||||
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
|
||||
let b = right.get_data(0);
|
||||
|
||||
let it = left.iter_data().map(|a| f(a, b, ctx));
|
||||
@@ -47,8 +46,8 @@ where
|
||||
}
|
||||
|
||||
(false, false) => {
|
||||
let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
|
||||
let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
|
||||
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
|
||||
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
|
||||
|
||||
let it = left
|
||||
.iter_data()
|
||||
@@ -58,25 +57,22 @@ where
|
||||
}
|
||||
|
||||
(true, false) => {
|
||||
let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
|
||||
let left: &<L as Scalar>::VectorType =
|
||||
unsafe { VectorHelper::static_cast(left.inner()) };
|
||||
let left: &ConstantVector = unsafe { Helper::static_cast(l) };
|
||||
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
|
||||
let a = left.get_data(0);
|
||||
|
||||
let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
|
||||
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
|
||||
let it = right.iter_data().map(|b| f(a, b, ctx));
|
||||
<O as Scalar>::VectorType::from_owned_iterator(it)
|
||||
}
|
||||
|
||||
(true, true) => {
|
||||
let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
|
||||
let left: &<L as Scalar>::VectorType =
|
||||
unsafe { VectorHelper::static_cast(left.inner()) };
|
||||
let left: &ConstantVector = unsafe { Helper::static_cast(l) };
|
||||
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
|
||||
let a = left.get_data(0);
|
||||
|
||||
let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
|
||||
let right: &<R as Scalar>::VectorType =
|
||||
unsafe { VectorHelper::static_cast(right.inner()) };
|
||||
let right: &ConstantVector = unsafe { Helper::static_cast(r) };
|
||||
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
|
||||
let b = right.get_data(0);
|
||||
|
||||
let it = iter::repeat(a)
|
||||
|
||||
@@ -13,8 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
use chrono_tz::Tz;
|
||||
|
||||
use crate::error::Error;
|
||||
use common_query::error::Error;
|
||||
|
||||
pub struct EvalContext {
|
||||
_tz: Tz,
|
||||
|
||||
@@ -12,10 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use common_query::error::{self, Result};
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::Helper;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{GetScalarVectorSnafu, Result};
|
||||
use crate::scalars::expression::ctx::EvalContext;
|
||||
|
||||
/// TODO: remove the allow_unused when it's used.
|
||||
@@ -28,7 +29,7 @@ pub fn scalar_unary_op<L: Scalar, O: Scalar, F>(
|
||||
where
|
||||
F: Fn(Option<L::RefType<'_>>, &mut EvalContext) -> Option<O>,
|
||||
{
|
||||
let left = VectorHelper::check_get_scalar::<L>(l).context(GetScalarVectorSnafu)?;
|
||||
let left = Helper::check_get_scalar::<L>(l).context(error::GetScalarVectorSnafu)?;
|
||||
let it = left.iter_data().map(|a| f(a, ctx));
|
||||
let result = <O as Scalar>::VectorType::from_owned_iterator(it);
|
||||
|
||||
|
||||
@@ -16,12 +16,11 @@ use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use chrono_tz::Tz;
|
||||
use common_query::error::Result;
|
||||
use common_query::prelude::Signature;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::vectors::VectorRef;
|
||||
|
||||
use crate::error::Result;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FunctionContext {
|
||||
pub tz: Tz,
|
||||
|
||||
@@ -13,10 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod pow;
|
||||
mod rate;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use pow::PowFunction;
|
||||
pub use rate::RateFunction;
|
||||
|
||||
use crate::scalars::function_registry::FunctionRegistry;
|
||||
|
||||
@@ -25,5 +27,6 @@ pub(crate) struct MathFunction;
|
||||
impl MathFunction {
|
||||
pub fn register(registry: &FunctionRegistry) {
|
||||
registry.register(Arc::new(PowFunction::default()));
|
||||
registry.register(Arc::new(RateFunction::default()))
|
||||
}
|
||||
}
|
||||
@@ -15,15 +15,16 @@
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Result;
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::types::LogicalPrimitiveType;
|
||||
use datatypes::vectors::VectorRef;
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num::traits::Pow;
|
||||
use num_traits::AsPrimitive;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::scalars::expression::{scalar_binary_op, EvalContext};
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
@@ -46,7 +47,7 @@ impl Function for PowFunction {
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
|
||||
with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
|
||||
let col = scalar_binary_op::<$S, $T, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
|
||||
let col = scalar_binary_op::<<$S as LogicalPrimitiveType>::Native, <$T as LogicalPrimitiveType>::Native, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
|
||||
Ok(Arc::new(col))
|
||||
},{
|
||||
unreachable!()
|
||||
|
||||
106
src/common/function/src/scalars/math/rate.rs
Normal file
106
src/common/function/src/scalars/math/rate.rs
Normal file
@@ -0,0 +1,106 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use common_query::error::{self, Result};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::arrow::compute::kernels::{arithmetic, cast};
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::{Helper, VectorRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
/// generates rates from a sequence of adjacent data points.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct RateFunction;
|
||||
|
||||
impl fmt::Display for RateFunction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "RATE")
|
||||
}
|
||||
}
|
||||
|
||||
impl Function for RateFunction {
|
||||
fn name(&self) -> &str {
|
||||
"prom_rate"
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::float64_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
Signature::uniform(2, ConcreteDataType::numerics(), Volatility::Immutable)
|
||||
}
|
||||
|
||||
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
let val = &columns[0].to_arrow_array();
|
||||
let val_0 = val.slice(0, val.len() - 1);
|
||||
let val_1 = val.slice(1, val.len() - 1);
|
||||
let dv = arithmetic::subtract_dyn(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
|
||||
let ts = &columns[1].to_arrow_array();
|
||||
let ts_0 = ts.slice(0, ts.len() - 1);
|
||||
let ts_1 = ts.slice(1, ts.len() - 1);
|
||||
let dt = arithmetic::subtract_dyn(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;
|
||||
|
||||
let dv = cast::cast(&dv, &DataType::Float64).context(error::TypeCastSnafu {
|
||||
typ: DataType::Float64,
|
||||
})?;
|
||||
let dt = cast::cast(&dt, &DataType::Float64).context(error::TypeCastSnafu {
|
||||
typ: DataType::Float64,
|
||||
})?;
|
||||
let rate = arithmetic::divide_dyn(&dv, &dt).context(error::ArrowComputeSnafu)?;
|
||||
let v = Helper::try_into_vector(&rate).context(error::FromArrowArraySnafu)?;
|
||||
|
||||
Ok(v)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::vectors::{Float32Vector, Float64Vector, Int64Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
fn test_rate_function() {
|
||||
let rate = RateFunction::default();
|
||||
assert_eq!("prom_rate", rate.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::float64_datatype(),
|
||||
rate.return_type(&[]).unwrap()
|
||||
);
|
||||
assert!(matches!(rate.signature(),
|
||||
Signature {
|
||||
type_signature: TypeSignature::Uniform(2, valid_types),
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == ConcreteDataType::numerics()
|
||||
));
|
||||
let values = vec![1.0, 3.0, 6.0];
|
||||
let ts = vec![0, 1, 2];
|
||||
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(Float32Vector::from_vec(values)),
|
||||
Arc::new(Int64Vector::from_vec(ts)),
|
||||
];
|
||||
let vector = rate.eval(FunctionContext::default(), &args).unwrap();
|
||||
let expect: VectorRef = Arc::new(Float64Vector::from_vec(vec![2.0, 3.0]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
@@ -13,7 +13,6 @@
|
||||
// limitations under the License.
|
||||
|
||||
mod clip;
|
||||
#[allow(unused)]
|
||||
mod interp;
|
||||
|
||||
use std::sync::Arc;
|
||||
@@ -15,14 +15,15 @@
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Result;
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::data_type::{ConcreteDataType, DataType};
|
||||
use datatypes::prelude::{Scalar, VectorRef};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num_traits::AsPrimitive;
|
||||
use datatypes::arrow::compute;
|
||||
use datatypes::arrow::datatypes::ArrowPrimitiveType;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use paste::paste;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::scalars::expression::{scalar_binary_op, EvalContext};
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
@@ -34,25 +35,32 @@ macro_rules! define_eval {
|
||||
($O: ident) => {
|
||||
paste! {
|
||||
fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
|
||||
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
|
||||
with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
|
||||
with_match_primitive_type_id!(columns[2].data_type().logical_type_id(), |$R| {
|
||||
// clip(a, min, max) is equals to min(max(a, min), max)
|
||||
let col: VectorRef = Arc::new(scalar_binary_op::<$S, $T, $O, _>(&columns[0], &columns[1], scalar_max, &mut EvalContext::default())?);
|
||||
let col = scalar_binary_op::<$O, $R, $O, _>(&col, &columns[2], scalar_min, &mut EvalContext::default())?;
|
||||
Ok(Arc::new(col))
|
||||
}, {
|
||||
unreachable!()
|
||||
})
|
||||
}, {
|
||||
unreachable!()
|
||||
})
|
||||
}, {
|
||||
unreachable!()
|
||||
})
|
||||
fn cast_vector(input: &VectorRef) -> VectorRef {
|
||||
Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(
|
||||
compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap()
|
||||
).unwrap()) as _
|
||||
}
|
||||
let operator_1 = cast_vector(&columns[0]);
|
||||
let operator_2 = cast_vector(&columns[1]);
|
||||
let operator_3 = cast_vector(&columns[2]);
|
||||
|
||||
// clip(a, min, max) is equals to min(max(a, min), max)
|
||||
let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>(
|
||||
&operator_1,
|
||||
&operator_2,
|
||||
scalar_max,
|
||||
&mut EvalContext::default(),
|
||||
)?);
|
||||
let col = scalar_binary_op::<$O, $O, $O, _>(
|
||||
&col,
|
||||
&operator_3,
|
||||
scalar_min,
|
||||
&mut EvalContext::default(),
|
||||
)?;
|
||||
Ok(Arc::new(col))
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
define_eval!(i64);
|
||||
@@ -108,27 +116,23 @@ pub fn max<T: PartialOrd>(input: T, max: T) -> T {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn scalar_min<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
|
||||
fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
|
||||
where
|
||||
S: AsPrimitive<O>,
|
||||
T: AsPrimitive<O>,
|
||||
O: Scalar + Copy + PartialOrd,
|
||||
{
|
||||
match (left, right) {
|
||||
(Some(left), Some(right)) => Some(min(left.as_(), right.as_())),
|
||||
(Some(left), Some(right)) => Some(min(left, right)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn scalar_max<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
|
||||
fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
|
||||
where
|
||||
S: AsPrimitive<O>,
|
||||
T: AsPrimitive<O>,
|
||||
O: Scalar + Copy + PartialOrd,
|
||||
{
|
||||
match (left, right) {
|
||||
(Some(left), Some(right)) => Some(max(left.as_(), right.as_())),
|
||||
(Some(left), Some(right)) => Some(max(left, right)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -143,11 +147,15 @@ impl fmt::Display for ClipFunction {
|
||||
mod tests {
|
||||
use common_query::prelude::TypeSignature;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{ConstantVector, Float32Vector, Int32Vector, UInt32Vector};
|
||||
use datatypes::vectors::{
|
||||
ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
|
||||
UInt32Vector, UInt8Vector,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_clip_function() {
|
||||
fn test_clip_signature() {
|
||||
let clip = ClipFunction::default();
|
||||
|
||||
assert_eq!("clip", clip.name());
|
||||
@@ -190,16 +198,21 @@ mod tests {
|
||||
volatility: Volatility::Immutable
|
||||
} if valid_types == ConcreteDataType::numerics()
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clip_fn_signed() {
|
||||
let clip = ClipFunction::default();
|
||||
|
||||
// eval with signed integers
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from_values(0..10)),
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![3])),
|
||||
Arc::new(Int8Vector::from_vec(vec![3])),
|
||||
10,
|
||||
)),
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![6])),
|
||||
Arc::new(Int16Vector::from_vec(vec![6])),
|
||||
10,
|
||||
)),
|
||||
];
|
||||
@@ -217,16 +230,21 @@ mod tests {
|
||||
assert!(matches!(vector.get(i), Value::Int64(v) if v == 6));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clip_fn_unsigned() {
|
||||
let clip = ClipFunction::default();
|
||||
|
||||
// eval with unsigned integers
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(UInt32Vector::from_values(0..10)),
|
||||
Arc::new(UInt8Vector::from_values(0..10)),
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(UInt32Vector::from_vec(vec![3])),
|
||||
10,
|
||||
)),
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(UInt32Vector::from_vec(vec![6])),
|
||||
Arc::new(UInt16Vector::from_vec(vec![6])),
|
||||
10,
|
||||
)),
|
||||
];
|
||||
@@ -244,12 +262,17 @@ mod tests {
|
||||
assert!(matches!(vector.get(i), Value::UInt64(v) if v == 6));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_clip_fn_float() {
|
||||
let clip = ClipFunction::default();
|
||||
|
||||
// eval with floats
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(Int32Vector::from_values(0..10)),
|
||||
Arc::new(Int8Vector::from_values(0..10)),
|
||||
Arc::new(ConstantVector::new(
|
||||
Arc::new(Int32Vector::from_vec(vec![3])),
|
||||
Arc::new(UInt32Vector::from_vec(vec![3])),
|
||||
10,
|
||||
)),
|
||||
Arc::new(ConstantVector::new(
|
||||
|
||||
@@ -14,41 +14,18 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::arrow::array::PrimitiveArray;
|
||||
use datatypes::arrow::compute::cast::primitive_to_primitive;
|
||||
use datatypes::arrow::datatypes::DataType::Float64;
|
||||
use common_query::error::{self, Result};
|
||||
use datatypes::arrow::compute::cast;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDataType;
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ScalarVector;
|
||||
use datatypes::type_id::LogicalTypeId;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{Float64Vector, PrimitiveVector, Vector, VectorRef};
|
||||
use datatypes::{arrow, with_match_primitive_type_id};
|
||||
use snafu::{ensure, Snafu};
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
pub enum Error {
|
||||
#[snafu(display(
|
||||
"The length of the args is not enough, expect at least: {}, have: {}",
|
||||
expect,
|
||||
actual,
|
||||
))]
|
||||
ArgsLenNotEnough { expect: usize, actual: usize },
|
||||
|
||||
#[snafu(display("The sample {} is empty", name))]
|
||||
SampleEmpty { name: String },
|
||||
|
||||
#[snafu(display(
|
||||
"The length of the len1: {} don't match the length of the len2: {}",
|
||||
len1,
|
||||
len2,
|
||||
))]
|
||||
LenNotEquals { len1: usize, len2: usize },
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
use datatypes::vectors::{Float64Vector, Vector, VectorRef};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
/* search the biggest number that smaller than x in xp */
|
||||
fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize {
|
||||
fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
|
||||
for i in 0..xp.len() {
|
||||
if x < xp.get(i) {
|
||||
return i - 1;
|
||||
@@ -58,7 +35,7 @@ fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize
|
||||
}
|
||||
|
||||
/* search the biggest number that smaller than x in xp */
|
||||
fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usize {
|
||||
fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
|
||||
let mut left = 0;
|
||||
let mut right = xp.len();
|
||||
/* If len <= 4 use linear search. */
|
||||
@@ -77,27 +54,33 @@ fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usiz
|
||||
left - 1
|
||||
}
|
||||
|
||||
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<PrimitiveVector<f64>> {
|
||||
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
|
||||
with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
|
||||
let tmp = arg.to_arrow_array();
|
||||
let from = tmp.as_any().downcast_ref::<PrimitiveArray<$S>>().expect("cast failed");
|
||||
let array = primitive_to_primitive(from, &Float64);
|
||||
Ok(PrimitiveVector::new(array))
|
||||
let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
|
||||
typ: ArrowDataType::Float64,
|
||||
})?;
|
||||
// Safety: array has been cast to Float64Array.
|
||||
Ok(Float64Vector::try_from_arrow_array(array).unwrap())
|
||||
},{
|
||||
unreachable!()
|
||||
})
|
||||
}
|
||||
|
||||
/// https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491
|
||||
#[allow(unused)]
|
||||
pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
let mut left = None;
|
||||
let mut right = None;
|
||||
|
||||
ensure!(
|
||||
args.len() >= 3,
|
||||
ArgsLenNotEnoughSnafu {
|
||||
expect: 3_usize,
|
||||
actual: args.len()
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not enough, expect at least: {}, have: {}",
|
||||
3,
|
||||
args.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
@@ -109,9 +92,12 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
if args.len() > 3 {
|
||||
ensure!(
|
||||
args.len() == 5,
|
||||
ArgsLenNotEnoughSnafu {
|
||||
expect: 5_usize,
|
||||
actual: args.len()
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not enough, expect at least: {}, have: {}",
|
||||
5,
|
||||
args.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
@@ -123,14 +109,32 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
.get_data(0);
|
||||
}
|
||||
|
||||
ensure!(x.len() != 0, SampleEmptySnafu { name: "x" });
|
||||
ensure!(xp.len() != 0, SampleEmptySnafu { name: "xp" });
|
||||
ensure!(fp.len() != 0, SampleEmptySnafu { name: "fp" });
|
||||
ensure!(
|
||||
x.len() != 0,
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The sample x is empty",
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
xp.len() != 0,
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The sample xp is empty",
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
fp.len() != 0,
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The sample fp is empty",
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
xp.len() == fp.len(),
|
||||
LenNotEqualsSnafu {
|
||||
len1: xp.len(),
|
||||
len2: fp.len(),
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the len1: {} don't match the length of the len2: {}",
|
||||
xp.len(),
|
||||
fp.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
@@ -147,7 +151,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
|
||||
let res;
|
||||
if xp.len() == 1 {
|
||||
res = x
|
||||
let datas = x
|
||||
.iter_data()
|
||||
.map(|x| {
|
||||
if Value::from(x) < xp.get(0) {
|
||||
@@ -158,7 +162,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
fp.get_data(0)
|
||||
}
|
||||
})
|
||||
.collect::<Float64Vector>();
|
||||
.collect::<Vec<_>>();
|
||||
res = Float64Vector::from(datas);
|
||||
} else {
|
||||
let mut j = 0;
|
||||
/* only pre-calculate slopes if there are relatively few of them. */
|
||||
@@ -185,7 +190,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
}
|
||||
slopes = Some(slopes_tmp);
|
||||
}
|
||||
res = x
|
||||
let datas = x
|
||||
.iter_data()
|
||||
.map(|x| match x {
|
||||
Some(xi) => {
|
||||
@@ -248,7 +253,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Float64Vector>();
|
||||
.collect::<Vec<_>>();
|
||||
res = Float64Vector::from(datas);
|
||||
}
|
||||
Ok(Arc::new(res) as _)
|
||||
}
|
||||
@@ -257,8 +263,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::prelude::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{Int32Vector, Int64Vector, PrimitiveVectorBuilder};
|
||||
use datatypes::vectors::{Int32Vector, Int64Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -341,12 +346,8 @@ mod tests {
|
||||
assert!(matches!(vector.get(0), Value::Float64(v) if v==x[0] as f64));
|
||||
|
||||
// x=None output:Null
|
||||
let input = [None, Some(0.0), Some(0.3)];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let x = builder.finish();
|
||||
let input = vec![None, Some(0.0), Some(0.3)];
|
||||
let x = Float64Vector::from(input);
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(x),
|
||||
Arc::new(Int64Vector::from_vec(xp)),
|
||||
|
||||
@@ -15,11 +15,11 @@
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::Result;
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::prelude::VectorRef;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::scalars::expression::{scalar_binary_op, EvalContext};
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
|
||||
@@ -17,16 +17,17 @@
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
|
||||
use common_query::error::{
|
||||
ArrowComputeSnafu, IntoVectorSnafu, Result, TypeCastSnafu, UnsupportedInputDataTypeSnafu,
|
||||
};
|
||||
use common_query::prelude::{Signature, Volatility};
|
||||
use datatypes::arrow::compute::arithmetics;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
|
||||
use datatypes::arrow::scalar::PrimitiveScalar;
|
||||
use datatypes::arrow::compute;
|
||||
use datatypes::arrow::datatypes::{DataType as ArrowDatatype, Int64Type};
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::vectors::{TimestampVector, VectorRef};
|
||||
use datatypes::vectors::{TimestampMillisecondVector, VectorRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::scalars::function::{Function, FunctionContext};
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
@@ -40,7 +41,7 @@ impl Function for FromUnixtimeFunction {
|
||||
}
|
||||
|
||||
fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
|
||||
Ok(ConcreteDataType::timestamp_millis_datatype())
|
||||
Ok(ConcreteDataType::timestamp_millisecond_datatype())
|
||||
}
|
||||
|
||||
fn signature(&self) -> Signature {
|
||||
@@ -56,14 +57,18 @@ impl Function for FromUnixtimeFunction {
|
||||
ConcreteDataType::Int64(_) => {
|
||||
let array = columns[0].to_arrow_array();
|
||||
// Our timestamp vector's time unit is millisecond
|
||||
let array = arithmetics::mul_scalar(
|
||||
&*array,
|
||||
&PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
|
||||
);
|
||||
let array = compute::multiply_scalar_dyn::<Int64Type>(&array, 1000i64)
|
||||
.context(ArrowComputeSnafu)?;
|
||||
|
||||
let arrow_datatype = &self.return_type(&[]).unwrap().as_arrow_type();
|
||||
Ok(Arc::new(
|
||||
TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
|
||||
data_type: ArrowDatatype::Int64,
|
||||
TimestampMillisecondVector::try_from_arrow_array(
|
||||
compute::cast(&array, arrow_datatype).context(TypeCastSnafu {
|
||||
typ: ArrowDatatype::Int64,
|
||||
})?,
|
||||
)
|
||||
.context(IntoVectorSnafu {
|
||||
data_type: arrow_datatype.clone(),
|
||||
})?,
|
||||
))
|
||||
}
|
||||
@@ -71,8 +76,7 @@ impl Function for FromUnixtimeFunction {
|
||||
function: NAME,
|
||||
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
|
||||
}
|
||||
.fail()
|
||||
.map_err(|e| e.into()),
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -96,7 +100,7 @@ mod tests {
|
||||
let f = FromUnixtimeFunction::default();
|
||||
assert_eq!("from_unixtime", f.name());
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
f.return_type(&[]).unwrap()
|
||||
);
|
||||
|
||||
|
||||
@@ -19,7 +19,8 @@ use common_query::prelude::{
|
||||
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
|
||||
};
|
||||
use datatypes::error::Error as DataTypeError;
|
||||
use datatypes::prelude::{ConcreteDataType, VectorHelper};
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::vectors::Helper;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::scalars::function::{FunctionContext, FunctionRef};
|
||||
@@ -47,7 +48,7 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
|
||||
let args: Result<Vec<_>, DataTypeError> = args
|
||||
.iter()
|
||||
.map(|arg| match arg {
|
||||
ColumnarValue::Scalar(v) => VectorHelper::try_from_scalar_value(v.clone(), rows),
|
||||
ColumnarValue::Scalar(v) => Helper::try_from_scalar_value(v.clone(), rows),
|
||||
ColumnarValue::Vector(v) => Ok(v.clone()),
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -8,11 +8,12 @@ license = "Apache-2.0"
|
||||
api = { path = "../../api" }
|
||||
async-trait = "0.1"
|
||||
common-base = { path = "../base" }
|
||||
common-catalog = { path = "../catalog" }
|
||||
common-error = { path = "../error" }
|
||||
common-grpc = { path = "../grpc" }
|
||||
common-query = { path = "../query" }
|
||||
common-telemetry = { path = "../telemetry" }
|
||||
common-time = { path = "../time" }
|
||||
common-catalog = { path = "../catalog" }
|
||||
common-query = { path = "../query" }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
table = { path = "../../table" }
|
||||
|
||||
@@ -14,30 +14,27 @@
|
||||
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::column::{SemanticType, Values};
|
||||
use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateExpr};
|
||||
use common_base::BitVec;
|
||||
use common_time::timestamp::Timestamp;
|
||||
use common_time::{Date, DateTime};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::data_type::{ConcreteDataType, DataType};
|
||||
use datatypes::prelude::{ValueRef, VectorRef};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::VectorBuilder;
|
||||
use datatypes::vectors::MutableVector;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use table::metadata::TableId;
|
||||
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
|
||||
use table::Table;
|
||||
|
||||
use crate::error::{
|
||||
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DecodeInsertSnafu,
|
||||
DuplicatedTimestampColumnSnafu, IllegalInsertDataSnafu, InvalidColumnProtoSnafu,
|
||||
MissingTimestampColumnSnafu, Result,
|
||||
ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateVectorSnafu, DuplicatedTimestampColumnSnafu,
|
||||
IllegalInsertDataSnafu, InvalidColumnProtoSnafu, MissingTimestampColumnSnafu, Result,
|
||||
};
|
||||
const TAG_SEMANTIC_TYPE: i32 = SemanticType::Tag as i32;
|
||||
const TIMESTAMP_SEMANTIC_TYPE: i32 = SemanticType::Timestamp as i32;
|
||||
@@ -52,35 +49,25 @@ fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnD
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_new_columns(
|
||||
schema: &SchemaRef,
|
||||
insert_batches: &[InsertBatch],
|
||||
) -> Result<Option<AddColumns>> {
|
||||
pub fn find_new_columns(schema: &SchemaRef, columns: &[Column]) -> Result<Option<AddColumns>> {
|
||||
let mut columns_to_add = Vec::default();
|
||||
let mut new_columns: HashSet<String> = HashSet::default();
|
||||
|
||||
for InsertBatch { columns, row_count } in insert_batches {
|
||||
if *row_count == 0 || columns.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
{
|
||||
if schema.column_schema_by_name(column_name).is_none() && !new_columns.contains(column_name)
|
||||
{
|
||||
if schema.column_schema_by_name(column_name).is_none()
|
||||
&& !new_columns.contains(column_name)
|
||||
{
|
||||
let column_def = Some(build_column_def(column_name, *datatype, true));
|
||||
columns_to_add.push(AddColumn {
|
||||
column_def,
|
||||
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
|
||||
});
|
||||
new_columns.insert(column_name.to_string());
|
||||
}
|
||||
let column_def = Some(build_column_def(column_name, *datatype, true));
|
||||
columns_to_add.push(AddColumn {
|
||||
column_def,
|
||||
is_key: *semantic_type == TAG_SEMANTIC_TYPE,
|
||||
});
|
||||
new_columns.insert(column_name.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -112,7 +99,7 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
|
||||
let column_datatype = wrapper.datatype();
|
||||
|
||||
let rows = rows as usize;
|
||||
let mut vector = VectorBuilder::with_capacity(wrapper.into(), rows);
|
||||
let mut vector = ConcreteDataType::from(wrapper).create_mutable_vector(rows);
|
||||
|
||||
if let Some(values) = &column.values {
|
||||
let values = collect_column_values(column_datatype, values);
|
||||
@@ -123,21 +110,31 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
|
||||
|
||||
for i in 0..rows {
|
||||
if let Some(true) = nulls_iter.next() {
|
||||
vector.push_null();
|
||||
vector
|
||||
.push_value_ref(ValueRef::Null)
|
||||
.context(CreateVectorSnafu)?;
|
||||
} else {
|
||||
let value_ref = values_iter.next().context(InvalidColumnProtoSnafu {
|
||||
err_msg: format!(
|
||||
"value not found at position {} of column {}",
|
||||
i, &column.column_name
|
||||
),
|
||||
})?;
|
||||
vector.try_push_ref(value_ref).context(CreateVectorSnafu)?;
|
||||
let value_ref = values_iter
|
||||
.next()
|
||||
.with_context(|| InvalidColumnProtoSnafu {
|
||||
err_msg: format!(
|
||||
"value not found at position {} of column {}",
|
||||
i, &column.column_name
|
||||
),
|
||||
})?;
|
||||
vector
|
||||
.push_value_ref(value_ref)
|
||||
.context(CreateVectorSnafu)?;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
(0..rows).for_each(|_| vector.push_null());
|
||||
(0..rows).try_for_each(|_| {
|
||||
vector
|
||||
.push_value_ref(ValueRef::Null)
|
||||
.context(CreateVectorSnafu)
|
||||
})?;
|
||||
}
|
||||
Ok(vector.finish())
|
||||
Ok(vector.to_vector())
|
||||
}
|
||||
|
||||
fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Vec<ValueRef> {
|
||||
@@ -187,9 +184,24 @@ fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Ve
|
||||
DateTime::new(*v)
|
||||
))
|
||||
}
|
||||
ColumnDataType::Timestamp => {
|
||||
collect_values!(values.ts_millis_values, |v| ValueRef::Timestamp(
|
||||
Timestamp::from_millis(*v)
|
||||
ColumnDataType::TimestampSecond => {
|
||||
collect_values!(values.ts_second_values, |v| ValueRef::Timestamp(
|
||||
Timestamp::new_second(*v)
|
||||
))
|
||||
}
|
||||
ColumnDataType::TimestampMillisecond => {
|
||||
collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
|
||||
Timestamp::new_millisecond(*v)
|
||||
))
|
||||
}
|
||||
ColumnDataType::TimestampMicrosecond => {
|
||||
collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
|
||||
Timestamp::new_microsecond(*v)
|
||||
))
|
||||
}
|
||||
ColumnDataType::TimestampNanosecond => {
|
||||
collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
|
||||
Timestamp::new_nanosecond(*v)
|
||||
))
|
||||
}
|
||||
}
|
||||
@@ -201,92 +213,84 @@ pub fn build_create_expr_from_insertion(
|
||||
schema_name: &str,
|
||||
table_id: Option<TableId>,
|
||||
table_name: &str,
|
||||
insert_batches: &[InsertBatch],
|
||||
columns: &[Column],
|
||||
) -> Result<CreateExpr> {
|
||||
let mut new_columns: HashSet<String> = HashSet::default();
|
||||
let mut column_defs = Vec::default();
|
||||
let mut primary_key_indices = Vec::default();
|
||||
let mut timestamp_index = usize::MAX;
|
||||
|
||||
for InsertBatch { columns, row_count } in insert_batches {
|
||||
if *row_count == 0 || columns.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
{
|
||||
if !new_columns.contains(column_name) {
|
||||
let mut is_nullable = true;
|
||||
match *semantic_type {
|
||||
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
|
||||
TIMESTAMP_SEMANTIC_TYPE => {
|
||||
ensure!(
|
||||
timestamp_index == usize::MAX,
|
||||
DuplicatedTimestampColumnSnafu {
|
||||
exists: &columns[timestamp_index].column_name,
|
||||
duplicated: column_name,
|
||||
}
|
||||
);
|
||||
timestamp_index = column_defs.len();
|
||||
// Timestamp column must not be null.
|
||||
is_nullable = false;
|
||||
}
|
||||
_ => {}
|
||||
for Column {
|
||||
column_name,
|
||||
semantic_type,
|
||||
datatype,
|
||||
..
|
||||
} in columns
|
||||
{
|
||||
if !new_columns.contains(column_name) {
|
||||
let mut is_nullable = true;
|
||||
match *semantic_type {
|
||||
TAG_SEMANTIC_TYPE => primary_key_indices.push(column_defs.len()),
|
||||
TIMESTAMP_SEMANTIC_TYPE => {
|
||||
ensure!(
|
||||
timestamp_index == usize::MAX,
|
||||
DuplicatedTimestampColumnSnafu {
|
||||
exists: &columns[timestamp_index].column_name,
|
||||
duplicated: column_name,
|
||||
}
|
||||
);
|
||||
timestamp_index = column_defs.len();
|
||||
// Timestamp column must not be null.
|
||||
is_nullable = false;
|
||||
}
|
||||
|
||||
let column_def = build_column_def(column_name, *datatype, is_nullable);
|
||||
column_defs.push(column_def);
|
||||
new_columns.insert(column_name.to_string());
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let column_def = build_column_def(column_name, *datatype, is_nullable);
|
||||
column_defs.push(column_def);
|
||||
new_columns.insert(column_name.to_string());
|
||||
}
|
||||
|
||||
ensure!(
|
||||
timestamp_index != usize::MAX,
|
||||
MissingTimestampColumnSnafu { msg: table_name }
|
||||
);
|
||||
let timestamp_field_name = columns[timestamp_index].column_name.clone();
|
||||
|
||||
let primary_keys = primary_key_indices
|
||||
.iter()
|
||||
.map(|idx| columns[*idx].column_name.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expr = CreateExpr {
|
||||
catalog_name: Some(catalog_name.to_string()),
|
||||
schema_name: Some(schema_name.to_string()),
|
||||
table_name: table_name.to_string(),
|
||||
desc: Some("Created on insertion".to_string()),
|
||||
column_defs,
|
||||
time_index: timestamp_field_name,
|
||||
primary_keys,
|
||||
create_if_not_exists: true,
|
||||
table_options: Default::default(),
|
||||
table_id,
|
||||
region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
|
||||
};
|
||||
|
||||
return Ok(expr);
|
||||
}
|
||||
|
||||
IllegalInsertDataSnafu.fail()
|
||||
ensure!(
|
||||
timestamp_index != usize::MAX,
|
||||
MissingTimestampColumnSnafu { msg: table_name }
|
||||
);
|
||||
let timestamp_field_name = columns[timestamp_index].column_name.clone();
|
||||
|
||||
let primary_keys = primary_key_indices
|
||||
.iter()
|
||||
.map(|idx| columns[*idx].column_name.clone())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let expr = CreateExpr {
|
||||
catalog_name: Some(catalog_name.to_string()),
|
||||
schema_name: Some(schema_name.to_string()),
|
||||
table_name: table_name.to_string(),
|
||||
desc: Some("Created on insertion".to_string()),
|
||||
column_defs,
|
||||
time_index: timestamp_field_name,
|
||||
primary_keys,
|
||||
create_if_not_exists: true,
|
||||
table_options: Default::default(),
|
||||
table_id,
|
||||
region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
|
||||
};
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
pub fn insertion_expr_to_request(
|
||||
catalog_name: &str,
|
||||
schema_name: &str,
|
||||
table_name: &str,
|
||||
insert_batches: Vec<InsertBatch>,
|
||||
insert_batches: Vec<(Vec<Column>, u32)>,
|
||||
table: Arc<dyn Table>,
|
||||
) -> Result<InsertRequest> {
|
||||
let schema = table.schema();
|
||||
let mut columns_builders = HashMap::with_capacity(schema.column_schemas().len());
|
||||
|
||||
for InsertBatch { columns, row_count } in insert_batches {
|
||||
for (columns, row_count) in insert_batches {
|
||||
for Column {
|
||||
column_name,
|
||||
values,
|
||||
@@ -310,10 +314,7 @@ pub fn insertion_expr_to_request(
|
||||
},
|
||||
)?;
|
||||
let data_type = &column_schema.data_type;
|
||||
entry.insert(VectorBuilder::with_capacity(
|
||||
data_type.clone(),
|
||||
row_count as usize,
|
||||
))
|
||||
entry.insert(data_type.create_mutable_vector(row_count as usize))
|
||||
}
|
||||
};
|
||||
add_values_to_builder(vector_builder, values, row_count as usize, null_mask)?;
|
||||
@@ -321,7 +322,7 @@ pub fn insertion_expr_to_request(
|
||||
}
|
||||
let columns_values = columns_builders
|
||||
.into_iter()
|
||||
.map(|(column_name, mut vector_builder)| (column_name, vector_builder.finish()))
|
||||
.map(|(column_name, mut vector_builder)| (column_name, vector_builder.to_vector()))
|
||||
.collect();
|
||||
|
||||
Ok(InsertRequest {
|
||||
@@ -332,16 +333,8 @@ pub fn insertion_expr_to_request(
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn insert_batches(bytes_vec: &[Vec<u8>]) -> Result<Vec<InsertBatch>> {
|
||||
bytes_vec
|
||||
.iter()
|
||||
.map(|bytes| bytes.deref().try_into().context(DecodeInsertSnafu))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn add_values_to_builder(
|
||||
builder: &mut VectorBuilder,
|
||||
builder: &mut Box<dyn MutableVector>,
|
||||
values: Values,
|
||||
row_count: usize,
|
||||
null_mask: Vec<u8>,
|
||||
@@ -352,9 +345,11 @@ fn add_values_to_builder(
|
||||
if null_mask.is_empty() {
|
||||
ensure!(values.len() == row_count, IllegalInsertDataSnafu);
|
||||
|
||||
values.iter().for_each(|value| {
|
||||
builder.push(value);
|
||||
});
|
||||
values.iter().try_for_each(|value| {
|
||||
builder
|
||||
.push_value_ref(value.as_value_ref())
|
||||
.context(CreateVectorSnafu)
|
||||
})?;
|
||||
} else {
|
||||
let null_mask = BitVec::from_vec(null_mask);
|
||||
ensure!(
|
||||
@@ -365,9 +360,13 @@ fn add_values_to_builder(
|
||||
let mut idx_of_values = 0;
|
||||
for idx in 0..row_count {
|
||||
match is_null(&null_mask, idx) {
|
||||
Some(true) => builder.push(&Value::Null),
|
||||
Some(true) => builder
|
||||
.push_value_ref(ValueRef::Null)
|
||||
.context(CreateVectorSnafu)?,
|
||||
_ => {
|
||||
builder.push(&values[idx_of_values]);
|
||||
builder
|
||||
.push_value_ref(values[idx_of_values].as_value_ref())
|
||||
.context(CreateVectorSnafu)?;
|
||||
idx_of_values += 1
|
||||
}
|
||||
}
|
||||
@@ -447,9 +446,9 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
|
||||
.map(|v| Value::Date(v.into()))
|
||||
.collect(),
|
||||
ConcreteDataType::Timestamp(_) => values
|
||||
.ts_millis_values
|
||||
.ts_millisecond_values
|
||||
.into_iter()
|
||||
.map(|v| Value::Timestamp(Timestamp::from_millis(v)))
|
||||
.map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
|
||||
.collect(),
|
||||
ConcreteDataType::Null(_) => unreachable!(),
|
||||
ConcreteDataType::List(_) => unreachable!(),
|
||||
@@ -466,9 +465,8 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::column::{self, SemanticType, Values};
|
||||
use api::v1::{insert_expr, Column, ColumnDataType};
|
||||
use api::v1::{Column, ColumnDataType};
|
||||
use common_base::BitVec;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_query::prelude::Expr;
|
||||
@@ -482,11 +480,12 @@ mod tests {
|
||||
use table::Table;
|
||||
|
||||
use super::{
|
||||
build_create_expr_from_insertion, convert_values, find_new_columns, insert_batches,
|
||||
insertion_expr_to_request, is_null, TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
|
||||
build_create_expr_from_insertion, convert_values, insertion_expr_to_request, is_null,
|
||||
TAG_SEMANTIC_TYPE, TIMESTAMP_SEMANTIC_TYPE,
|
||||
};
|
||||
use crate::error;
|
||||
use crate::error::ColumnDataTypeSnafu;
|
||||
use crate::insert::find_new_columns;
|
||||
|
||||
#[inline]
|
||||
fn build_column_schema(
|
||||
@@ -511,11 +510,10 @@ mod tests {
|
||||
|
||||
assert!(build_create_expr_from_insertion("", "", table_id, table_name, &[]).is_err());
|
||||
|
||||
let mock_batch_bytes = mock_insert_batches();
|
||||
let insert_batches = insert_batches(&mock_batch_bytes).unwrap();
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let create_expr =
|
||||
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batches)
|
||||
build_create_expr_from_insertion("", "", table_id, table_name, &insert_batch.0)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(table_id, create_expr.table_id);
|
||||
@@ -573,7 +571,7 @@ mod tests {
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::from(
|
||||
ColumnDataTypeWrapper::try_new(
|
||||
column_defs
|
||||
@@ -601,9 +599,9 @@ mod tests {
|
||||
|
||||
assert!(find_new_columns(&schema, &[]).unwrap().is_none());
|
||||
|
||||
let mock_insert_bytes = mock_insert_batches();
|
||||
let insert_batches = insert_batches(&mock_insert_bytes).unwrap();
|
||||
let add_columns = find_new_columns(&schema, &insert_batches).unwrap().unwrap();
|
||||
let insert_batch = mock_insert_batch();
|
||||
|
||||
let add_columns = find_new_columns(&schema, &insert_batch.0).unwrap().unwrap();
|
||||
|
||||
assert_eq!(2, add_columns.add_columns.len());
|
||||
let host_column = &add_columns.add_columns[0];
|
||||
@@ -633,10 +631,7 @@ mod tests {
|
||||
fn test_insertion_expr_to_request() {
|
||||
let table: Arc<dyn Table> = Arc::new(DemoTable {});
|
||||
|
||||
let values = insert_expr::Values {
|
||||
values: mock_insert_batches(),
|
||||
};
|
||||
let insert_batches = insert_batches(&values.values).unwrap();
|
||||
let insert_batches = vec![mock_insert_batch()];
|
||||
let insert_req =
|
||||
insertion_expr_to_request("greptime", "public", "demo", insert_batches, table).unwrap();
|
||||
|
||||
@@ -657,8 +652,8 @@ mod tests {
|
||||
assert_eq!(Value::Float64(0.1.into()), memory.get(1));
|
||||
|
||||
let ts = insert_req.columns_values.get("ts").unwrap();
|
||||
assert_eq!(Value::Timestamp(Timestamp::from_millis(100)), ts.get(0));
|
||||
assert_eq!(Value::Timestamp(Timestamp::from_millis(101)), ts.get(1));
|
||||
assert_eq!(Value::Timestamp(Timestamp::new_millisecond(100)), ts.get(0));
|
||||
assert_eq!(Value::Timestamp(Timestamp::new_millisecond(101)), ts.get(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -708,8 +703,12 @@ mod tests {
|
||||
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
|
||||
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
|
||||
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), true)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_time_index(true),
|
||||
];
|
||||
|
||||
Arc::new(
|
||||
@@ -734,7 +733,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn mock_insert_batches() -> Vec<Vec<u8>> {
|
||||
fn mock_insert_batch() -> (Vec<Column>, u32) {
|
||||
let row_count = 2;
|
||||
|
||||
let host_vals = column::Values {
|
||||
@@ -774,7 +773,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let ts_vals = column::Values {
|
||||
ts_millis_values: vec![100, 101],
|
||||
ts_millisecond_values: vec![100, 101],
|
||||
..Default::default()
|
||||
};
|
||||
let ts_column = Column {
|
||||
@@ -782,13 +781,12 @@ mod tests {
|
||||
semantic_type: TIMESTAMP_SEMANTIC_TYPE,
|
||||
values: Some(ts_vals),
|
||||
null_mask: vec![0],
|
||||
datatype: ColumnDataType::Timestamp as i32,
|
||||
datatype: ColumnDataType::TimestampMillisecond as i32,
|
||||
};
|
||||
|
||||
let insert_batch = InsertBatch {
|
||||
columns: vec![host_column, cpu_column, mem_column, ts_column],
|
||||
(
|
||||
vec![host_column, cpu_column, mem_column, ts_column],
|
||||
row_count,
|
||||
};
|
||||
vec![insert_batch.into()]
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,5 +20,5 @@ mod insert;
|
||||
pub use alter::{alter_expr_to_request, create_expr_to_request, create_table_schema};
|
||||
pub use insert::{
|
||||
build_alter_table_request, build_create_expr_from_insertion, column_to_vector,
|
||||
find_new_columns, insert_batches, insertion_expr_to_request,
|
||||
find_new_columns, insertion_expr_to_request,
|
||||
};
|
||||
|
||||
@@ -12,11 +12,9 @@ common-error = { path = "../error" }
|
||||
common-query = { path = "../query" }
|
||||
common-recordbatch = { path = "../recordbatch" }
|
||||
common-runtime = { path = "../runtime" }
|
||||
datatypes = { path = "../../datatypes" }
|
||||
dashmap = "5.4"
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion = "14.0.0"
|
||||
datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
tonic = "0.8"
|
||||
|
||||
@@ -14,9 +14,7 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use api::DecodeError;
|
||||
use common_error::prelude::{ErrorExt, StatusCode};
|
||||
use datafusion::error::DataFusionError;
|
||||
use snafu::{Backtrace, ErrorCompat, Snafu};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -24,33 +22,9 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("Unexpected empty physical plan type: {}", name))]
|
||||
EmptyPhysicalPlan { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unexpected empty physical expr: {}", name))]
|
||||
EmptyPhysicalExpr { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unsupported datafusion execution plan: {}", name))]
|
||||
UnsupportedDfPlan { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Unsupported datafusion physical expr: {}", name))]
|
||||
UnsupportedDfExpr { name: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Missing required field in protobuf, field: {}", field))]
|
||||
MissingField { field: String, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("Failed to new datafusion projection exec, source: {}", source))]
|
||||
NewProjection {
|
||||
source: DataFusionError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to decode physical plan node, source: {}", source))]
|
||||
DecodePhysicalPlanNode {
|
||||
source: DecodeError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Write type mismatch, column name: {}, expected: {}, actual: {}",
|
||||
column_name,
|
||||
@@ -89,17 +63,8 @@ pub enum Error {
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
Error::EmptyPhysicalPlan { .. }
|
||||
| Error::EmptyPhysicalExpr { .. }
|
||||
| Error::MissingField { .. }
|
||||
| Error::TypeMismatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::UnsupportedDfPlan { .. } | Error::UnsupportedDfExpr { .. } => {
|
||||
StatusCode::Unsupported
|
||||
}
|
||||
Error::NewProjection { .. }
|
||||
| Error::DecodePhysicalPlanNode { .. }
|
||||
| Error::CreateChannel { .. }
|
||||
| Error::Conversion { .. } => StatusCode::Internal,
|
||||
Error::MissingField { .. } | Error::TypeMismatch { .. } => StatusCode::InvalidArguments,
|
||||
Error::CreateChannel { .. } | Error::Conversion { .. } => StatusCode::Internal,
|
||||
Error::CollectRecordBatches { source } => source.status_code(),
|
||||
Error::ColumnDataType { source } => source.status_code(),
|
||||
}
|
||||
@@ -126,50 +91,6 @@ mod tests {
|
||||
None
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_physical_plan_error() {
|
||||
let e = throw_none_option()
|
||||
.context(EmptyPhysicalPlanSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_physical_expr_error() {
|
||||
let e = throw_none_option()
|
||||
.context(EmptyPhysicalExprSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unsupported_df_plan_error() {
|
||||
let e = throw_none_option()
|
||||
.context(UnsupportedDfPlanSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Unsupported);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unsupported_df_expr_error() {
|
||||
let e = throw_none_option()
|
||||
.context(UnsupportedDfExprSnafu { name: "test" })
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Unsupported);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_field_error() {
|
||||
let e = throw_none_option()
|
||||
@@ -181,33 +102,6 @@ mod tests {
|
||||
assert_eq!(e.status_code(), StatusCode::InvalidArguments);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_new_projection_error() {
|
||||
fn throw_df_error() -> StdResult<DataFusionError> {
|
||||
Err(DataFusionError::NotImplemented("".to_string()))
|
||||
}
|
||||
|
||||
let e = throw_df_error().context(NewProjectionSnafu).err().unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Internal);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode_physical_plan_node_error() {
|
||||
fn throw_decode_error() -> StdResult<DecodeError> {
|
||||
Err(DecodeError::new("test"))
|
||||
}
|
||||
|
||||
let e = throw_decode_error()
|
||||
.context(DecodePhysicalPlanNodeSnafu)
|
||||
.err()
|
||||
.unwrap();
|
||||
|
||||
assert!(e.backtrace_opt().is_some());
|
||||
assert_eq!(e.status_code(), StatusCode::Internal);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_type_mismatch_error() {
|
||||
let e = throw_none_option()
|
||||
|
||||
@@ -14,10 +14,7 @@
|
||||
|
||||
pub mod channel_manager;
|
||||
pub mod error;
|
||||
pub mod physical;
|
||||
pub mod select;
|
||||
pub mod writer;
|
||||
|
||||
pub use error::Error;
|
||||
pub use physical::plan::{DefaultAsPlanImpl, MockExecution};
|
||||
pub use physical::AsExecutionPlan;
|
||||
|
||||
@@ -1,100 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::result::Result;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec;
|
||||
use datafusion::physical_plan::expressions::Column as DfColumn;
|
||||
use datafusion::physical_plan::PhysicalExpr as DfPhysicalExpr;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::error::{EmptyPhysicalExprSnafu, Error, UnsupportedDfExprSnafu};
|
||||
|
||||
// grpc -> datafusion (physical expr)
|
||||
pub(crate) fn parse_grpc_physical_expr(
|
||||
proto: &codec::PhysicalExprNode,
|
||||
) -> Result<Arc<dyn DfPhysicalExpr>, Error> {
|
||||
let expr_type = proto.expr_type.as_ref().context(EmptyPhysicalExprSnafu {
|
||||
name: format!("{:?}", proto),
|
||||
})?;
|
||||
|
||||
// TODO(fys): impl other physical expr
|
||||
let pexpr: Arc<dyn DfPhysicalExpr> = match expr_type {
|
||||
codec::physical_expr_node::ExprType::Column(c) => {
|
||||
let pcol = DfColumn::new(&c.name, c.index as usize);
|
||||
Arc::new(pcol)
|
||||
}
|
||||
};
|
||||
Ok(pexpr)
|
||||
}
|
||||
|
||||
// datafusion -> grpc (physical expr)
|
||||
pub(crate) fn parse_df_physical_expr(
|
||||
df_expr: Arc<dyn DfPhysicalExpr>,
|
||||
) -> Result<codec::PhysicalExprNode, Error> {
|
||||
let expr = df_expr.as_any();
|
||||
|
||||
// TODO(fys): impl other physical expr
|
||||
if let Some(expr) = expr.downcast_ref::<DfColumn>() {
|
||||
Ok(codec::PhysicalExprNode {
|
||||
expr_type: Some(codec::physical_expr_node::ExprType::Column(
|
||||
codec::PhysicalColumn {
|
||||
name: expr.name().to_string(),
|
||||
index: expr.index() as u64,
|
||||
},
|
||||
)),
|
||||
})
|
||||
} else {
|
||||
UnsupportedDfExprSnafu {
|
||||
name: df_expr.to_string(),
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec::physical_expr_node::ExprType::Column;
|
||||
use api::v1::codec::{PhysicalColumn, PhysicalExprNode};
|
||||
use datafusion::physical_plan::expressions::Column as DfColumn;
|
||||
use datafusion::physical_plan::PhysicalExpr;
|
||||
|
||||
use crate::physical::expr::{parse_df_physical_expr, parse_grpc_physical_expr};
|
||||
|
||||
#[test]
|
||||
fn test_column_convert() {
|
||||
// mock df_column_expr
|
||||
let df_column = DfColumn::new("name", 11);
|
||||
let df_column_clone = df_column.clone();
|
||||
let df_expr = Arc::new(df_column) as Arc<dyn PhysicalExpr>;
|
||||
|
||||
// mock grpc_column_expr
|
||||
let grpc_expr = PhysicalExprNode {
|
||||
expr_type: Some(Column(PhysicalColumn {
|
||||
name: "name".to_owned(),
|
||||
index: 11,
|
||||
})),
|
||||
};
|
||||
|
||||
let result = parse_df_physical_expr(df_expr).unwrap();
|
||||
assert_eq!(grpc_expr, result);
|
||||
|
||||
let result = parse_grpc_physical_expr(&grpc_expr).unwrap();
|
||||
let df_column = result.as_any().downcast_ref::<DfColumn>().unwrap();
|
||||
assert_eq!(df_column_clone, df_column.to_owned());
|
||||
}
|
||||
}
|
||||
@@ -1,280 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::ops::Deref;
|
||||
use std::result::Result;
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec::physical_plan_node::PhysicalPlanType;
|
||||
use api::v1::codec::{MockInputExecNode, PhysicalPlanNode, ProjectionExecNode};
|
||||
use async_trait::async_trait;
|
||||
use datafusion::execution::runtime_env::RuntimeEnv;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datafusion::physical_plan::memory::MemoryStream;
|
||||
use datafusion::physical_plan::projection::ProjectionExec;
|
||||
use datafusion::physical_plan::{
|
||||
ExecutionPlan, PhysicalExpr, SendableRecordBatchStream, Statistics,
|
||||
};
|
||||
use datafusion::record_batch::RecordBatch;
|
||||
use datatypes::arrow::array::{PrimitiveArray, Utf8Array};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema, SchemaRef};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{
|
||||
DecodePhysicalPlanNodeSnafu, EmptyPhysicalPlanSnafu, Error, MissingFieldSnafu,
|
||||
NewProjectionSnafu, UnsupportedDfPlanSnafu,
|
||||
};
|
||||
use crate::physical::{expr, AsExecutionPlan, ExecutionPlanRef};
|
||||
|
||||
pub struct DefaultAsPlanImpl {
|
||||
pub bytes: Vec<u8>,
|
||||
}
|
||||
|
||||
impl AsExecutionPlan for DefaultAsPlanImpl {
|
||||
type Error = Error;
|
||||
|
||||
// Vec<u8> -> PhysicalPlanNode -> ExecutionPlanRef
|
||||
fn try_into_physical_plan(&self) -> Result<ExecutionPlanRef, Self::Error> {
|
||||
let physicalplan_node: PhysicalPlanNode = self
|
||||
.bytes
|
||||
.deref()
|
||||
.try_into()
|
||||
.context(DecodePhysicalPlanNodeSnafu)?;
|
||||
physicalplan_node.try_into_physical_plan()
|
||||
}
|
||||
|
||||
// ExecutionPlanRef -> PhysicalPlanNode -> Vec<u8>
|
||||
fn try_from_physical_plan(plan: ExecutionPlanRef) -> Result<Self, Self::Error>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let bytes: Vec<u8> = PhysicalPlanNode::try_from_physical_plan(plan)?.into();
|
||||
Ok(DefaultAsPlanImpl { bytes })
|
||||
}
|
||||
}
|
||||
|
||||
impl AsExecutionPlan for PhysicalPlanNode {
|
||||
type Error = Error;
|
||||
|
||||
fn try_into_physical_plan(&self) -> Result<ExecutionPlanRef, Self::Error> {
|
||||
let plan = self
|
||||
.physical_plan_type
|
||||
.as_ref()
|
||||
.context(EmptyPhysicalPlanSnafu {
|
||||
name: format!("{:?}", self),
|
||||
})?;
|
||||
|
||||
// TODO(fys): impl other physical plan type
|
||||
match plan {
|
||||
PhysicalPlanType::Projection(projection) => {
|
||||
let input = if let Some(input) = &projection.input {
|
||||
input.as_ref().try_into_physical_plan()?
|
||||
} else {
|
||||
MissingFieldSnafu { field: "input" }.fail()?
|
||||
};
|
||||
let exprs = projection
|
||||
.expr
|
||||
.iter()
|
||||
.zip(projection.expr_name.iter())
|
||||
.map(|(expr, name)| {
|
||||
Ok((expr::parse_grpc_physical_expr(expr)?, name.to_string()))
|
||||
})
|
||||
.collect::<Result<Vec<(Arc<dyn PhysicalExpr>, String)>, Error>>()?;
|
||||
|
||||
let projection =
|
||||
ProjectionExec::try_new(exprs, input).context(NewProjectionSnafu)?;
|
||||
|
||||
Ok(Arc::new(projection))
|
||||
}
|
||||
PhysicalPlanType::Mock(mock) => Ok(Arc::new(MockExecution {
|
||||
name: mock.name.to_string(),
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_from_physical_plan(plan: ExecutionPlanRef) -> Result<Self, Self::Error>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let plan = plan.as_any();
|
||||
|
||||
if let Some(exec) = plan.downcast_ref::<ProjectionExec>() {
|
||||
let input = PhysicalPlanNode::try_from_physical_plan(exec.input().to_owned())?;
|
||||
|
||||
let expr = exec
|
||||
.expr()
|
||||
.iter()
|
||||
.map(|expr| expr::parse_df_physical_expr(expr.0.clone()))
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
|
||||
let expr_name = exec.expr().iter().map(|expr| expr.1.clone()).collect();
|
||||
|
||||
Ok(PhysicalPlanNode {
|
||||
physical_plan_type: Some(PhysicalPlanType::Projection(Box::new(
|
||||
ProjectionExecNode {
|
||||
input: Some(Box::new(input)),
|
||||
expr,
|
||||
expr_name,
|
||||
},
|
||||
))),
|
||||
})
|
||||
} else if let Some(exec) = plan.downcast_ref::<MockExecution>() {
|
||||
Ok(PhysicalPlanNode {
|
||||
physical_plan_type: Some(PhysicalPlanType::Mock(MockInputExecNode {
|
||||
name: exec.name.clone(),
|
||||
})),
|
||||
})
|
||||
} else {
|
||||
UnsupportedDfPlanSnafu {
|
||||
name: format!("{:?}", plan),
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(fys): use "test" feature to enable it
|
||||
#[derive(Debug)]
|
||||
pub struct MockExecution {
|
||||
name: String,
|
||||
}
|
||||
|
||||
impl MockExecution {
|
||||
pub fn new(name: String) -> Self {
|
||||
Self { name }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionPlan for MockExecution {
|
||||
fn as_any(&self) -> &dyn std::any::Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> SchemaRef {
|
||||
let field1 = Field::new("id", DataType::UInt32, false);
|
||||
let field2 = Field::new("name", DataType::Utf8, false);
|
||||
let field3 = Field::new("age", DataType::UInt32, false);
|
||||
Arc::new(Schema::new(vec![field1, field2, field3]))
|
||||
}
|
||||
|
||||
fn output_partitioning(&self) -> datafusion::physical_plan::Partitioning {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn output_ordering(
|
||||
&self,
|
||||
) -> Option<&[datafusion::physical_plan::expressions::PhysicalSortExpr]> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<ExecutionPlanRef> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
&self,
|
||||
_children: Vec<ExecutionPlanRef>,
|
||||
) -> datafusion::error::Result<ExecutionPlanRef> {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
&self,
|
||||
_partition: usize,
|
||||
_runtime: Arc<RuntimeEnv>,
|
||||
) -> datafusion::error::Result<SendableRecordBatchStream> {
|
||||
let id_array = Arc::new(PrimitiveArray::from_slice([1u32, 2, 3, 4, 5]));
|
||||
let name_array = Arc::new(Utf8Array::<i32>::from_slice([
|
||||
"zhangsan", "lisi", "wangwu", "Tony", "Mike",
|
||||
]));
|
||||
let age_array = Arc::new(PrimitiveArray::from_slice([25u32, 28, 27, 35, 25]));
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::UInt32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
Field::new("age", DataType::UInt32, false),
|
||||
]));
|
||||
let record_batch =
|
||||
RecordBatch::try_new(schema, vec![id_array, name_array, age_array]).unwrap();
|
||||
let data: Vec<RecordBatch> = vec![record_batch];
|
||||
let projection = Some(vec![0, 1, 2]);
|
||||
let stream = MemoryStream::try_new(data, self.schema(), projection).unwrap();
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Statistics {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::v1::codec::PhysicalPlanNode;
|
||||
use datafusion::physical_plan::expressions::Column;
|
||||
use datafusion::physical_plan::projection::ProjectionExec;
|
||||
|
||||
use crate::physical::plan::{DefaultAsPlanImpl, MockExecution};
|
||||
use crate::physical::{AsExecutionPlan, ExecutionPlanRef};
|
||||
|
||||
#[test]
|
||||
fn test_convert_df_projection_with_bytes() {
|
||||
let projection_exec = mock_df_projection();
|
||||
|
||||
let bytes = DefaultAsPlanImpl::try_from_physical_plan(projection_exec).unwrap();
|
||||
let exec = bytes.try_into_physical_plan().unwrap();
|
||||
|
||||
verify_df_projection(exec);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_df_with_grpc_projection() {
|
||||
let projection_exec = mock_df_projection();
|
||||
|
||||
let projection_node = PhysicalPlanNode::try_from_physical_plan(projection_exec).unwrap();
|
||||
let exec = projection_node.try_into_physical_plan().unwrap();
|
||||
|
||||
verify_df_projection(exec);
|
||||
}
|
||||
|
||||
fn mock_df_projection() -> Arc<ProjectionExec> {
|
||||
let mock_input = Arc::new(MockExecution {
|
||||
name: "mock_input".to_string(),
|
||||
});
|
||||
let column1 = Arc::new(Column::new("id", 0));
|
||||
let column2 = Arc::new(Column::new("name", 1));
|
||||
Arc::new(
|
||||
ProjectionExec::try_new(
|
||||
vec![(column1, "id".to_string()), (column2, "name".to_string())],
|
||||
mock_input,
|
||||
)
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
fn verify_df_projection(exec: ExecutionPlanRef) {
|
||||
let projection_exec = exec.as_any().downcast_ref::<ProjectionExec>().unwrap();
|
||||
let mock_input = projection_exec
|
||||
.input()
|
||||
.as_any()
|
||||
.downcast_ref::<MockExecution>()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!("mock_input", mock_input.name);
|
||||
assert_eq!(2, projection_exec.expr().len());
|
||||
assert_eq!("id", projection_exec.expr()[0].1);
|
||||
assert_eq!("name", projection_exec.expr()[1].1);
|
||||
}
|
||||
}
|
||||
@@ -12,8 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::result::{build_err_result, ObjectResultBuilder};
|
||||
use api::v1::codec::SelectResult;
|
||||
@@ -23,10 +21,15 @@ use common_base::BitVec;
|
||||
use common_error::prelude::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_query::Output;
|
||||
use common_recordbatch::{util, RecordBatches, SendableRecordBatchStream};
|
||||
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
|
||||
use datatypes::arrow_array::{BinaryArray, StringArray};
|
||||
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
|
||||
use datatypes::schema::SchemaRef;
|
||||
use datatypes::types::{TimestampType, WrapperType};
|
||||
use datatypes::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, Float32Vector, Float64Vector,
|
||||
Int16Vector, Int32Vector, Int64Vector, Int8Vector, StringVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
|
||||
UInt32Vector, UInt64Vector, UInt8Vector, VectorRef,
|
||||
};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::error::{self, ConversionSnafu, Result};
|
||||
@@ -46,14 +49,11 @@ pub async fn to_object_result(output: std::result::Result<Output, impl ErrorExt>
|
||||
Err(e) => build_err_result(&e),
|
||||
}
|
||||
}
|
||||
|
||||
async fn collect(stream: SendableRecordBatchStream) -> Result<ObjectResult> {
|
||||
let schema = stream.schema();
|
||||
|
||||
let recordbatches = util::collect(stream)
|
||||
let recordbatches = RecordBatches::try_collect(stream)
|
||||
.await
|
||||
.and_then(|batches| RecordBatches::try_new(schema, batches))
|
||||
.context(error::CollectRecordBatchesSnafu)?;
|
||||
|
||||
let object_result = build_result(recordbatches)?;
|
||||
Ok(object_result)
|
||||
}
|
||||
@@ -82,10 +82,7 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
|
||||
let schema = record_batches.schema();
|
||||
let record_batches = record_batches.take();
|
||||
|
||||
let row_count: usize = record_batches
|
||||
.iter()
|
||||
.map(|r| r.df_recordbatch.num_rows())
|
||||
.sum();
|
||||
let row_count: usize = record_batches.iter().map(|r| r.num_rows()).sum();
|
||||
|
||||
let schemas = schema.column_schemas();
|
||||
let mut columns = Vec::with_capacity(schemas.len());
|
||||
@@ -93,9 +90,9 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
|
||||
for (idx, column_schema) in schemas.iter().enumerate() {
|
||||
let column_name = column_schema.name.clone();
|
||||
|
||||
let arrays: Vec<Arc<dyn Array>> = record_batches
|
||||
let arrays: Vec<_> = record_batches
|
||||
.iter()
|
||||
.map(|r| r.df_recordbatch.columns()[idx].clone())
|
||||
.map(|r| r.column(idx).clone())
|
||||
.collect();
|
||||
|
||||
let column = Column {
|
||||
@@ -116,7 +113,7 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
|
||||
pub fn null_mask(arrays: &[VectorRef], row_count: usize) -> Vec<u8> {
|
||||
let null_count: usize = arrays.iter().map(|a| a.null_count()).sum();
|
||||
|
||||
if null_count == 0 {
|
||||
@@ -126,10 +123,12 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
|
||||
let mut null_mask = BitVec::with_capacity(row_count);
|
||||
for array in arrays {
|
||||
let validity = array.validity();
|
||||
if let Some(v) = validity {
|
||||
v.iter().for_each(|x| null_mask.push(!x));
|
||||
} else {
|
||||
if validity.is_all_valid() {
|
||||
null_mask.extend_from_bitslice(&BitVec::repeat(false, array.len()));
|
||||
} else {
|
||||
for i in 0..array.len() {
|
||||
null_mask.push(!validity.is_set(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
null_mask.into_vec()
|
||||
@@ -137,7 +136,9 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
|
||||
|
||||
macro_rules! convert_arrow_array_to_grpc_vals {
|
||||
($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {{
|
||||
use datatypes::arrow::datatypes::{DataType, TimeUnit};
|
||||
use datatypes::data_type::{ConcreteDataType};
|
||||
use datatypes::prelude::ScalarVector;
|
||||
|
||||
match $data_type {
|
||||
$(
|
||||
$Type => {
|
||||
@@ -147,52 +148,114 @@ macro_rules! convert_arrow_array_to_grpc_vals {
|
||||
from: format!("{:?}", $data_type),
|
||||
})?;
|
||||
vals.$field.extend(array
|
||||
.iter()
|
||||
.iter_data()
|
||||
.filter_map(|i| i.map($MapFunction))
|
||||
.collect::<Vec<_>>());
|
||||
}
|
||||
return Ok(vals);
|
||||
},
|
||||
)+
|
||||
_ => unimplemented!(),
|
||||
ConcreteDataType::Null(_) | ConcreteDataType::List(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
|
||||
pub fn values(arrays: &[VectorRef]) -> Result<Values> {
|
||||
if arrays.is_empty() {
|
||||
return Ok(Values::default());
|
||||
}
|
||||
let data_type = arrays[0].data_type();
|
||||
|
||||
convert_arrow_array_to_grpc_vals!(
|
||||
data_type, arrays,
|
||||
|
||||
(DataType::Boolean, BooleanArray, bool_values, |x| {x}),
|
||||
|
||||
(DataType::Int8, PrimitiveArray<i8>, i8_values, |x| {*x as i32}),
|
||||
(DataType::Int16, PrimitiveArray<i16>, i16_values, |x| {*x as i32}),
|
||||
(DataType::Int32, PrimitiveArray<i32>, i32_values, |x| {*x}),
|
||||
(DataType::Int64, PrimitiveArray<i64>, i64_values, |x| {*x}),
|
||||
|
||||
(DataType::UInt8, PrimitiveArray<u8>, u8_values, |x| {*x as u32}),
|
||||
(DataType::UInt16, PrimitiveArray<u16>, u16_values, |x| {*x as u32}),
|
||||
(DataType::UInt32, PrimitiveArray<u32>, u32_values, |x| {*x}),
|
||||
(DataType::UInt64, PrimitiveArray<u64>, u64_values, |x| {*x}),
|
||||
|
||||
(DataType::Float32, PrimitiveArray<f32>, f32_values, |x| {*x}),
|
||||
(DataType::Float64, PrimitiveArray<f64>, f64_values, |x| {*x}),
|
||||
|
||||
(DataType::Binary, BinaryArray, binary_values, |x| {x.into()}),
|
||||
(DataType::LargeBinary, BinaryArray, binary_values, |x| {x.into()}),
|
||||
|
||||
(DataType::Utf8, StringArray, string_values, |x| {x.into()}),
|
||||
(DataType::LargeUtf8, StringArray, string_values, |x| {x.into()}),
|
||||
|
||||
(DataType::Date32, PrimitiveArray<i32>, date_values, |x| {*x as i32}),
|
||||
(DataType::Date64, PrimitiveArray<i64>, datetime_values,|x| {*x as i64}),
|
||||
|
||||
(DataType::Timestamp(TimeUnit::Millisecond, _), PrimitiveArray<i64>, ts_millis_values, |x| {*x})
|
||||
data_type,
|
||||
arrays,
|
||||
(
|
||||
ConcreteDataType::Boolean(_),
|
||||
BooleanVector,
|
||||
bool_values,
|
||||
|x| { x }
|
||||
),
|
||||
(ConcreteDataType::Int8(_), Int8Vector, i8_values, |x| {
|
||||
i32::from(x)
|
||||
}),
|
||||
(ConcreteDataType::Int16(_), Int16Vector, i16_values, |x| {
|
||||
i32::from(x)
|
||||
}),
|
||||
(ConcreteDataType::Int32(_), Int32Vector, i32_values, |x| {
|
||||
x
|
||||
}),
|
||||
(ConcreteDataType::Int64(_), Int64Vector, i64_values, |x| {
|
||||
x
|
||||
}),
|
||||
(ConcreteDataType::UInt8(_), UInt8Vector, u8_values, |x| {
|
||||
u32::from(x)
|
||||
}),
|
||||
(ConcreteDataType::UInt16(_), UInt16Vector, u16_values, |x| {
|
||||
u32::from(x)
|
||||
}),
|
||||
(ConcreteDataType::UInt32(_), UInt32Vector, u32_values, |x| {
|
||||
x
|
||||
}),
|
||||
(ConcreteDataType::UInt64(_), UInt64Vector, u64_values, |x| {
|
||||
x
|
||||
}),
|
||||
(
|
||||
ConcreteDataType::Float32(_),
|
||||
Float32Vector,
|
||||
f32_values,
|
||||
|x| { x }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::Float64(_),
|
||||
Float64Vector,
|
||||
f64_values,
|
||||
|x| { x }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::Binary(_),
|
||||
BinaryVector,
|
||||
binary_values,
|
||||
|x| { x.into() }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::String(_),
|
||||
StringVector,
|
||||
string_values,
|
||||
|x| { x.into() }
|
||||
),
|
||||
(ConcreteDataType::Date(_), DateVector, date_values, |x| {
|
||||
x.val()
|
||||
}),
|
||||
(
|
||||
ConcreteDataType::DateTime(_),
|
||||
DateTimeVector,
|
||||
datetime_values,
|
||||
|x| { x.val() }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(_)),
|
||||
TimestampSecondVector,
|
||||
ts_second_values,
|
||||
|x| { x.into_native() }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)),
|
||||
TimestampMillisecondVector,
|
||||
ts_millisecond_values,
|
||||
|x| { x.into_native() }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)),
|
||||
TimestampMicrosecondVector,
|
||||
ts_microsecond_values,
|
||||
|x| { x.into_native() }
|
||||
),
|
||||
(
|
||||
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)),
|
||||
TimestampNanosecondVector,
|
||||
ts_nanosecond_values,
|
||||
|x| { x.into_native() }
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -201,14 +264,10 @@ mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_recordbatch::{RecordBatch, RecordBatches};
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
|
||||
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
|
||||
use datatypes::arrow_array::StringArray;
|
||||
use datatypes::schema::Schema;
|
||||
use datatypes::vectors::{UInt32Vector, VectorRef};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
|
||||
use crate::select::{null_mask, try_convert, values};
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_convert_record_batches_to_select_result() {
|
||||
@@ -234,9 +293,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_convert_arrow_arrays_i32() {
|
||||
let array: PrimitiveArray<i32> =
|
||||
PrimitiveArray::from(vec![Some(1), Some(2), None, Some(3)]);
|
||||
let array: Arc<dyn Array> = Arc::new(array);
|
||||
let array = Int32Vector::from(vec![Some(1), Some(2), None, Some(3)]);
|
||||
let array: VectorRef = Arc::new(array);
|
||||
|
||||
let values = values(&[array]).unwrap();
|
||||
|
||||
@@ -245,14 +303,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_convert_arrow_arrays_string() {
|
||||
let array = StringArray::from(vec![
|
||||
let array = StringVector::from(vec![
|
||||
Some("1".to_string()),
|
||||
Some("2".to_string()),
|
||||
None,
|
||||
Some("3".to_string()),
|
||||
None,
|
||||
]);
|
||||
let array: Arc<dyn Array> = Arc::new(array);
|
||||
let array: VectorRef = Arc::new(array);
|
||||
|
||||
let values = values(&[array]).unwrap();
|
||||
|
||||
@@ -261,8 +319,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_convert_arrow_arrays_bool() {
|
||||
let array = BooleanArray::from(vec![Some(true), Some(false), None, Some(false), None]);
|
||||
let array: Arc<dyn Array> = Arc::new(array);
|
||||
let array = BooleanVector::from(vec![Some(true), Some(false), None, Some(false), None]);
|
||||
let array: VectorRef = Arc::new(array);
|
||||
|
||||
let values = values(&[array]).unwrap();
|
||||
|
||||
@@ -271,43 +329,42 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_convert_arrow_arrays_empty() {
|
||||
let array = BooleanArray::from(vec![None, None, None, None, None]);
|
||||
let array: Arc<dyn Array> = Arc::new(array);
|
||||
let array = BooleanVector::from(vec![None, None, None, None, None]);
|
||||
let array: VectorRef = Arc::new(array);
|
||||
|
||||
let values = values(&[array]).unwrap();
|
||||
|
||||
assert_eq!(Vec::<bool>::default(), values.bool_values);
|
||||
assert!(values.bool_values.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_null_mask() {
|
||||
let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![None, Some(2), None]));
|
||||
let a2: Arc<dyn Array> =
|
||||
Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), None, Some(4)]));
|
||||
let mask = null_mask(&vec![a1, a2], 3 + 4);
|
||||
let a1: VectorRef = Arc::new(Int32Vector::from(vec![None, Some(2), None]));
|
||||
let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), None, Some(4)]));
|
||||
let mask = null_mask(&[a1, a2], 3 + 4);
|
||||
assert_eq!(vec![0b0010_0101], mask);
|
||||
|
||||
let empty: Arc<dyn Array> = Arc::new(PrimitiveArray::<i32>::from(vec![None, None, None]));
|
||||
let mask = null_mask(&vec![empty.clone(), empty.clone(), empty], 9);
|
||||
let empty: VectorRef = Arc::new(Int32Vector::from(vec![None, None, None]));
|
||||
let mask = null_mask(&[empty.clone(), empty.clone(), empty], 9);
|
||||
assert_eq!(vec![0b1111_1111, 0b0000_0001], mask);
|
||||
|
||||
let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), Some(3)]));
|
||||
let a2: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(4), Some(5), Some(6)]));
|
||||
let mask = null_mask(&vec![a1, a2], 3 + 3);
|
||||
let a1: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), Some(3)]));
|
||||
let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(4), Some(5), Some(6)]));
|
||||
let mask = null_mask(&[a1, a2], 3 + 3);
|
||||
assert_eq!(Vec::<u8>::default(), mask);
|
||||
|
||||
let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), Some(3)]));
|
||||
let a2: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(4), Some(5), None]));
|
||||
let mask = null_mask(&vec![a1, a2], 3 + 3);
|
||||
let a1: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), Some(3)]));
|
||||
let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(4), Some(5), None]));
|
||||
let mask = null_mask(&[a1, a2], 3 + 3);
|
||||
assert_eq!(vec![0b0010_0000], mask);
|
||||
}
|
||||
|
||||
fn mock_record_batch() -> RecordBatch {
|
||||
let arrow_schema = Arc::new(ArrowSchema::new(vec![
|
||||
Field::new("c1", DataType::UInt32, false),
|
||||
Field::new("c2", DataType::UInt32, false),
|
||||
]));
|
||||
let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("c1", ConcreteDataType::uint32_datatype(), true),
|
||||
ColumnSchema::new("c2", ConcreteDataType::uint32_datatype(), true),
|
||||
];
|
||||
let schema = Arc::new(Schema::try_new(column_schemas).unwrap());
|
||||
|
||||
let v1 = Arc::new(UInt32Vector::from(vec![Some(1), Some(2), None]));
|
||||
let v2 = Arc::new(UInt32Vector::from(vec![Some(1), None, None]));
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use api::v1::codec::InsertBatch;
|
||||
use api::v1::column::{SemanticType, Values};
|
||||
use api::v1::{Column, ColumnDataType};
|
||||
use common_base::BitVec;
|
||||
@@ -24,12 +23,14 @@ use crate::error::{Result, TypeMismatchSnafu};
|
||||
|
||||
type ColumnName = String;
|
||||
|
||||
type RowCount = u32;
|
||||
|
||||
// TODO(fys): will remove in the future.
|
||||
#[derive(Default)]
|
||||
pub struct LinesWriter {
|
||||
column_name_index: HashMap<ColumnName, usize>,
|
||||
null_masks: Vec<BitVec>,
|
||||
batch: InsertBatch,
|
||||
batch: (Vec<Column>, RowCount),
|
||||
lines: usize,
|
||||
}
|
||||
|
||||
@@ -44,11 +45,11 @@ impl LinesWriter {
|
||||
pub fn write_ts(&mut self, column_name: &str, value: (i64, Precision)) -> Result<()> {
|
||||
let (idx, column) = self.mut_column(
|
||||
column_name,
|
||||
ColumnDataType::Timestamp,
|
||||
ColumnDataType::TimestampMillisecond,
|
||||
SemanticType::Timestamp,
|
||||
);
|
||||
ensure!(
|
||||
column.datatype == ColumnDataType::Timestamp as i32,
|
||||
column.datatype == ColumnDataType::TimestampMillisecond as i32,
|
||||
TypeMismatchSnafu {
|
||||
column_name,
|
||||
expected: "timestamp",
|
||||
@@ -57,7 +58,9 @@ impl LinesWriter {
|
||||
);
|
||||
// It is safe to use unwrap here, because values has been initialized in mut_column()
|
||||
let values = column.values.as_mut().unwrap();
|
||||
values.ts_millis_values.push(to_ms_ts(value.1, value.0));
|
||||
values
|
||||
.ts_millisecond_values
|
||||
.push(to_ms_ts(value.1, value.0));
|
||||
self.null_masks[idx].push(false);
|
||||
Ok(())
|
||||
}
|
||||
@@ -171,20 +174,20 @@ impl LinesWriter {
|
||||
|
||||
pub fn commit(&mut self) {
|
||||
let batch = &mut self.batch;
|
||||
batch.row_count += 1;
|
||||
batch.1 += 1;
|
||||
|
||||
for i in 0..batch.columns.len() {
|
||||
for i in 0..batch.0.len() {
|
||||
let null_mask = &mut self.null_masks[i];
|
||||
if batch.row_count as usize > null_mask.len() {
|
||||
if batch.1 as usize > null_mask.len() {
|
||||
null_mask.push(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finish(mut self) -> InsertBatch {
|
||||
pub fn finish(mut self) -> (Vec<Column>, RowCount) {
|
||||
let null_masks = self.null_masks;
|
||||
for (i, null_mask) in null_masks.into_iter().enumerate() {
|
||||
let columns = &mut self.batch.columns;
|
||||
let columns = &mut self.batch.0;
|
||||
columns[i].null_mask = null_mask.into_vec();
|
||||
}
|
||||
self.batch
|
||||
@@ -204,9 +207,9 @@ impl LinesWriter {
|
||||
let batch = &mut self.batch;
|
||||
let to_insert = self.lines;
|
||||
let mut null_mask = BitVec::with_capacity(to_insert);
|
||||
null_mask.extend(BitVec::repeat(true, batch.row_count as usize));
|
||||
null_mask.extend(BitVec::repeat(true, batch.1 as usize));
|
||||
self.null_masks.push(null_mask);
|
||||
batch.columns.push(Column {
|
||||
batch.0.push(Column {
|
||||
column_name: column_name.to_string(),
|
||||
semantic_type: semantic_type.into(),
|
||||
values: Some(Values::with_capacity(datatype, to_insert)),
|
||||
@@ -217,29 +220,29 @@ impl LinesWriter {
|
||||
new_idx
|
||||
}
|
||||
};
|
||||
(column_idx, &mut self.batch.columns[column_idx])
|
||||
(column_idx, &mut self.batch.0[column_idx])
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ms_ts(p: Precision, ts: i64) -> i64 {
|
||||
match p {
|
||||
Precision::NANOSECOND => ts / 1_000_000,
|
||||
Precision::MICROSECOND => ts / 1000,
|
||||
Precision::MILLISECOND => ts,
|
||||
Precision::SECOND => ts * 1000,
|
||||
Precision::MINUTE => ts * 1000 * 60,
|
||||
Precision::HOUR => ts * 1000 * 60 * 60,
|
||||
Precision::Nanosecond => ts / 1_000_000,
|
||||
Precision::Microsecond => ts / 1000,
|
||||
Precision::Millisecond => ts,
|
||||
Precision::Second => ts * 1000,
|
||||
Precision::Minute => ts * 1000 * 60,
|
||||
Precision::Hour => ts * 1000 * 60 * 60,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Precision {
|
||||
NANOSECOND,
|
||||
MICROSECOND,
|
||||
MILLISECOND,
|
||||
SECOND,
|
||||
MINUTE,
|
||||
HOUR,
|
||||
Nanosecond,
|
||||
Microsecond,
|
||||
Millisecond,
|
||||
Second,
|
||||
Minute,
|
||||
Hour,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -260,13 +263,13 @@ mod tests {
|
||||
writer.write_f64("memory", 0.4).unwrap();
|
||||
writer.write_string("name", "name1").unwrap();
|
||||
writer
|
||||
.write_ts("ts", (101011000, Precision::MILLISECOND))
|
||||
.write_ts("ts", (101011000, Precision::Millisecond))
|
||||
.unwrap();
|
||||
writer.commit();
|
||||
|
||||
writer.write_tag("host", "host2").unwrap();
|
||||
writer
|
||||
.write_ts("ts", (102011001, Precision::MILLISECOND))
|
||||
.write_ts("ts", (102011001, Precision::Millisecond))
|
||||
.unwrap();
|
||||
writer.write_bool("enable_reboot", true).unwrap();
|
||||
writer.write_u64("year_of_service", 2).unwrap();
|
||||
@@ -277,14 +280,14 @@ mod tests {
|
||||
writer.write_f64("cpu", 0.4).unwrap();
|
||||
writer.write_u64("cpu_core_num", 16).unwrap();
|
||||
writer
|
||||
.write_ts("ts", (103011002, Precision::MILLISECOND))
|
||||
.write_ts("ts", (103011002, Precision::Millisecond))
|
||||
.unwrap();
|
||||
writer.commit();
|
||||
|
||||
let insert_batch = writer.finish();
|
||||
assert_eq!(3, insert_batch.row_count);
|
||||
assert_eq!(3, insert_batch.1);
|
||||
|
||||
let columns = insert_batch.columns;
|
||||
let columns = insert_batch.0;
|
||||
assert_eq!(9, columns.len());
|
||||
|
||||
let column = &columns[0];
|
||||
@@ -320,11 +323,11 @@ mod tests {
|
||||
|
||||
let column = &columns[4];
|
||||
assert_eq!("ts", column.column_name);
|
||||
assert_eq!(ColumnDataType::Timestamp as i32, column.datatype);
|
||||
assert_eq!(ColumnDataType::TimestampMillisecond as i32, column.datatype);
|
||||
assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
|
||||
assert_eq!(
|
||||
vec![101011000, 102011001, 103011002],
|
||||
column.values.as_ref().unwrap().ts_millis_values
|
||||
column.values.as_ref().unwrap().ts_millisecond_values
|
||||
);
|
||||
verify_null_mask(&column.null_mask, vec![false, false, false]);
|
||||
|
||||
@@ -366,16 +369,16 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_to_ms() {
|
||||
assert_eq!(100, to_ms_ts(Precision::NANOSECOND, 100110000));
|
||||
assert_eq!(100110, to_ms_ts(Precision::MICROSECOND, 100110000));
|
||||
assert_eq!(100110000, to_ms_ts(Precision::MILLISECOND, 100110000));
|
||||
assert_eq!(100, to_ms_ts(Precision::Nanosecond, 100110000));
|
||||
assert_eq!(100110, to_ms_ts(Precision::Microsecond, 100110000));
|
||||
assert_eq!(100110000, to_ms_ts(Precision::Millisecond, 100110000));
|
||||
assert_eq!(
|
||||
100110000 * 1000 * 60,
|
||||
to_ms_ts(Precision::MINUTE, 100110000)
|
||||
to_ms_ts(Precision::Minute, 100110000)
|
||||
);
|
||||
assert_eq!(
|
||||
100110000 * 1000 * 60 * 60,
|
||||
to_ms_ts(Precision::HOUR, 100110000)
|
||||
to_ms_ts(Precision::Hour, 100110000)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,11 +9,9 @@ async-trait = "0.1"
|
||||
common-error = { path = "../error" }
|
||||
common-recordbatch = { path = "../recordbatch" }
|
||||
common-time = { path = "../time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datafusion = "14.0.0"
|
||||
datafusion-common = "14.0.0"
|
||||
datafusion-expr = "14.0.0"
|
||||
datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
statrs = "0.15"
|
||||
|
||||
@@ -14,18 +14,18 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use arrow::error::ArrowError;
|
||||
use common_error::prelude::*;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::arrow;
|
||||
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
|
||||
use datatypes::error::Error as DataTypeError;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use statrs::StatsError;
|
||||
|
||||
common_error::define_opaque_error!(Error);
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum InnerError {
|
||||
pub enum Error {
|
||||
#[snafu(display("Fail to execute function, source: {}", source))]
|
||||
ExecuteFunction {
|
||||
source: DataFusionError,
|
||||
@@ -51,6 +51,12 @@ pub enum InnerError {
|
||||
source: DataTypeError,
|
||||
},
|
||||
|
||||
#[snafu(display("Fail to cast arrow array into vector: {}", source))]
|
||||
FromArrowArray {
|
||||
#[snafu(backtrace)]
|
||||
source: DataTypeError,
|
||||
},
|
||||
|
||||
#[snafu(display("Fail to cast arrow array into vector: {:?}, {}", data_type, source))]
|
||||
IntoVector {
|
||||
#[snafu(backtrace)]
|
||||
@@ -70,8 +76,8 @@ pub enum InnerError {
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid inputs: {}", err_msg))]
|
||||
InvalidInputs {
|
||||
#[snafu(display("Invalid input type: {}", err_msg))]
|
||||
InvalidInputType {
|
||||
#[snafu(backtrace)]
|
||||
source: DataTypeError,
|
||||
err_msg: String,
|
||||
@@ -120,34 +126,74 @@ pub enum InnerError {
|
||||
#[snafu(backtrace)]
|
||||
source: BoxedError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to cast array to {:?}, source: {}", typ, source))]
|
||||
TypeCast {
|
||||
source: ArrowError,
|
||||
typ: arrow::datatypes::DataType,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Failed to perform compute operation on arrow arrays, source: {}",
|
||||
source
|
||||
))]
|
||||
ArrowCompute {
|
||||
source: ArrowError,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
|
||||
#[snafu(display("Query engine fail to cast value: {}", source))]
|
||||
ToScalarValue {
|
||||
#[snafu(backtrace)]
|
||||
source: DataTypeError,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to get scalar vector, {}", source))]
|
||||
GetScalarVector {
|
||||
#[snafu(backtrace)]
|
||||
source: DataTypeError,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid function args: {}", err_msg))]
|
||||
InvalidFuncArgs {
|
||||
err_msg: String,
|
||||
backtrace: Backtrace,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl ErrorExt for InnerError {
|
||||
impl ErrorExt for Error {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match self {
|
||||
InnerError::ExecuteFunction { .. }
|
||||
| InnerError::GenerateFunction { .. }
|
||||
| InnerError::CreateAccumulator { .. }
|
||||
| InnerError::DowncastVector { .. }
|
||||
| InnerError::InvalidInputState { .. }
|
||||
| InnerError::InvalidInputCol { .. }
|
||||
| InnerError::BadAccumulatorImpl { .. } => StatusCode::EngineExecuteQuery,
|
||||
Error::ExecuteFunction { .. }
|
||||
| Error::GenerateFunction { .. }
|
||||
| Error::CreateAccumulator { .. }
|
||||
| Error::DowncastVector { .. }
|
||||
| Error::InvalidInputState { .. }
|
||||
| Error::InvalidInputCol { .. }
|
||||
| Error::BadAccumulatorImpl { .. }
|
||||
| Error::ToScalarValue { .. }
|
||||
| Error::GetScalarVector { .. }
|
||||
| Error::ArrowCompute { .. } => StatusCode::EngineExecuteQuery,
|
||||
|
||||
InnerError::InvalidInputs { source, .. }
|
||||
| InnerError::IntoVector { source, .. }
|
||||
| InnerError::FromScalarValue { source }
|
||||
| InnerError::ConvertArrowSchema { source } => source.status_code(),
|
||||
Error::InvalidInputType { source, .. }
|
||||
| Error::IntoVector { source, .. }
|
||||
| Error::FromScalarValue { source }
|
||||
| Error::ConvertArrowSchema { source }
|
||||
| Error::FromArrowArray { source } => source.status_code(),
|
||||
|
||||
InnerError::ExecuteRepeatedly { .. }
|
||||
| InnerError::GeneralDataFusion { .. }
|
||||
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
|
||||
Error::ExecuteRepeatedly { .. }
|
||||
| Error::GeneralDataFusion { .. }
|
||||
| Error::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
|
||||
|
||||
InnerError::UnsupportedInputDataType { .. } => StatusCode::InvalidArguments,
|
||||
Error::UnsupportedInputDataType { .. }
|
||||
| Error::TypeCast { .. }
|
||||
| Error::InvalidFuncArgs { .. } => StatusCode::InvalidArguments,
|
||||
|
||||
InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
|
||||
InnerError::ExecutePhysicalPlan { source } => source.status_code(),
|
||||
Error::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
|
||||
Error::ExecutePhysicalPlan { source } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,12 +206,6 @@ impl ErrorExt for InnerError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<InnerError> for Error {
|
||||
fn from(e: InnerError) -> Error {
|
||||
Error::new(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Error> for DataFusionError {
|
||||
fn from(e: Error) -> DataFusionError {
|
||||
DataFusionError::External(Box::new(e))
|
||||
@@ -174,7 +214,7 @@ impl From<Error> for DataFusionError {
|
||||
|
||||
impl From<BoxedError> for Error {
|
||||
fn from(source: BoxedError) -> Self {
|
||||
InnerError::ExecutePhysicalPlan { source }.into()
|
||||
Error::ExecutePhysicalPlan { source }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,60 +230,51 @@ mod tests {
|
||||
}
|
||||
|
||||
fn assert_error(err: &Error, code: StatusCode) {
|
||||
let inner_err = err.as_any().downcast_ref::<InnerError>().unwrap();
|
||||
let inner_err = err.as_any().downcast_ref::<Error>().unwrap();
|
||||
assert_eq!(code, inner_err.status_code());
|
||||
assert!(inner_err.backtrace_opt().is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_datafusion_as_source() {
|
||||
let err: Error = throw_df_error()
|
||||
let err = throw_df_error()
|
||||
.context(ExecuteFunctionSnafu)
|
||||
.err()
|
||||
.unwrap()
|
||||
.into();
|
||||
.unwrap();
|
||||
assert_error(&err, StatusCode::EngineExecuteQuery);
|
||||
|
||||
let err: Error = throw_df_error()
|
||||
.context(GeneralDataFusionSnafu)
|
||||
.err()
|
||||
.unwrap()
|
||||
.into();
|
||||
.unwrap();
|
||||
assert_error(&err, StatusCode::Unexpected);
|
||||
|
||||
let err: Error = throw_df_error()
|
||||
let err = throw_df_error()
|
||||
.context(DataFusionExecutionPlanSnafu)
|
||||
.err()
|
||||
.unwrap()
|
||||
.into();
|
||||
.unwrap();
|
||||
assert_error(&err, StatusCode::Unexpected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_execute_repeatedly_error() {
|
||||
let error: Error = None::<i32>
|
||||
.context(ExecuteRepeatedlySnafu)
|
||||
.err()
|
||||
.unwrap()
|
||||
.into();
|
||||
assert_eq!(error.inner.status_code(), StatusCode::Unexpected);
|
||||
let error = None::<i32>.context(ExecuteRepeatedlySnafu).err().unwrap();
|
||||
assert_eq!(error.status_code(), StatusCode::Unexpected);
|
||||
assert!(error.backtrace_opt().is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_df_recordbatch_stream_error() {
|
||||
let result: std::result::Result<i32, common_recordbatch::error::Error> =
|
||||
Err(common_recordbatch::error::InnerError::PollStream {
|
||||
source: ArrowError::Overflow,
|
||||
Err(common_recordbatch::error::Error::PollStream {
|
||||
source: ArrowError::DivideByZero,
|
||||
backtrace: Backtrace::generate(),
|
||||
}
|
||||
.into());
|
||||
let error: Error = result
|
||||
});
|
||||
let error = result
|
||||
.context(ConvertDfRecordBatchStreamSnafu)
|
||||
.err()
|
||||
.unwrap()
|
||||
.into();
|
||||
assert_eq!(error.inner.status_code(), StatusCode::Internal);
|
||||
.unwrap();
|
||||
assert_eq!(error.status_code(), StatusCode::Internal);
|
||||
assert!(error.backtrace_opt().is_some());
|
||||
}
|
||||
|
||||
@@ -256,13 +287,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_into_vector_error() {
|
||||
let err: Error = raise_datatype_error()
|
||||
let err = raise_datatype_error()
|
||||
.context(IntoVectorSnafu {
|
||||
data_type: ArrowDatatype::Int32,
|
||||
})
|
||||
.err()
|
||||
.unwrap()
|
||||
.into();
|
||||
.unwrap();
|
||||
assert!(err.backtrace_opt().is_some());
|
||||
let datatype_err = raise_datatype_error().err().unwrap();
|
||||
assert_eq!(datatype_err.status_code(), err.status_code());
|
||||
|
||||
@@ -22,7 +22,7 @@ use std::sync::Arc;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
|
||||
pub use self::accumulator::{Accumulator, AggregateFunctionCreator, AggregateFunctionCreatorRef};
|
||||
pub use self::expr::Expr;
|
||||
pub use self::expr::{DfExpr, Expr};
|
||||
pub use self::udaf::AggregateFunction;
|
||||
pub use self::udf::ScalarUdf;
|
||||
use crate::function::{ReturnTypeFunction, ScalarFunctionImplementation};
|
||||
@@ -148,9 +148,7 @@ mod tests {
|
||||
|
||||
let args = vec![
|
||||
DfColumnarValue::Scalar(ScalarValue::Boolean(Some(true))),
|
||||
DfColumnarValue::Array(Arc::new(BooleanArray::from_slice(vec![
|
||||
true, false, false, true,
|
||||
]))),
|
||||
DfColumnarValue::Array(Arc::new(BooleanArray::from(vec![true, false, false, true]))),
|
||||
];
|
||||
|
||||
// call the function
|
||||
@@ -17,12 +17,10 @@
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datafusion_common::Result as DfResult;
|
||||
use datafusion_expr::Accumulator as DfAccumulator;
|
||||
use datafusion_expr::{Accumulator as DfAccumulator, AggregateState};
|
||||
use datatypes::arrow::array::ArrayRef;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{Helper as VectorHelper, VectorRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -128,356 +126,53 @@ impl DfAccumulatorAdaptor {
|
||||
}
|
||||
|
||||
impl DfAccumulator for DfAccumulatorAdaptor {
|
||||
fn state(&self) -> DfResult<Vec<ScalarValue>> {
|
||||
fn state(&self) -> DfResult<Vec<AggregateState>> {
|
||||
let state_values = self.accumulator.state()?;
|
||||
let state_types = self.creator.state_types()?;
|
||||
if state_values.len() != state_types.len() {
|
||||
return error::BadAccumulatorImplSnafu {
|
||||
err_msg: format!("Accumulator {:?} returned state values size do not match its state types size.", self),
|
||||
}
|
||||
.fail()
|
||||
.map_err(Error::from)?;
|
||||
.fail()?;
|
||||
}
|
||||
Ok(state_values
|
||||
.into_iter()
|
||||
.zip(state_types.iter())
|
||||
.map(|(v, t)| try_into_scalar_value(v, t))
|
||||
.collect::<Result<Vec<_>>>()
|
||||
.map_err(Error::from)?)
|
||||
.map(|(v, t)| {
|
||||
let scalar = v
|
||||
.try_to_scalar_value(t)
|
||||
.context(error::ToScalarValueSnafu)?;
|
||||
Ok(AggregateState::Scalar(scalar))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?)
|
||||
}
|
||||
|
||||
fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
|
||||
let vectors = VectorHelper::try_into_vectors(values)
|
||||
.context(FromScalarValueSnafu)
|
||||
.map_err(Error::from)?;
|
||||
self.accumulator
|
||||
.update_batch(&vectors)
|
||||
.map_err(|e| e.into())
|
||||
let vectors = VectorHelper::try_into_vectors(values).context(FromScalarValueSnafu)?;
|
||||
self.accumulator.update_batch(&vectors)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
|
||||
let mut vectors = Vec::with_capacity(states.len());
|
||||
for array in states.iter() {
|
||||
vectors.push(
|
||||
VectorHelper::try_into_vector(array)
|
||||
.context(IntoVectorSnafu {
|
||||
data_type: array.data_type().clone(),
|
||||
})
|
||||
.map_err(Error::from)?,
|
||||
VectorHelper::try_into_vector(array).context(IntoVectorSnafu {
|
||||
data_type: array.data_type().clone(),
|
||||
})?,
|
||||
);
|
||||
}
|
||||
self.accumulator.merge_batch(&vectors).map_err(|e| e.into())
|
||||
self.accumulator.merge_batch(&vectors)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn evaluate(&self) -> DfResult<ScalarValue> {
|
||||
let value = self.accumulator.evaluate()?;
|
||||
let output_type = self.creator.output_type()?;
|
||||
Ok(try_into_scalar_value(value, &output_type)?)
|
||||
}
|
||||
}
|
||||
|
||||
fn try_into_scalar_value(value: Value, datatype: &ConcreteDataType) -> Result<ScalarValue> {
|
||||
if !matches!(value, Value::Null) && datatype != &value.data_type() {
|
||||
return error::BadAccumulatorImplSnafu {
|
||||
err_msg: format!(
|
||||
"expect value to return datatype {:?}, actual: {:?}",
|
||||
datatype,
|
||||
value.data_type()
|
||||
),
|
||||
}
|
||||
.fail()?;
|
||||
}
|
||||
|
||||
Ok(match value {
|
||||
Value::Boolean(v) => ScalarValue::Boolean(Some(v)),
|
||||
Value::UInt8(v) => ScalarValue::UInt8(Some(v)),
|
||||
Value::UInt16(v) => ScalarValue::UInt16(Some(v)),
|
||||
Value::UInt32(v) => ScalarValue::UInt32(Some(v)),
|
||||
Value::UInt64(v) => ScalarValue::UInt64(Some(v)),
|
||||
Value::Int8(v) => ScalarValue::Int8(Some(v)),
|
||||
Value::Int16(v) => ScalarValue::Int16(Some(v)),
|
||||
Value::Int32(v) => ScalarValue::Int32(Some(v)),
|
||||
Value::Int64(v) => ScalarValue::Int64(Some(v)),
|
||||
Value::Float32(v) => ScalarValue::Float32(Some(v.0)),
|
||||
Value::Float64(v) => ScalarValue::Float64(Some(v.0)),
|
||||
Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())),
|
||||
Value::Binary(v) => ScalarValue::LargeBinary(Some(v.to_vec())),
|
||||
Value::Date(v) => ScalarValue::Date32(Some(v.val())),
|
||||
Value::DateTime(v) => ScalarValue::Date64(Some(v.val())),
|
||||
Value::Null => try_convert_null_value(datatype)?,
|
||||
Value::List(list) => try_convert_list_value(list)?,
|
||||
Value::Timestamp(t) => timestamp_to_scalar_value(t.unit(), Some(t.value())),
|
||||
})
|
||||
}
|
||||
|
||||
fn timestamp_to_scalar_value(unit: TimeUnit, val: Option<i64>) -> ScalarValue {
|
||||
match unit {
|
||||
TimeUnit::Second => ScalarValue::TimestampSecond(val, None),
|
||||
TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(val, None),
|
||||
TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(val, None),
|
||||
TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(val, None),
|
||||
}
|
||||
}
|
||||
|
||||
fn try_convert_null_value(datatype: &ConcreteDataType) -> Result<ScalarValue> {
|
||||
Ok(match datatype {
|
||||
ConcreteDataType::Boolean(_) => ScalarValue::Boolean(None),
|
||||
ConcreteDataType::Int8(_) => ScalarValue::Int8(None),
|
||||
ConcreteDataType::Int16(_) => ScalarValue::Int16(None),
|
||||
ConcreteDataType::Int32(_) => ScalarValue::Int32(None),
|
||||
ConcreteDataType::Int64(_) => ScalarValue::Int64(None),
|
||||
ConcreteDataType::UInt8(_) => ScalarValue::UInt8(None),
|
||||
ConcreteDataType::UInt16(_) => ScalarValue::UInt16(None),
|
||||
ConcreteDataType::UInt32(_) => ScalarValue::UInt32(None),
|
||||
ConcreteDataType::UInt64(_) => ScalarValue::UInt64(None),
|
||||
ConcreteDataType::Float32(_) => ScalarValue::Float32(None),
|
||||
ConcreteDataType::Float64(_) => ScalarValue::Float64(None),
|
||||
ConcreteDataType::Binary(_) => ScalarValue::LargeBinary(None),
|
||||
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
|
||||
ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit, None),
|
||||
_ => {
|
||||
return error::BadAccumulatorImplSnafu {
|
||||
err_msg: format!(
|
||||
"undefined transition from null value to datatype {:?}",
|
||||
datatype
|
||||
),
|
||||
}
|
||||
.fail()?
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn try_convert_list_value(list: ListValue) -> Result<ScalarValue> {
|
||||
let vs = if let Some(items) = list.items() {
|
||||
Some(Box::new(
|
||||
items
|
||||
.iter()
|
||||
.map(|v| try_into_scalar_value(v.clone(), list.datatype()))
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
Ok(ScalarValue::List(
|
||||
vs,
|
||||
Box::new(list.datatype().as_arrow_type()),
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_base::bytes::{Bytes, StringBytes};
|
||||
use datafusion_common::ScalarValue;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use datatypes::value::{ListValue, OrderedFloat};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_not_null_value_to_scalar_value() {
|
||||
assert_eq!(
|
||||
ScalarValue::Boolean(Some(true)),
|
||||
try_into_scalar_value(Value::Boolean(true), &ConcreteDataType::boolean_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Boolean(Some(false)),
|
||||
try_into_scalar_value(Value::Boolean(false), &ConcreteDataType::boolean_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt8(Some(u8::MIN + 1)),
|
||||
try_into_scalar_value(
|
||||
Value::UInt8(u8::MIN + 1),
|
||||
&ConcreteDataType::uint8_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt16(Some(u16::MIN + 2)),
|
||||
try_into_scalar_value(
|
||||
Value::UInt16(u16::MIN + 2),
|
||||
&ConcreteDataType::uint16_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt32(Some(u32::MIN + 3)),
|
||||
try_into_scalar_value(
|
||||
Value::UInt32(u32::MIN + 3),
|
||||
&ConcreteDataType::uint32_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt64(Some(u64::MIN + 4)),
|
||||
try_into_scalar_value(
|
||||
Value::UInt64(u64::MIN + 4),
|
||||
&ConcreteDataType::uint64_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int8(Some(i8::MIN + 4)),
|
||||
try_into_scalar_value(Value::Int8(i8::MIN + 4), &ConcreteDataType::int8_datatype())
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int16(Some(i16::MIN + 5)),
|
||||
try_into_scalar_value(
|
||||
Value::Int16(i16::MIN + 5),
|
||||
&ConcreteDataType::int16_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int32(Some(i32::MIN + 6)),
|
||||
try_into_scalar_value(
|
||||
Value::Int32(i32::MIN + 6),
|
||||
&ConcreteDataType::int32_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int64(Some(i64::MIN + 7)),
|
||||
try_into_scalar_value(
|
||||
Value::Int64(i64::MIN + 7),
|
||||
&ConcreteDataType::int64_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Float32(Some(8.0f32)),
|
||||
try_into_scalar_value(
|
||||
Value::Float32(OrderedFloat(8.0f32)),
|
||||
&ConcreteDataType::float32_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Float64(Some(9.0f64)),
|
||||
try_into_scalar_value(
|
||||
Value::Float64(OrderedFloat(9.0f64)),
|
||||
&ConcreteDataType::float64_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Utf8(Some("hello".to_string())),
|
||||
try_into_scalar_value(
|
||||
Value::String(StringBytes::from("hello")),
|
||||
&ConcreteDataType::string_datatype(),
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::LargeBinary(Some("world".as_bytes().to_vec())),
|
||||
try_into_scalar_value(
|
||||
Value::Binary(Bytes::from("world".as_bytes())),
|
||||
&ConcreteDataType::binary_datatype()
|
||||
)
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_null_value_to_scalar_value() {
|
||||
assert_eq!(
|
||||
ScalarValue::Boolean(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::boolean_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt8(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::uint8_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt16(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::uint16_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt32(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::uint32_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::UInt64(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::uint64_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int8(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::int8_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int16(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::int16_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int32(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::int32_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Int64(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::int64_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Float32(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::float32_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Float64(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::float64_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::Utf8(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::string_datatype()).unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::LargeBinary(None),
|
||||
try_into_scalar_value(Value::Null, &ConcreteDataType::binary_datatype()).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_value_to_scalar_value() {
|
||||
let items = Some(Box::new(vec![Value::Int32(-1), Value::Null]));
|
||||
let list = Value::List(ListValue::new(items, ConcreteDataType::int32_datatype()));
|
||||
let df_list = try_into_scalar_value(
|
||||
list,
|
||||
&ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
|
||||
)
|
||||
.unwrap();
|
||||
assert!(matches!(df_list, ScalarValue::List(_, _)));
|
||||
match df_list {
|
||||
ScalarValue::List(vs, datatype) => {
|
||||
assert_eq!(*datatype, DataType::Int32);
|
||||
|
||||
assert!(vs.is_some());
|
||||
let vs = *vs.unwrap();
|
||||
assert_eq!(
|
||||
vs,
|
||||
vec![ScalarValue::Int32(Some(-1)), ScalarValue::Int32(None)]
|
||||
);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_timestamp_to_scalar_value() {
|
||||
assert_eq!(
|
||||
ScalarValue::TimestampSecond(Some(1), None),
|
||||
timestamp_to_scalar_value(TimeUnit::Second, Some(1))
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::TimestampMillisecond(Some(1), None),
|
||||
timestamp_to_scalar_value(TimeUnit::Millisecond, Some(1))
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::TimestampMicrosecond(Some(1), None),
|
||||
timestamp_to_scalar_value(TimeUnit::Microsecond, Some(1))
|
||||
);
|
||||
assert_eq!(
|
||||
ScalarValue::TimestampNanosecond(Some(1), None),
|
||||
timestamp_to_scalar_value(TimeUnit::Nanosecond, Some(1))
|
||||
);
|
||||
let scalar_value = value
|
||||
.try_to_scalar_value(&output_type)
|
||||
.context(error::ToScalarValueSnafu)
|
||||
.map_err(Error::from)?;
|
||||
Ok(scalar_value)
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user