Mirror of https://github.com/GreptimeTeam/greptimedb.git (synced 2026-01-07 05:42:57 +00:00)

Compare commits: replace-ar...v0.1.0-alp (30 commits)
Commit SHA1s in this range:

0ffe640f7d
0d660e45cf
a640872cda
7e3c59fb51
7bbc679c76
0b3a2cbcda
53ee85cdad
bc9a46dbb7
a61e96477b
f8500e54c1
e85780b5e4
11bdb33d37
1daba75e7b
dc52a51576
26af9e6214
e07791c5e8
b6d29afcd1
ea9af42091
d0ebcc3b5a
77182f5024
539ead5460
bc0e4e2cb0
7d29670c86
afd88dd53a
efd85df6be
ea1896493b
66bca11401
7c16a4a17b
28bd7404ad
0653301754
.github/workflows/coverage.yml (vendored, 8 changed lines)

@@ -24,7 +24,7 @@ on:
 name: Code coverage
 
 env:
-  RUST_TOOLCHAIN: nightly-2022-07-14
+  RUST_TOOLCHAIN: nightly-2022-12-20
 
 jobs:
   coverage:
@@ -34,6 +34,11 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - uses: arduino/setup-protoc@v1
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+      - uses: KyleMayes/install-llvm-action@v1
+        with:
+          version: "14.0"
      - name: Install toolchain
        uses: dtolnay/rust-toolchain@master
        with:
@@ -48,6 +53,7 @@ jobs:
      - name: Collect coverage data
        run: cargo llvm-cov nextest --workspace --lcov --output-path lcov.info
        env:
+         CARGO_BUILD_RUSTFLAGS: "-C link-arg=-fuse-ld=lld"
          RUST_BACKTRACE: 1
          CARGO_INCREMENTAL: 0
          GT_S3_BUCKET: ${{ secrets.S3_BUCKET }}
.github/workflows/develop.yml (vendored, 10 changed lines)

@@ -23,7 +23,7 @@ on:
 name: CI
 
 env:
-  RUST_TOOLCHAIN: nightly-2022-07-14
+  RUST_TOOLCHAIN: nightly-2022-12-20
 
 jobs:
   typos:
@@ -41,6 +41,8 @@ jobs:
     steps:
      - uses: actions/checkout@v3
      - uses: arduino/setup-protoc@v1
+       with:
+         repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -81,6 +83,8 @@ jobs:
      #     path: ./llvm
      #     key: llvm
      # - uses: arduino/setup-protoc@v1
+     #   with:
+     #     repo-token: ${{ secrets.GITHUB_TOKEN }}
      # - uses: KyleMayes/install-llvm-action@v1
      #   with:
      #     version: "14.0"
@@ -114,6 +118,8 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - uses: arduino/setup-protoc@v1
+       with:
+         repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
@@ -131,6 +137,8 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - uses: arduino/setup-protoc@v1
+       with:
+         repo-token: ${{ secrets.GITHUB_TOKEN }}
      - uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ env.RUST_TOOLCHAIN }}
.github/workflows/release.yml (vendored, 2 changed lines)

@@ -10,7 +10,7 @@ on:
 name: Release
 
 env:
-  RUST_TOOLCHAIN: nightly-2022-07-14
+  RUST_TOOLCHAIN: nightly-2022-12-20
 
   # FIXME(zyy17): Would be better to use `gh release list -L 1 | cut -f 3` to get the latest release version tag, but for a long time, we will stay at 'v0.1.0-alpha-*'.
   SCHEDULED_BUILD_VERSION_PREFIX: v0.1.0-alpha
Cargo.lock (generated, 1990 changed lines): file diff suppressed because it is too large.
Cargo.toml (19 changed lines)

@@ -20,7 +20,6 @@ members = [
     "src/common/time",
     "src/datanode",
     "src/datatypes",
-    "src/datatypes2",
     "src/frontend",
     "src/log-store",
     "src/meta-client",
@@ -40,5 +39,23 @@ members = [
     "tests/runner",
 ]
 
+[workspace.package]
+version = "0.1.0"
+edition = "2021"
+license = "Apache-2.0"
+
+[workspace.dependencies]
+arrow = "29.0"
+arrow-schema = { version = "29.0", features = ["serde"] }
+# TODO(LFC): Use released Datafusion when it is officially dependent on Arrow 29.0
+datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
+datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
+datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
+datafusion-optimizer = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
+datafusion-physical-expr = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
+datafusion-sql = { git = "https://github.com/apache/arrow-datafusion.git", rev = "4917235a398ae20145c87d20984e6367dc1a0c1e" }
+parquet = "29.0"
+sqlparser = "0.28"
+
 [profile.release]
 debug = true
benchmarks/Cargo.toml

@@ -1,14 +1,14 @@
 [package]
 name = "benchmarks"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
-arrow = "10"
+arrow.workspace = true
 clap = { version = "4.0", features = ["derive"] }
 client = { path = "../src/client" }
 indicatif = "0.17.1"
 itertools = "0.10.5"
-parquet = { version = "*" }
+parquet.workspace = true
 tokio = { version = "1.21", features = ["full"] }
benchmarks source (NYC taxi bench)

@@ -15,12 +15,10 @@
 //! Use the taxi trip records from New York City dataset to bench. You can download the dataset from
 //! [here](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
 
-#![feature(once_cell)]
 #![allow(clippy::print_stdout)]
 
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
-use std::sync::Arc;
 use std::time::Instant;
 
 use arrow::array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray};
@@ -29,12 +27,10 @@ use arrow::record_batch::RecordBatch;
 use clap::Parser;
 use client::admin::Admin;
 use client::api::v1::column::Values;
-use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
+use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateTableExpr, InsertExpr, TableId};
 use client::{Client, Database, Select};
 use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
-use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
-use parquet::file::reader::FileReader;
-use parquet::file::serialized_reader::SerializedFileReader;
+use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
 use tokio::task::JoinSet;
 
 const DATABASE_NAME: &str = "greptime";
@@ -86,14 +82,18 @@ async fn write_data(
     pb_style: ProgressStyle,
 ) -> u128 {
     let file = std::fs::File::open(&path).unwrap();
-    let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
-    let row_num = file_reader.metadata().file_metadata().num_rows();
-    let record_batch_reader = ParquetFileArrowReader::new(file_reader)
-        .get_record_reader(batch_size)
+    let record_batch_reader_builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+    let row_num = record_batch_reader_builder
+        .metadata()
+        .file_metadata()
+        .num_rows();
+    let record_batch_reader = record_batch_reader_builder
+        .with_batch_size(batch_size)
+        .build()
         .unwrap();
     let progress_bar = mpb.add(ProgressBar::new(row_num as _));
     progress_bar.set_style(pb_style);
-    progress_bar.set_message(format!("{:?}", path));
+    progress_bar.set_message(format!("{path:?}"));
 
     let mut total_rpc_elapsed_ms = 0;
 
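For context, here is a minimal, self-contained sketch of the builder-based reader API this hunk migrates to (`ParquetRecordBatchReaderBuilder` from the parquet 29 crate pinned above). The path and batch size are placeholders, not values from the repository.

```rust
use std::fs::File;

use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;

fn dump_parquet(path: &str, batch_size: usize) {
    let file = File::open(path).expect("open parquet file");
    let builder = ParquetRecordBatchReaderBuilder::try_new(file).expect("read parquet footer");
    // File metadata (e.g. the row count the progress bar above needs) is
    // available from the builder before any batch is decoded, so the old
    // separate SerializedFileReader is no longer required.
    let row_num = builder.metadata().file_metadata().num_rows();
    println!("{row_num} rows");
    let reader = builder
        .with_batch_size(batch_size)
        .build()
        .expect("build reader");
    for batch in reader {
        // Each item is an arrow RecordBatch.
        let batch = batch.expect("decode batch");
        println!("batch: {} rows x {} columns", batch.num_rows(), batch.num_columns());
    }
}
```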
@@ -114,10 +114,7 @@ async fn write_data(
         progress_bar.inc(row_count as _);
     }
 
-    progress_bar.finish_with_message(format!(
-        "file {:?} done in {}ms",
-        path, total_rpc_elapsed_ms
-    ));
+    progress_bar.finish_with_message(format!("file {path:?} done in {total_rpc_elapsed_ms}ms",));
     total_rpc_elapsed_ms
 }
 
@@ -210,133 +207,134 @@ fn build_values(column: &ArrayRef) -> Values {
         | DataType::FixedSizeList(_, _)
         | DataType::LargeList(_)
         | DataType::Struct(_)
-        | DataType::Union(_, _)
+        | DataType::Union(_, _, _)
         | DataType::Dictionary(_, _)
-        | DataType::Decimal(_, _)
+        | DataType::Decimal128(_, _)
+        | DataType::Decimal256(_, _)
         | DataType::Map(_, _) => todo!(),
     }
 }
 
-fn create_table_expr() -> CreateExpr {
-    CreateExpr {
-        catalog_name: Some(CATALOG_NAME.to_string()),
-        schema_name: Some(SCHEMA_NAME.to_string()),
+fn create_table_expr() -> CreateTableExpr {
+    CreateTableExpr {
+        catalog_name: CATALOG_NAME.to_string(),
+        schema_name: SCHEMA_NAME.to_string(),
         table_name: TABLE_NAME.to_string(),
-        desc: None,
+        desc: "".to_string(),
         column_defs: vec![
             ColumnDef {
                 name: "VendorID".to_string(),
                 datatype: ColumnDataType::Int64 as i32,
                 is_nullable: true,
-                default_constraint: None,
+                default_constraint: vec![],
             },
             (the identical `default_constraint: None` → `default_constraint: vec![]`
              change repeats in the 18 remaining column definitions:
             tpep_pickup_datetime, tpep_dropoff_datetime, PULocationID, DOLocationID,
             payment_type (Int64); passenger_count, trip_distance, RatecodeID,
             fare_amount, extra, mta_tax, tip_amount, tolls_amount,
             improvement_surcharge, total_amount, congestion_surcharge,
             airport_fee (Float64); store_and_fwd_flag (String))
         ],
         time_index: "tpep_pickup_datetime".to_string(),
@@ -344,7 +342,7 @@ fn create_table_expr() -> CreateExpr {
         create_if_not_exists: false,
         table_options: Default::default(),
         region_ids: vec![0],
-        table_id: Some(0),
+        table_id: Some(TableId { id: 0 }),
     }
 }
 
@@ -353,12 +351,12 @@ fn query_set() -> HashMap<String, String> {
 
     ret.insert(
         "count_all".to_string(),
-        format!("SELECT COUNT(*) FROM {};", TABLE_NAME),
+        format!("SELECT COUNT(*) FROM {TABLE_NAME};"),
     );
 
     ret.insert(
         "fare_amt_by_passenger".to_string(),
-        format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {} GROUP BY passenger_count",TABLE_NAME)
+        format!("SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), SUM(fare_amount) FROM {TABLE_NAME} GROUP BY passenger_count")
     );
 
     ret
@@ -371,7 +369,7 @@ async fn do_write(args: &Args, client: &Client) {
     let mut write_jobs = JoinSet::new();
 
     let create_table_result = admin.create(create_table_expr()).await;
-    println!("Create table result: {:?}", create_table_result);
+    println!("Create table result: {create_table_result:?}");
 
     let progress_bar_style = ProgressStyle::with_template(
         "[{elapsed_precise}] {bar:60.cyan/blue} {pos:>7}/{len:7} {msg}",
@@ -404,7 +402,7 @@ async fn do_write(args: &Args, client: &Client) {
 
 async fn do_query(num_iter: usize, db: &Database) {
     for (query_name, query) in query_set() {
-        println!("Running query: {}", query);
+        println!("Running query: {query}");
        for i in 0..num_iter {
            let now = Instant::now();
            let _res = db.select(Select::Sql(query.clone())).await.unwrap();
datanode example config

@@ -7,6 +7,14 @@ mysql_addr = '127.0.0.1:4406'
 mysql_runtime_size = 4
 enable_memory_catalog = false
 
+[wal]
+dir = "/tmp/greptimedb/wal"
+file_size = 1073741824
+purge_interval = 600
+purge_threshold = 53687091200
+read_batch_size = 128
+sync_write = false
+
 [storage]
 type = 'File'
 data_dir = '/tmp/greptimedb/data/'
standalone example config

@@ -1,12 +1,20 @@
 node_id = 0
 mode = 'standalone'
-wal_dir = '/tmp/greptimedb/wal/'
 enable_memory_catalog = false
 
 [http_options]
 addr = '127.0.0.1:4000'
 timeout = "30s"
 
+[wal]
+dir = "/tmp/greptimedb/wal"
+file_size = 1073741824
+purge_interval = 600
+purge_threshold = 53687091200
+read_batch_size = 128
+sync_write = false
+
 [storage]
 type = 'File'
 data_dir = '/tmp/greptimedb/data/'
Dockerfile (multi-stage source build)

@@ -24,6 +24,8 @@ RUN cargo build --release
 # TODO(zyy17): Maybe should use the more secure container image.
 FROM ubuntu:22.04 as base
 
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
+
 WORKDIR /greptime
 COPY --from=builder /greptimedb/target/release/greptime /greptime/bin/
 ENV PATH /greptime/bin/:$PATH
Dockerfile (prebuilt binary, multi-arch)

@@ -1,5 +1,7 @@
 FROM ubuntu:22.04
 
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -y install ca-certificates
+
 ARG TARGETARCH
 
 ADD $TARGETARCH/greptime /greptime/bin/
New binary files (not shown):

BIN docs/rfcs/2022-12-20-promql-in-rust/example.png (34 KiB)
BIN docs/rfcs/2022-12-20-promql-in-rust/instant-and-vector.png (58 KiB)
BIN docs/rfcs/2022-12-20-promql-in-rust/matrix-from-array.png (35 KiB)
BIN docs/rfcs/2022-12-20-promql-in-rust/range-vector-with-matrix.png (46 KiB)
docs/rfcs/2022-12-20-promql-in-rust/rfc.md (new file, 175 lines)

---
Feature Name: "promql-in-rust"
Tracking Issue: https://github.com/GreptimeTeam/greptimedb/issues/596
Date: 2022-12-20
Author: "Ruihang Xia <waynestxia@gmail.com>"
---

Rewrite PromQL in Rust
----------------------

# Summary
A Rust-native implementation of PromQL for GreptimeDB.

# Motivation
Prometheus and its query language PromQL prevail in the cloud-native observability area, an important scenario for time-series databases like GreptimeDB. We already support its remote read and write protocols, so users can integrate GreptimeDB as the storage backend of an existing Prometheus deployment, but they cannot run PromQL queries directly against GreptimeDB the way they run SQL.

This RFC proposes adding support for PromQL. Because Prometheus is written in Go, we cannot reuse its code easily; for interoperability, performance, and extensibility, porting its logic to Rust is a good choice.

# Details

## Overview
One of the goals is to reuse our existing basic operators, execution model, and runtime to reduce the work, so the entire proposal is built on top of Apache Arrow DataFusion. The rewritten PromQL logic manifests as `Expr`s or `Execution Plan`s in DataFusion, and both the intermediate data structures and the result use `Arrow`'s `RecordBatch` format.

The following sections are organized top-down: they start with the evaluation procedure, then introduce the building blocks of the new PromQL operations, follow with an explanation of the data model, and end with an example logical plan.

*This RFC is heavily related to Prometheus and PromQL. It does not repeat their basic concepts.*

## Evaluation

The original implementation is like an interpreter over the parsed PromQL AST. It has two characteristics: (1) operations are evaluated in place after being parsed into the AST, and some key parameters are kept separate from the AST because they are not present in the query text but come from elsewhere, such as another field in the HTTP payload; (2) calculation is performed per timestamp. You can see this pattern many times:
```go
for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval {}
```

These bring two differences to the proposed implementation. First, to make it more general and clear, the evaluation procedure is reorganized into several phases (the same ones DataFusion uses). Second, data is evaluated series by series (corresponding to "columnar calculation", if one thinks of the timestamp as a row number).

```
            Query            AST       Logic Plan
 ──────────► Parser ───────► Logical ────────► Physical ────┐
                             Planner           Planner      │
                                                            │
 ◄───────────────────────────── Executor ◄──────────────────┘
   Evaluation Result                       Execution Plan
```

- Parser

  Provided by the [`promql-parser`](https://github.com/GreptimeTeam/promql-parser) crate. Same as the original implementation.

- Logical Planner

  Generates a logical plan with all the needed parameters. It should accept something like `EvalStmt` in the Go implementation, which carries the query time range, the evaluation interval, and the lookback range.

  Another important thing done here is assembling the logical plan with every operation baked in: which filter and time range to read, how the data then flows through a selector into a binary operation, what the output schema of every single step is, and so on. The generated logical plan is deterministic, contains no free variables, and can be `EXPLAIN`ed clearly.

- Physical Planner

  This step converts a logical plan into an executable execution plan. Nothing special happens here compared to the previous step, except when a query is executed in a distributed manner: in that case the logical plan is divided into several parts and sent to several nodes, and each physical planner sees only its own part.

- Executor

  As its name shows, this step computes the result from the data, and all the new calculation logic, i.e. the implementation of PromQL in Rust, is placed here. The rewritten functions use `RecordBatch` and `Array` from `Arrow` as the intermediate data structures.

  Each "batch" contains data from only a single time series. This comes from the underlying storage implementation; though it is not a requirement of this RFC, having this property simplifies some functions.

  Another thing to mention: the rewritten functions are not aware of timestamp or value columns; they are defined only in terms of their input data types. For example, the `increase()` function in PromQL calculates the unbiased delta of the data, and its implementation here does only that single thing. Compare the signatures of the two implementations:

- Go
  ```go
  func funcIncrease(vals []parser.Value, args parser.Expressions) Vector {}
  ```
- Rust
  ```rust
  fn prom_increase(input: Array) -> Array {}
  ```

Some unimportant parameters are omitted. The original Go version only implements the logic for a `Point`'s value, either float or histogram, while the proposed rewrite accepts a generic `Array` as input, which can be any type that fits, from `i8` to `u64` to `TimestampNanosecond`.
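To make this concrete, below is a minimal sketch of such a type-generic kernel over Arrow's typed arrays, assuming the `arrow` crate pinned in this repository. The function name, the simplified last-minus-first logic, and the null handling are this sketch's assumptions, not the actual GreptimeDB code.

```rust
use arrow::array::{Array, Int64Array, PrimitiveArray};
use arrow::datatypes::ArrowPrimitiveType;

/// Simplified per-window delta: last value minus first value.
/// The same code works for any primitive array type whose native
/// type supports subtraction, timestamps included.
fn prom_simple_delta<T>(input: &PrimitiveArray<T>) -> Option<T::Native>
where
    T: ArrowPrimitiveType,
    T::Native: std::ops::Sub<Output = T::Native>,
{
    // A real kernel would handle nulls and counter resets; skip them here.
    if input.len() < 2 || input.null_count() > 0 {
        return None;
    }
    Some(input.value(input.len() - 1) - input.value(0))
}

fn main() {
    let window = Int64Array::from(vec![1, 5, 9]);
    assert_eq!(prom_simple_delta(&window), Some(8));
}
```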
## Plan and Expression

These are the structures that express PromQL's logic. The proposed implementation is built on top of DataFusion, so our plans and expressions take the form of `ExtensionPlan` and `ScalarUDF`. The only difference between them in this context is the return type: a plan returns a record batch, while an expression returns a single column.

This RFC proposes to add four new plans. They are fundamental building blocks that mainly handle PromQL's data-selection logic for the calculation expressions that follow.

- `SeriesNormalize`

  Sorts the data inside one series on the timestamp column, and applies the "offset" bias if present. This plan usually comes after a `TableScan` (or `TableScan` plus `Filter`) plan.

- `VectorManipulator` and `MatrixManipulator`

  Corresponding to `InstantSelector` and `RangeSelector`. We don't calculate timestamp by timestamp, hence "vector" instead of "instant"; the image below shows the difference. And "matrix" is another name for "range vector", chosen so it is not confused with our "vector". The following section details how they are implemented using Arrow.

  ![instant and vector](instant-and-vector.png)

  Due to the "interval" parameter in PromQL, the data coming out of a "selector" (or "manipulator" here) is usually shorter than its input, and we have to modify the entire record batch to shorten the timestamp, value, and tag columns alike. That is why these are formed as plans.

- `PromAggregator`

  The carrier of aggregator expressions. This should not differ much from DataFusion's built-in `Aggregate` plan, except that PromQL can use "group without" to do reverse selection.

PromQL has around 70 expressions and functions, but luckily we can reuse many of them from DataFusion, such as unary expressions, binary expressions, and aggregators. We only need to implement the PromQL-specific ones, like `rate` or `percentile`. The following table lists some typical PromQL functions and their signatures in the proposed implementation; the other functions follow the same pattern.

| Name               | In Param(s)                                           | Out Param(s) | Explain            |
|--------------------|-------------------------------------------------------|--------------|--------------------|
| instant_delta      | Matrix T                                              | Array T      | idelta in PromQL   |
| increase           | Matrix T                                              | Array T      | increase in PromQL |
| extrapolate_factor | - Matrix T<br>- Array Timestamp<br>- Array Timestamp  | Array T      | *                  |

\*: *`extrapolate_factor` is one of the "dark sides" of PromQL. In short, it is a translation of this [paragraph](https://github.com/prometheus/prometheus/blob/0372e259baf014bbade3134fd79bcdfd8cbdef2c/promql/functions.go#L134-L159).*

To reuse common calculation logic, we can break functions into several expressions and assemble them in the logical planning phase; for example, `rate()` in PromQL can be represented as `increase / extrapolate_factor`, as sketched below.
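As a deliberately simplified sketch of that decomposition over one window of samples: the function names mirror the table above, but the arithmetic is an assumption of this sketch. Here `extrapolate_factor` is taken to be the extrapolated duration a window covers, in seconds, so that dividing the raw increase by it yields a per-second rate; the real Prometheus logic adds clamping rules and counter-reset handling.

```rust
/// Unbiased delta over one window: last sample minus first sample
/// (counter resets ignored for brevity).
fn increase(values: &[f64]) -> Option<f64> {
    Some(values.last()? - values.first()?)
}

/// Extrapolated duration covered by one window, in seconds. The observed
/// span is extended by half an average sample interval on each side in
/// spirit (simplified here to one full interval), capped at the query range.
fn extrapolate_factor(ts_millis: &[i64], range_start: i64, range_end: i64) -> Option<f64> {
    let (first, last) = (*ts_millis.first()?, *ts_millis.last()?);
    if last <= first {
        return None; // fewer than two distinct samples
    }
    let avg_interval = (last - first) as f64 / (ts_millis.len() - 1) as f64;
    let covered = ((last - first) as f64 + avg_interval).min((range_end - range_start) as f64);
    Some(covered / 1000.0)
}

/// The `increase / extrapolate_factor` composition described above.
fn rate(ts_millis: &[i64], values: &[f64], range_start: i64, range_end: i64) -> Option<f64> {
    Some(increase(values)? / extrapolate_factor(ts_millis, range_start, range_end)?)
}
```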
## Data Model

This part explains how data is represented. Following GreptimeDB's data model, all data is stored as tables with tag columns, a timestamp column, and a value column. Mapping a table to a record batch is straightforward, so an instant vector can be thought of as a row in the table (though, as said before, we don't use instant vectors). Of the four basic types in PromQL (scalar, string, instant vector, and range vector), only the last one, the range vector, needs some tricks to adapt to our columnar calculation.

A range vector is a sort of matrix: it consists of small one-dimensional vectors, each being one input to a range function. Applying a range function to a range vector can then be thought of as a kind of convolution.

![range vector with matrix](range-vector-with-matrix.png)

(On the left is an illustration of a range vector; note that the Y-axis has no meaning, it just separates the pieces. On the right is an imagined "matrix" acting as the range function. Multiplying the left side by it yields a one-dimensional "matrix" with four elements, which is the evaluation result of the range vector.)

To adapt the range vector to a record batch, it has to be represented as a column. This RFC proposes to use Arrow's `DictionaryArray` to represent a range vector, or `Matrix`. This "misuses" `DictionaryArray` to ship some additional information along with an array: because the range vector slides over a single series, we only need the `offset` and `length` of each slide to reconstruct the matrix from an array:

![matrix from array](matrix-from-array.png)

The length is not fixed; it depends on the input's timestamps. A PoC implementation of `Matrix` and `increase()` can be found in [this repo](https://github.com/waynexia/corroding-prometheus).
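To make the offset/length reconstruction concrete, here is a de-sugared sketch that uses a plain struct in place of the `DictionaryArray` trick, assuming the `arrow` crate pinned in this repository; the names are illustrative, and the linked PoC is the authoritative version.

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Float64Array};

/// A plain-struct stand-in for the DictionaryArray-based `Matrix`:
/// one underlying series plus the (offset, length) of every slide.
struct Matrix {
    series: ArrayRef,             // one time series, sorted by timestamp
    windows: Vec<(usize, usize)>, // (offset, length) per slide
}

impl Matrix {
    /// Zero-copy view of one range-vector window.
    fn window(&self, i: usize) -> ArrayRef {
        let (offset, length) = self.windows[i];
        self.series.slice(offset, length)
    }
}

fn main() {
    let series: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 2.0, 4.0, 7.0, 11.0]));
    let matrix = Matrix {
        series,
        windows: vec![(0, 3), (1, 3), (2, 3)],
    };
    // Each window is what a range function such as increase() consumes.
    assert_eq!(matrix.window(1).len(), 3);
}
```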
## Example

The logical plan of this PromQL query

```promql
# start: 2022-12-20T10:00:00
# end: 2022-12-21T10:00:00
# interval: 1m
# lookback: 30s
sum (rate(request_duration[5m])) by (idc)
```

looks like

<!-- title: 'PromAggregator: \naggr = sum, column = idc'
operator: prom
inputs:
  - title: 'Matrix Manipulator: \ninterval = 1m, range = 5m, expr = div(increase(value), extrapolate_factor(timestamp))'
    operator: prom
    inputs:
      - title: 'Series Normalize: \noffset = 0'
        operator: prom
        inputs:
          - title: 'Filter: \ntimestamp > 2022-12-20T10:00:00 && timestamp < 2022-12-21T10:00:00'
            operator: filter
            inputs:
              - title: 'Table Scan: \ntable = request_duration, timestamp > 2022-12-20T10:00:00 && timestamp < 2022-12-21T10:00:00'
                operator: scan -->

![example](example.png)

# Drawbacks

Human beings are error-prone. Rewriting from the ground up takes more effort, and more attention to correctness, than translating line by line. And since the two evaluators' architectures differ, it might be painful to catch up with any breaking changes in PromQL in the future.

Misusing Arrow's `DictionaryArray` as `Matrix` is another concern. This hack needs some `unsafe` function calls to bypass Arrow's checks, and although Arrow's API is stable, this is still undocumented behavior.

# Alternatives

There are a few alternatives we've considered:
- Wrap the existing PromQL implementation via FFI and import it into GreptimeDB.
- Translate its evaluator engine line by line, rather than rewriting it.
- Integrate the Prometheus server into GreptimeDB via RPC, making it a detached execution engine for PromQL.

The first and second options build a separate execution engine inside GreptimeDB; they may ease the pain of rewriting, but they would hurt later evolution in areas like resource management. Introducing another deployed component, as in the last option, complicates the deployment architecture.

All of them are also more or less redundant in data transportation, which costs performance and resources. The proposed built-in execution procedure is, in addition, easy to integrate with and expose through the SQL interface GreptimeDB already provides. Some concepts in PromQL, like sliding windows (range vectors), are very convenient and ergonomic for analyzing series data, which makes this not only a PromQL evaluator but also an enhancement to our query system.
rust-toolchain

@@ -1 +1 @@
-nightly-2022-07-14
+nightly-2022-12-20
src/api/Cargo.toml

@@ -1,9 +1,8 @@
 [package]
 name = "api"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 common-base = { path = "../common/base" }
greptime protobuf (admin/DDL messages)

@@ -17,7 +17,7 @@ message AdminResponse {
 message AdminExpr {
   ExprHeader header = 1;
   oneof expr {
-    CreateExpr create = 2;
+    CreateTableExpr create_table = 2;
     AlterExpr alter = 3;
     CreateDatabaseExpr create_database = 4;
     DropTableExpr drop_table = 5;
@@ -31,24 +31,23 @@ message AdminResult {
   }
 }
 
-// TODO(hl): rename to CreateTableExpr
-message CreateExpr {
-  optional string catalog_name = 1;
-  optional string schema_name = 2;
+message CreateTableExpr {
+  string catalog_name = 1;
+  string schema_name = 2;
   string table_name = 3;
-  optional string desc = 4;
+  string desc = 4;
   repeated ColumnDef column_defs = 5;
   string time_index = 6;
   repeated string primary_keys = 7;
   bool create_if_not_exists = 8;
   map<string, string> table_options = 9;
-  optional uint32 table_id = 10;
+  TableId table_id = 10;
   repeated uint32 region_ids = 11;
 }
 
 message AlterExpr {
-  optional string catalog_name = 1;
-  optional string schema_name = 2;
+  string catalog_name = 1;
+  string schema_name = 2;
   string table_name = 3;
   oneof kind {
     AddColumns add_columns = 4;
@@ -62,6 +61,11 @@ message DropTableExpr {
   string table_name = 3;
 }
 
+message CreateDatabaseExpr {
+  //TODO(hl): maybe rename to schema_name?
+  string database_name = 1;
+}
+
 message AddColumns {
   repeated AddColumn add_columns = 1;
 }
@@ -79,7 +83,6 @@ message DropColumn {
   string name = 1;
 }
 
-message CreateDatabaseExpr {
-  //TODO(hl): maybe rename to schema_name?
-  string database_name = 1;
+message TableId {
+  uint32 id = 1;
 }
greptime protobuf (column messages)

@@ -32,7 +32,10 @@ message Column {
 
     repeated int32 date_values = 14;
     repeated int64 datetime_values = 15;
-    repeated int64 ts_millis_values = 16;
+    repeated int64 ts_second_values = 16;
+    repeated int64 ts_millisecond_values = 17;
+    repeated int64 ts_microsecond_values = 18;
+    repeated int64 ts_nanosecond_values = 19;
   }
   // The array of non-null values in this column.
   //
@@ -56,7 +59,7 @@ message ColumnDef {
   string name = 1;
   ColumnDataType datatype = 2;
   bool is_nullable = 3;
-  optional bytes default_constraint = 4;
+  bytes default_constraint = 4;
 }
 
 enum ColumnDataType {
@@ -75,5 +78,8 @@ enum ColumnDataType {
   STRING = 12;
   DATE = 13;
   DATETIME = 14;
-  TIMESTAMP = 15;
+  TIMESTAMP_SECOND = 15;
+  TIMESTAMP_MILLISECOND = 16;
+  TIMESTAMP_MICROSECOND = 17;
+  TIMESTAMP_NANOSECOND = 18;
 }
api crate source (column helpers)

@@ -15,6 +15,7 @@
 use common_base::BitVec;
 use common_time::timestamp::TimeUnit;
 use datatypes::prelude::ConcreteDataType;
+use datatypes::types::TimestampType;
 use datatypes::value::Value;
 use datatypes::vectors::VectorRef;
 use snafu::prelude::*;
@@ -56,7 +57,16 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
             ColumnDataType::String => ConcreteDataType::string_datatype(),
             ColumnDataType::Date => ConcreteDataType::date_datatype(),
             ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
-            ColumnDataType::Timestamp => ConcreteDataType::timestamp_millis_datatype(),
+            ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
+            ColumnDataType::TimestampMillisecond => {
+                ConcreteDataType::timestamp_millisecond_datatype()
+            }
+            ColumnDataType::TimestampMicrosecond => {
+                ConcreteDataType::timestamp_microsecond_datatype()
+            }
+            ColumnDataType::TimestampNanosecond => {
+                ConcreteDataType::timestamp_nanosecond_datatype()
+            }
         }
     }
 }
@@ -81,7 +91,12 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
             ConcreteDataType::String(_) => ColumnDataType::String,
             ConcreteDataType::Date(_) => ColumnDataType::Date,
             ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
-            ConcreteDataType::Timestamp(_) => ColumnDataType::Timestamp,
+            ConcreteDataType::Timestamp(unit) => match unit {
+                TimestampType::Second(_) => ColumnDataType::TimestampSecond,
+                TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
+                TimestampType::Microsecond(_) => ColumnDataType::TimestampMicrosecond,
+                TimestampType::Nanosecond(_) => ColumnDataType::TimestampNanosecond,
+            },
             ConcreteDataType::Null(_) | ConcreteDataType::List(_) => {
                 return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
             }
@@ -153,8 +168,20 @@ impl Values {
                 datetime_values: Vec::with_capacity(capacity),
                 ..Default::default()
             },
-            ColumnDataType::Timestamp => Values {
-                ts_millis_values: Vec::with_capacity(capacity),
+            ColumnDataType::TimestampSecond => Values {
+                ts_second_values: Vec::with_capacity(capacity),
+                ..Default::default()
+            },
+            ColumnDataType::TimestampMillisecond => Values {
+                ts_millisecond_values: Vec::with_capacity(capacity),
+                ..Default::default()
+            },
+            ColumnDataType::TimestampMicrosecond => Values {
+                ts_microsecond_values: Vec::with_capacity(capacity),
+                ..Default::default()
+            },
+            ColumnDataType::TimestampNanosecond => Values {
+                ts_nanosecond_values: Vec::with_capacity(capacity),
                 ..Default::default()
             },
         }
@@ -187,9 +214,12 @@ impl Column {
             Value::Binary(val) => values.binary_values.push(val.to_vec()),
             Value::Date(val) => values.date_values.push(val.val()),
             Value::DateTime(val) => values.datetime_values.push(val.val()),
-            Value::Timestamp(val) => values
-                .ts_millis_values
-                .push(val.convert_to(TimeUnit::Millisecond)),
+            Value::Timestamp(val) => match val.unit() {
+                TimeUnit::Second => values.ts_second_values.push(val.value()),
+                TimeUnit::Millisecond => values.ts_millisecond_values.push(val.value()),
+                TimeUnit::Microsecond => values.ts_microsecond_values.push(val.value()),
+                TimeUnit::Nanosecond => values.ts_nanosecond_values.push(val.value()),
+            },
             Value::List(_) => unreachable!(),
         });
         self.null_mask = null_mask.into_vec();
@@ -200,7 +230,10 @@ impl Column {
 mod tests {
     use std::sync::Arc;
 
-    use datatypes::vectors::BooleanVector;
+    use datatypes::vectors::{
+        BooleanVector, TimestampMicrosecondVector, TimestampMillisecondVector,
+        TimestampNanosecondVector, TimestampSecondVector,
+    };
 
     use super::*;
 
@@ -258,8 +291,8 @@ mod tests {
         let values = values.datetime_values;
         assert_eq!(2, values.capacity());
 
-        let values = Values::with_capacity(ColumnDataType::Timestamp, 2);
-        let values = values.ts_millis_values;
+        let values = Values::with_capacity(ColumnDataType::TimestampMillisecond, 2);
+        let values = values.ts_millisecond_values;
         assert_eq!(2, values.capacity());
     }
 
@@ -326,8 +359,8 @@ mod tests {
             ColumnDataTypeWrapper(ColumnDataType::Datetime).into()
         );
         assert_eq!(
-            ConcreteDataType::timestamp_millis_datatype(),
-            ColumnDataTypeWrapper(ColumnDataType::Timestamp).into()
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond).into()
         );
     }
 
@@ -394,8 +427,8 @@ mod tests {
             ConcreteDataType::datetime_datatype().try_into().unwrap()
         );
         assert_eq!(
-            ColumnDataTypeWrapper(ColumnDataType::Timestamp),
-            ConcreteDataType::timestamp_millis_datatype()
+            ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond),
+            ConcreteDataType::timestamp_millisecond_datatype()
                 .try_into()
                 .unwrap()
        );
@@ -412,7 +445,48 @@ mod tests {
         assert!(result.is_err());
         assert_eq!(
             result.unwrap_err().to_string(),
-            "Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
+            "Failed to create column datatype from List(ListType { item_type: Boolean(BooleanType) })"
+        );
+    }
+
+    #[test]
+    fn test_column_put_timestamp_values() {
+        let mut column = Column {
+            column_name: "test".to_string(),
+            semantic_type: 0,
+            values: Some(Values {
+                ..Default::default()
+            }),
+            null_mask: vec![],
+            datatype: 0,
+        };
+
+        let vector = Arc::new(TimestampNanosecondVector::from_vec(vec![1, 2, 3]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![1, 2, 3],
+            column.values.as_ref().unwrap().ts_nanosecond_values
+        );
+
+        let vector = Arc::new(TimestampMillisecondVector::from_vec(vec![4, 5, 6]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![4, 5, 6],
+            column.values.as_ref().unwrap().ts_millisecond_values
+        );
+
+        let vector = Arc::new(TimestampMicrosecondVector::from_vec(vec![7, 8, 9]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![7, 8, 9],
+            column.values.as_ref().unwrap().ts_microsecond_values
+        );
+
+        let vector = Arc::new(TimestampSecondVector::from_vec(vec![10, 11, 12]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![10, 11, 12],
+            column.values.as_ref().unwrap().ts_second_values
         );
     }
 }
api crate source (ColumnDef-to-ColumnSchema conversion)

@@ -23,12 +23,13 @@ impl ColumnDef {
     pub fn try_as_column_schema(&self) -> Result<ColumnSchema> {
         let data_type = ColumnDataTypeWrapper::try_new(self.datatype)?;
 
-        let constraint = match &self.default_constraint {
-            None => None,
-            Some(v) => Some(
-                ColumnDefaultConstraint::try_from(&v[..])
+        let constraint = if self.default_constraint.is_empty() {
+            None
+        } else {
+            Some(
+                ColumnDefaultConstraint::try_from(self.default_constraint.as_slice())
                     .context(error::ConvertColumnDefaultConstraintSnafu { column: &self.name })?,
-            ),
+            )
         };
 
         ColumnSchema::new(&self.name, data_type.into(), self.is_nullable)
src/catalog/Cargo.toml

@@ -1,9 +1,8 @@
 [package]
 name = "catalog"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 api = { path = "../api" }
@@ -19,9 +18,7 @@ common-recordbatch = { path = "../common/recordbatch" }
 common-runtime = { path = "../common/runtime" }
 common-telemetry = { path = "../common/telemetry" }
 common-time = { path = "../common/time" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
-    "simd",
-] }
+datafusion.workspace = true
 datatypes = { path = "../datatypes" }
 futures = "0.3"
 futures-util = "0.3"
|||||||
@@ -17,7 +17,7 @@ use std::any::Any;
|
|||||||
use common_error::ext::{BoxedError, ErrorExt};
|
use common_error::ext::{BoxedError, ErrorExt};
|
||||||
use common_error::prelude::{Snafu, StatusCode};
|
use common_error::prelude::{Snafu, StatusCode};
|
||||||
use datafusion::error::DataFusionError;
|
use datafusion::error::DataFusionError;
|
||||||
use datatypes::arrow;
|
use datatypes::prelude::ConcreteDataType;
|
||||||
use datatypes::schema::RawSchema;
|
use datatypes::schema::RawSchema;
|
||||||
use snafu::{Backtrace, ErrorCompat};
|
use snafu::{Backtrace, ErrorCompat};
|
||||||
|
|
||||||
@@ -51,14 +51,12 @@ pub enum Error {
|
|||||||
SystemCatalog { msg: String, backtrace: Backtrace },
|
SystemCatalog { msg: String, backtrace: Backtrace },
|
||||||
|
|
||||||
#[snafu(display(
|
#[snafu(display(
|
||||||
"System catalog table type mismatch, expected: binary, found: {:?} source: {}",
|
"System catalog table type mismatch, expected: binary, found: {:?}",
|
||||||
data_type,
|
data_type,
|
||||||
source
|
|
||||||
))]
|
))]
|
||||||
SystemCatalogTypeMismatch {
|
SystemCatalogTypeMismatch {
|
||||||
data_type: arrow::datatypes::DataType,
|
data_type: ConcreteDataType,
|
||||||
#[snafu(backtrace)]
|
backtrace: Backtrace,
|
||||||
source: datatypes::error::Error,
|
|
||||||
},
|
},
|
||||||
|
|
||||||
#[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
|
#[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
|
||||||
@@ -222,10 +220,11 @@ impl ErrorExt for Error {
|
|||||||
| Error::ValueDeserialize { .. }
|
| Error::ValueDeserialize { .. }
|
||||||
| Error::Io { .. } => StatusCode::StorageUnavailable,
|
| Error::Io { .. } => StatusCode::StorageUnavailable,
|
||||||
|
|
||||||
Error::RegisterTable { .. } => StatusCode::Internal,
|
Error::RegisterTable { .. } | Error::SystemCatalogTypeMismatch { .. } => {
|
||||||
|
StatusCode::Internal
|
||||||
|
}
|
||||||
|
|
||||||
Error::ReadSystemCatalog { source, .. } => source.status_code(),
|
Error::ReadSystemCatalog { source, .. } => source.status_code(),
|
||||||
Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
|
|
||||||
Error::InvalidCatalogValue { source, .. } => source.status_code(),
|
Error::InvalidCatalogValue { source, .. } => source.status_code(),
|
||||||
|
|
||||||
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
|
Error::TableExists { .. } => StatusCode::TableAlreadyExists,
|
||||||
@@ -265,7 +264,6 @@ impl From<Error> for DataFusionError {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use common_error::mock::MockError;
|
use common_error::mock::MockError;
|
||||||
use datatypes::arrow::datatypes::DataType;
|
|
||||||
use snafu::GenerateImplicitData;
|
use snafu::GenerateImplicitData;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -314,11 +312,8 @@ mod tests {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
StatusCode::Internal,
|
StatusCode::Internal,
|
||||||
Error::SystemCatalogTypeMismatch {
|
Error::SystemCatalogTypeMismatch {
|
||||||
data_type: DataType::Boolean,
|
data_type: ConcreteDataType::binary_datatype(),
|
||||||
source: datatypes::error::Error::UnsupportedArrowType {
|
backtrace: Backtrace::generate(),
|
||||||
arrow_type: DataType::Boolean,
|
|
||||||
backtrace: Backtrace::generate()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
.status_code()
|
.status_code()
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -33,48 +33,38 @@ const ALPHANUMERICS_NAME_PATTERN: &str = "[a-zA-Z_][a-zA-Z0-9_]*";
 
 lazy_static! {
     static ref CATALOG_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{}-({})$",
-        CATALOG_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN
+        "^{CATALOG_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})$"
     ))
     .unwrap();
 }
 
 lazy_static! {
     static ref SCHEMA_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{}-({})-({})$",
-        SCHEMA_KEY_PREFIX, ALPHANUMERICS_NAME_PATTERN, ALPHANUMERICS_NAME_PATTERN
+        "^{SCHEMA_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
     ))
     .unwrap();
 }
 
 lazy_static! {
     static ref TABLE_GLOBAL_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{}-({})-({})-({})$",
-        TABLE_GLOBAL_KEY_PREFIX,
-        ALPHANUMERICS_NAME_PATTERN,
-        ALPHANUMERICS_NAME_PATTERN,
-        ALPHANUMERICS_NAME_PATTERN
+        "^{TABLE_GLOBAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})$"
     ))
     .unwrap();
 }
 
 lazy_static! {
     static ref TABLE_REGIONAL_KEY_PATTERN: Regex = Regex::new(&format!(
-        "^{}-({})-({})-({})-([0-9]+)$",
-        TABLE_REGIONAL_KEY_PREFIX,
-        ALPHANUMERICS_NAME_PATTERN,
-        ALPHANUMERICS_NAME_PATTERN,
-        ALPHANUMERICS_NAME_PATTERN
+        "^{TABLE_REGIONAL_KEY_PREFIX}-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-({ALPHANUMERICS_NAME_PATTERN})-([0-9]+)$"
     ))
     .unwrap();
 }
 
 pub fn build_catalog_prefix() -> String {
-    format!("{}-", CATALOG_KEY_PREFIX)
+    format!("{CATALOG_KEY_PREFIX}-")
 }
 
 pub fn build_schema_prefix(catalog_name: impl AsRef<str>) -> String {
-    format!("{}-{}-", SCHEMA_KEY_PREFIX, catalog_name.as_ref())
+    format!("{SCHEMA_KEY_PREFIX}-{}-", catalog_name.as_ref())
 }
 
 pub fn build_table_global_prefix(
@@ -82,8 +72,7 @@ pub fn build_table_global_prefix(
     schema_name: impl AsRef<str>,
 ) -> String {
     format!(
-        "{}-{}-{}-",
-        TABLE_GLOBAL_KEY_PREFIX,
+        "{TABLE_GLOBAL_KEY_PREFIX}-{}-{}-",
         catalog_name.as_ref(),
         schema_name.as_ref()
     )
@@ -138,7 +127,7 @@ impl TableGlobalKey {
 
 /// Table global info contains necessary info for a datanode to create table regions, including
 /// table id, table meta(schema...), region id allocation across datanodes.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct TableGlobalValue {
     /// Id of datanode that created the global table info kv. only for debugging.
     pub node_id: u64,
@@ -378,7 +367,7 @@ mod tests {
             table_info,
         };
         let serialized = serde_json::to_string(&value).unwrap();
-        let deserialized = TableGlobalValue::parse(&serialized).unwrap();
+        let deserialized = TableGlobalValue::parse(serialized).unwrap();
         assert_eq!(value, deserialized);
     }
 }
@@ -157,7 +157,7 @@ pub struct RegisterSchemaRequest {
 
 /// Formats table fully-qualified name
 pub fn format_full_table_name(catalog: &str, schema: &str, table: &str) -> String {
-    format!("{}.{}.{}", catalog, schema, table)
+    format!("{catalog}.{schema}.{table}")
 }
 
 pub trait CatalogProviderFactory {
@@ -187,8 +187,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
             .await
             .with_context(|_| CreateTableSnafu {
                 table_info: format!(
-                    "{}.{}.{}, id: {}",
-                    catalog_name, schema_name, table_name, table_id,
+                    "{catalog_name}.{schema_name}.{table_name}, id: {table_id}",
                 ),
             })?;
         manager
@@ -200,7 +199,7 @@ pub(crate) async fn handle_system_table_request<'a, M: CatalogManager>(
                 table: table.clone(),
             })
             .await?;
-        info!("Created and registered system table: {}", table_name);
+        info!("Created and registered system table: {table_name}");
         table
     };
     if let Some(hook) = req.open_hook {
@@ -145,27 +145,34 @@ impl LocalCatalogManager {
     /// Convert `RecordBatch` to a vector of `Entry`.
     fn record_batch_to_entry(rb: RecordBatch) -> Result<Vec<Entry>> {
         ensure!(
-            rb.df_recordbatch.columns().len() >= 6,
+            rb.num_columns() >= 6,
             SystemCatalogSnafu {
-                msg: format!("Length mismatch: {}", rb.df_recordbatch.columns().len())
+                msg: format!("Length mismatch: {}", rb.num_columns())
             }
         );
 
-        let entry_type = UInt8Vector::try_from_arrow_array(&rb.df_recordbatch.columns()[0])
-            .with_context(|_| SystemCatalogTypeMismatchSnafu {
-                data_type: rb.df_recordbatch.columns()[ENTRY_TYPE_INDEX]
-                    .data_type()
-                    .clone(),
+        let entry_type = rb
+            .column(ENTRY_TYPE_INDEX)
+            .as_any()
+            .downcast_ref::<UInt8Vector>()
+            .with_context(|| SystemCatalogTypeMismatchSnafu {
+                data_type: rb.column(ENTRY_TYPE_INDEX).data_type(),
             })?;
 
-        let key = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[1])
-            .with_context(|_| SystemCatalogTypeMismatchSnafu {
-                data_type: rb.df_recordbatch.columns()[KEY_INDEX].data_type().clone(),
+        let key = rb
+            .column(KEY_INDEX)
+            .as_any()
+            .downcast_ref::<BinaryVector>()
+            .with_context(|| SystemCatalogTypeMismatchSnafu {
+                data_type: rb.column(KEY_INDEX).data_type(),
             })?;
 
-        let value = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[3])
-            .with_context(|_| SystemCatalogTypeMismatchSnafu {
-                data_type: rb.df_recordbatch.columns()[VALUE_INDEX].data_type().clone(),
+        let value = rb
+            .column(VALUE_INDEX)
+            .as_any()
+            .downcast_ref::<BinaryVector>()
+            .with_context(|| SystemCatalogTypeMismatchSnafu {
+                data_type: rb.column(VALUE_INDEX).data_type(),
             })?;
 
         let mut res = Vec::with_capacity(rb.num_rows());
@@ -331,7 +338,7 @@ impl CatalogManager for LocalCatalogManager {
         let schema = catalog
             .schema(schema_name)?
             .with_context(|| SchemaNotFoundSnafu {
-                schema_info: format!("{}.{}", catalog_name, schema_name),
+                schema_info: format!("{catalog_name}.{schema_name}"),
             })?;
 
         {
@@ -445,7 +452,7 @@ impl CatalogManager for LocalCatalogManager {
         let schema = catalog
             .schema(schema_name)?
             .with_context(|| SchemaNotFoundSnafu {
-                schema_info: format!("{}.{}", catalog_name, schema_name),
+                schema_info: format!("{catalog_name}.{schema_name}"),
             })?;
         schema.table(table_name)
     }
@@ -331,10 +331,7 @@ impl RemoteCatalogManager {
             .open_table(&context, request)
             .await
             .with_context(|_| OpenTableSnafu {
-                table_info: format!(
-                    "{}.{}.{}, id:{}",
-                    catalog_name, schema_name, table_name, table_id
-                ),
+                table_info: format!("{catalog_name}.{schema_name}.{table_name}, id:{table_id}"),
             })? {
             Some(table) => {
                 info!(
@@ -355,7 +352,7 @@ impl RemoteCatalogManager {
             .clone()
             .try_into()
             .context(InvalidTableSchemaSnafu {
-                table_info: format!("{}.{}.{}", catalog_name, schema_name, table_name,),
+                table_info: format!("{catalog_name}.{schema_name}.{table_name}"),
                 schema: meta.schema.clone(),
             })?;
         let req = CreateTableRequest {
@@ -477,7 +474,7 @@ impl CatalogManager for RemoteCatalogManager {
         let schema = catalog
             .schema(schema_name)?
             .with_context(|| SchemaNotFoundSnafu {
-                schema_info: format!("{}.{}", catalog_name, schema_name),
+                schema_info: format!("{catalog_name}.{schema_name}"),
             })?;
         schema.table(table_name)
     }
@@ -21,14 +21,13 @@ use common_catalog::consts::{
     SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
 };
 use common_query::logical_plan::Expr;
-use common_query::physical_plan::{PhysicalPlanRef, RuntimeEnv};
+use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
 use common_recordbatch::SendableRecordBatchStream;
 use common_telemetry::debug;
-use common_time::timestamp::Timestamp;
 use common_time::util;
 use datatypes::prelude::{ConcreteDataType, ScalarVector};
 use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder, SchemaRef};
-use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
+use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, OptionExt, ResultExt};
 use table::engine::{EngineContext, TableEngineRef};
@@ -62,7 +61,7 @@ impl Table for SystemCatalogTable {
 
     async fn scan(
         &self,
-        _projection: &Option<Vec<usize>>,
+        _projection: Option<&Vec<usize>>,
         _filters: &[Expr],
         _limit: Option<usize>,
     ) -> table::Result<PhysicalPlanRef> {
@@ -127,13 +126,14 @@ impl SystemCatalogTable {
     /// Create a stream of all entries inside system catalog table
     pub async fn records(&self) -> Result<SendableRecordBatchStream> {
         let full_projection = None;
+        let ctx = SessionContext::new();
         let scan = self
             .table
-            .scan(&full_projection, &[], None)
+            .scan(full_projection, &[], None)
             .await
             .context(error::SystemCatalogTableScanSnafu)?;
         let stream = scan
-            .execute(0, Arc::new(RuntimeEnv::default()))
+            .execute(0, ctx.task_ctx())
             .context(error::SystemCatalogTableScanExecSnafu)?;
         Ok(stream)
     }
@@ -161,7 +161,7 @@ fn build_system_catalog_schema() -> Schema {
         ),
         ColumnSchema::new(
             "timestamp".to_string(),
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             false,
         )
         .with_time_index(true),
@@ -172,12 +172,12 @@ fn build_system_catalog_schema() -> Schema {
         ),
         ColumnSchema::new(
             "gmt_created".to_string(),
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             false,
         ),
         ColumnSchema::new(
             "gmt_modified".to_string(),
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             false,
         ),
     ];
@@ -197,7 +197,7 @@ pub fn build_table_insert_request(full_table_name: String, table_id: TableId) ->
 }
 
 pub fn build_schema_insert_request(catalog_name: String, schema_name: String) -> InsertRequest {
-    let full_schema_name = format!("{}.{}", catalog_name, schema_name);
+    let full_schema_name = format!("{catalog_name}.{schema_name}");
     build_insert_request(
         EntryType::Schema,
         full_schema_name.as_bytes(),
@@ -222,7 +222,7 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
     // Timestamp in key part is intentionally left to 0
     columns_values.insert(
         "timestamp".to_string(),
-        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(0)])) as _,
+        Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
     );
 
     columns_values.insert(
@@ -230,18 +230,15 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
         Arc::new(BinaryVector::from_slice(&[value])) as _,
     );
 
+    let now = util::current_time_millis();
     columns_values.insert(
         "gmt_created".to_string(),
-        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
-            util::current_time_millis(),
-        )])) as _,
+        Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
     );
 
     columns_values.insert(
         "gmt_modified".to_string(),
-        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
-            util::current_time_millis(),
-        )])) as _,
+        Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
    );
 
     InsertRequest {
@@ -370,7 +367,7 @@ pub struct TableEntryValue {
 
 #[cfg(test)]
 mod tests {
-    use log_store::fs::noop::NoopLogStore;
+    use log_store::NoopLogStore;
     use mito::config::EngineConfig;
     use mito::engine::MitoEngine;
     use object_store::ObjectStore;
@@ -393,7 +390,7 @@ mod tests {
         if let Entry::Catalog(e) = entry {
             assert_eq!("some_catalog", e.catalog_name);
         } else {
-            panic!("Unexpected type: {:?}", entry);
+            panic!("Unexpected type: {entry:?}");
         }
     }
 
@@ -410,7 +407,7 @@ mod tests {
            assert_eq!("some_catalog", e.catalog_name);
            assert_eq!("some_schema", e.schema_name);
         } else {
-            panic!("Unexpected type: {:?}", entry);
+            panic!("Unexpected type: {entry:?}");
         }
     }
 
@@ -429,7 +426,7 @@ mod tests {
            assert_eq!("some_table", e.table_name);
            assert_eq!(42, e.table_id);
         } else {
-            panic!("Unexpected type: {:?}", entry);
+            panic!("Unexpected type: {entry:?}");
         }
     }
 
@@ -26,9 +26,9 @@ use common_query::logical_plan::Expr;
 use common_query::physical_plan::PhysicalPlanRef;
 use common_recordbatch::error::Result as RecordBatchResult;
 use common_recordbatch::{RecordBatch, RecordBatchStream};
-use datatypes::prelude::{ConcreteDataType, VectorBuilder};
+use datatypes::prelude::{ConcreteDataType, DataType};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
-use datatypes::value::Value;
+use datatypes::value::ValueRef;
 use datatypes::vectors::VectorRef;
 use futures::Stream;
 use snafu::ResultExt;
@@ -77,7 +77,7 @@ impl Table for Tables {
 
     async fn scan(
         &self,
-        _projection: &Option<Vec<usize>>,
+        _projection: Option<&Vec<usize>>,
         _filters: &[Expr],
         _limit: Option<usize>,
     ) -> table::error::Result<PhysicalPlanRef> {
@@ -149,26 +149,33 @@ fn tables_to_record_batch(
     engine: &str,
 ) -> Vec<VectorRef> {
     let mut catalog_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
     let mut schema_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
     let mut table_name_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
     let mut engine_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
 
     for table_name in table_names {
-        catalog_vec.push(&Value::String(catalog_name.into()));
-        schema_vec.push(&Value::String(schema_name.into()));
-        table_name_vec.push(&Value::String(table_name.into()));
-        engine_vec.push(&Value::String(engine.into()));
+        // Safety: All these vectors are string type.
+        catalog_vec
+            .push_value_ref(ValueRef::String(catalog_name))
+            .unwrap();
+        schema_vec
+            .push_value_ref(ValueRef::String(schema_name))
+            .unwrap();
+        table_name_vec
+            .push_value_ref(ValueRef::String(&table_name))
+            .unwrap();
+        engine_vec.push_value_ref(ValueRef::String(engine)).unwrap();
     }
 
     vec![
-        catalog_vec.finish(),
-        schema_vec.finish(),
-        table_name_vec.finish(),
-        engine_vec.finish(),
+        catalog_vec.to_vector(),
+        schema_vec.to_vector(),
+        table_name_vec.to_vector(),
+        engine_vec.to_vector(),
     ]
 }
 
@@ -340,9 +347,7 @@ fn build_schema_for_tables() -> Schema {
 #[cfg(test)]
 mod tests {
     use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
-    use common_query::physical_plan::RuntimeEnv;
-    use datatypes::arrow::array::Utf8Array;
-    use datatypes::arrow::datatypes::DataType;
+    use common_query::physical_plan::SessionContext;
     use futures_util::StreamExt;
     use table::table::numbers::NumbersTable;
 
@@ -365,57 +370,48 @@ mod tests {
             .unwrap();
 
         let tables = Tables::new(catalog_list, "test_engine".to_string());
-        let tables_stream = tables.scan(&None, &[], None).await.unwrap();
-        let mut tables_stream = tables_stream
-            .execute(0, Arc::new(RuntimeEnv::default()))
-            .unwrap();
+        let tables_stream = tables.scan(None, &[], None).await.unwrap();
+        let session_ctx = SessionContext::new();
+        let mut tables_stream = tables_stream.execute(0, session_ctx.task_ctx()).unwrap();
 
         if let Some(t) = tables_stream.next().await {
-            let batch = t.unwrap().df_recordbatch;
+            let batch = t.unwrap();
             assert_eq!(1, batch.num_rows());
             assert_eq!(4, batch.num_columns());
-            assert_eq!(&DataType::Utf8, batch.column(0).data_type());
-            assert_eq!(&DataType::Utf8, batch.column(1).data_type());
-            assert_eq!(&DataType::Utf8, batch.column(2).data_type());
-            assert_eq!(&DataType::Utf8, batch.column(3).data_type());
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(0).data_type()
+            );
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(1).data_type()
+            );
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(2).data_type()
+            );
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(3).data_type()
+            );
             assert_eq!(
                 "greptime",
-                batch
-                    .column(0)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(0).get_ref(0).as_string().unwrap().unwrap()
             );
 
             assert_eq!(
                 "public",
-                batch
-                    .column(1)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(1).get_ref(0).as_string().unwrap().unwrap()
            );
 
             assert_eq!(
                 "test_table",
-                batch
-                    .column(2)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(2).get_ref(0).as_string().unwrap().unwrap()
            );
 
             assert_eq!(
                 "test_engine",
-                batch
-                    .column(3)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(3).get_ref(0).as_string().unwrap().unwrap()
             );
         } else {
             panic!("Record batch should not be empty!")
@@ -69,8 +69,7 @@ mod tests {
         assert!(
             err.to_string()
                 .contains("Table `greptime.public.test_table` already exists"),
-            "Actual error message: {}",
-            err
+            "Actual error message: {err}",
         );
     }
 
@@ -189,10 +189,10 @@ impl TableEngine for MockTableEngine {
         unimplemented!()
     }
 
-    fn get_table<'a>(
+    fn get_table(
         &self,
         _ctx: &EngineContext,
-        table_ref: &'a TableReference,
+        table_ref: &TableReference,
     ) -> table::Result<Option<TableRef>> {
         futures::executor::block_on(async {
             Ok(self
@@ -204,7 +204,7 @@ impl TableEngine for MockTableEngine {
         })
     }
 
-    fn table_exists<'a>(&self, _ctx: &EngineContext, table_ref: &'a TableReference) -> bool {
+    fn table_exists(&self, _ctx: &EngineContext, table_ref: &TableReference) -> bool {
         futures::executor::block_on(async {
             self.tables
                 .read()
@@ -1,9 +1,8 @@
 [package]
 name = "client"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 api = { path = "../api" }
@@ -15,9 +14,7 @@ common-grpc-expr = { path = "../common/grpc-expr" }
 common-query = { path = "../common/query" }
 common-recordbatch = { path = "../common/recordbatch" }
 common-time = { path = "../common/time" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
-    "simd",
-] }
+datafusion.workspace = true
 datatypes = { path = "../datatypes" }
 enum_dispatch = "0.3"
 parking_lot = "0.12"
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use api::v1::{ColumnDataType, ColumnDef, CreateExpr};
+use api::v1::{ColumnDataType, ColumnDef, CreateTableExpr, TableId};
 use client::admin::Admin;
 use client::{Client, Database};
 use prost_09::Message;
@@ -33,36 +33,36 @@ fn main() {
 async fn run() {
     let client = Client::with_urls(vec!["127.0.0.1:3001"]);
 
-    let create_table_expr = CreateExpr {
-        catalog_name: Some("greptime".to_string()),
-        schema_name: Some("public".to_string()),
+    let create_table_expr = CreateTableExpr {
+        catalog_name: "greptime".to_string(),
+        schema_name: "public".to_string(),
         table_name: "test_logical_dist_exec".to_string(),
-        desc: None,
+        desc: "".to_string(),
         column_defs: vec![
             ColumnDef {
                 name: "timestamp".to_string(),
-                datatype: ColumnDataType::Timestamp as i32,
+                datatype: ColumnDataType::TimestampMillisecond as i32,
                 is_nullable: false,
-                default_constraint: None,
+                default_constraint: vec![],
             },
             ColumnDef {
                 name: "key".to_string(),
                 datatype: ColumnDataType::Uint64 as i32,
                 is_nullable: false,
-                default_constraint: None,
+                default_constraint: vec![],
             },
             ColumnDef {
                 name: "value".to_string(),
                 datatype: ColumnDataType::Uint64 as i32,
                 is_nullable: false,
-                default_constraint: None,
+                default_constraint: vec![],
             },
         ],
         time_index: "timestamp".to_string(),
         primary_keys: vec!["key".to_string()],
         create_if_not_exists: false,
         table_options: Default::default(),
-        table_id: Some(1024),
+        table_id: Some(TableId { id: 1024 }),
         region_ids: vec![0],
     };
 
@@ -34,13 +34,13 @@ impl Admin {
         }
     }
 
-    pub async fn create(&self, expr: CreateExpr) -> Result<AdminResult> {
+    pub async fn create(&self, expr: CreateTableExpr) -> Result<AdminResult> {
         let header = ExprHeader {
             version: PROTOCOL_VERSION,
         };
         let expr = AdminExpr {
             header: Some(header),
-            expr: Some(admin_expr::Expr::Create(expr)),
+            expr: Some(admin_expr::Expr::CreateTable(expr)),
         };
         self.do_request(expr).await
     }
@@ -318,12 +318,11 @@ mod tests {
 
     fn create_test_column(vector: VectorRef) -> Column {
         let wrapper: ColumnDataTypeWrapper = vector.data_type().try_into().unwrap();
-        let array = vector.to_arrow_array();
         Column {
             column_name: "test".to_string(),
             semantic_type: 1,
-            values: Some(values(&[array.clone()]).unwrap()),
-            null_mask: null_mask(&vec![array], vector.len()),
+            values: Some(values(&[vector.clone()]).unwrap()),
+            null_mask: null_mask(&[vector.clone()], vector.len()),
             datatype: wrapper.datatype() as i32,
         }
     }
@@ -1,9 +1,9 @@
 [package]
 name = "cmd"
-version = "0.1.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 default-run = "greptime"
-license = "Apache-2.0"
 
 [[bin]]
 name = "greptime"

@@ -125,7 +125,7 @@ impl TryFrom<StartCommand> for DatanodeOptions {
        }
 
        if let Some(wal_dir) = cmd.wal_dir {
-            opts.wal_dir = wal_dir;
+            opts.wal.dir = wal_dir;
        }
        Ok(opts)
    }
@@ -151,7 +151,7 @@ mod tests {
        };
        let options: DatanodeOptions = cmd.try_into().unwrap();
        assert_eq!("127.0.0.1:3001".to_string(), options.rpc_addr);
-        assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal_dir);
+        assert_eq!("/tmp/greptimedb/wal".to_string(), options.wal.dir);
        assert_eq!("127.0.0.1:4406".to_string(), options.mysql_addr);
        assert_eq!(4, options.mysql_runtime_size);
        let MetaClientOpts {
@@ -14,7 +14,6 @@
 
 use std::sync::Arc;
 
-use anymap::AnyMap;
 use clap::Parser;
 use frontend::frontend::{Frontend, FrontendOptions};
 use frontend::grpc::GrpcOptions;
@@ -23,6 +22,7 @@ use frontend::instance::Instance;
 use frontend::mysql::MysqlOptions;
 use frontend::opentsdb::OpentsdbOptions;
 use frontend::postgres::PostgresOptions;
+use frontend::Plugins;
 use meta_client::MetaClientOpts;
 use servers::auth::UserProviderRef;
 use servers::http::HttpOptions;
@@ -88,21 +88,21 @@ pub struct StartCommand {
 
 impl StartCommand {
     async fn run(self) -> Result<()> {
-        let plugins = load_frontend_plugins(&self.user_provider)?;
+        let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);
         let opts: FrontendOptions = self.try_into()?;
-        let mut frontend = Frontend::new(
-            opts.clone(),
-            Instance::try_new_distributed(&opts)
-                .await
-                .context(error::StartFrontendSnafu)?,
-            plugins,
-        );
+        let mut instance = Instance::try_new_distributed(&opts)
+            .await
+            .context(error::StartFrontendSnafu)?;
+        instance.set_plugins(plugins.clone());
+        let mut frontend = Frontend::new(opts, instance, plugins);
         frontend.start().await.context(error::StartFrontendSnafu)
     }
 }
 
-pub fn load_frontend_plugins(user_provider: &Option<String>) -> Result<AnyMap> {
-    let mut plugins = AnyMap::new();
+pub fn load_frontend_plugins(user_provider: &Option<String>) -> Result<Plugins> {
+    let mut plugins = Plugins::new();
 
     if let Some(provider) = user_provider {
         let provider = auth::user_provider_from_option(provider).context(IllegalAuthConfigSnafu)?;
@@ -138,14 +138,14 @@ impl TryFrom<StartCommand> for FrontendOptions {
         if let Some(addr) = cmd.mysql_addr {
             opts.mysql_options = Some(MysqlOptions {
                 addr,
-                tls: Arc::new(tls_option.clone()),
+                tls: tls_option.clone(),
                 ..Default::default()
             });
         }
         if let Some(addr) = cmd.postgres_addr {
             opts.postgres_options = Some(PostgresOptions {
                 addr,
-                tls: Arc::new(tls_option),
+                tls: tls_option,
                 ..Default::default()
             });
         }
@@ -14,10 +14,9 @@
 
 use std::sync::Arc;
 
-use anymap::AnyMap;
 use clap::Parser;
 use common_telemetry::info;
-use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig};
+use datanode::datanode::{Datanode, DatanodeOptions, ObjectStoreConfig, WalConfig};
 use datanode::instance::InstanceRef;
 use frontend::frontend::{Frontend, FrontendOptions};
 use frontend::grpc::GrpcOptions;
@@ -27,6 +26,7 @@ use frontend::mysql::MysqlOptions;
 use frontend::opentsdb::OpentsdbOptions;
 use frontend::postgres::PostgresOptions;
 use frontend::prometheus::PrometheusOptions;
+use frontend::Plugins;
 use serde::{Deserialize, Serialize};
 use servers::http::HttpOptions;
 use servers::tls::{TlsMode, TlsOption};
@@ -72,7 +72,7 @@ pub struct StandaloneOptions {
     pub influxdb_options: Option<InfluxdbOptions>,
     pub prometheus_options: Option<PrometheusOptions>,
     pub mode: Mode,
-    pub wal_dir: String,
+    pub wal: WalConfig,
     pub storage: ObjectStoreConfig,
     pub enable_memory_catalog: bool,
 }
@@ -88,7 +88,7 @@ impl Default for StandaloneOptions {
             influxdb_options: Some(InfluxdbOptions::default()),
             prometheus_options: Some(PrometheusOptions::default()),
             mode: Mode::Standalone,
-            wal_dir: "/tmp/greptimedb/wal".to_string(),
+            wal: WalConfig::default(),
             storage: ObjectStoreConfig::default(),
             enable_memory_catalog: false,
         }
@@ -112,7 +112,7 @@ impl StandaloneOptions {
 
     fn datanode_options(self) -> DatanodeOptions {
         DatanodeOptions {
-            wal_dir: self.wal_dir,
+            wal: self.wal,
             storage: self.storage,
             enable_memory_catalog: self.enable_memory_catalog,
             ..Default::default()
@@ -152,7 +152,7 @@ impl StartCommand {
     async fn run(self) -> Result<()> {
         let enable_memory_catalog = self.enable_memory_catalog;
         let config_file = self.config_file.clone();
-        let plugins = load_frontend_plugins(&self.user_provider)?;
+        let plugins = Arc::new(load_frontend_plugins(&self.user_provider)?);
         let fe_opts = FrontendOptions::try_from(self)?;
         let dn_opts: DatanodeOptions = {
             let mut opts: StandaloneOptions = if let Some(path) = config_file {
@@ -189,11 +189,12 @@ impl StartCommand {
 /// Build frontend instance in standalone mode
 async fn build_frontend(
     fe_opts: FrontendOptions,
-    plugins: AnyMap,
+    plugins: Arc<Plugins>,
     datanode_instance: InstanceRef,
 ) -> Result<Frontend<FeInstance>> {
     let mut frontend_instance = FeInstance::new_standalone(datanode_instance.clone());
     frontend_instance.set_script_handler(datanode_instance);
+    frontend_instance.set_plugins(plugins.clone());
     Ok(Frontend::new(fe_opts, frontend_instance, plugins))
 }
 
@@ -223,8 +224,7 @@ impl TryFrom<StartCommand> for FrontendOptions {
         if addr == datanode_grpc_addr {
             return IllegalConfigSnafu {
                 msg: format!(
-                    "gRPC listen address conflicts with datanode reserved gRPC addr: {}",
-                    datanode_grpc_addr
+                    "gRPC listen address conflicts with datanode reserved gRPC addr: {datanode_grpc_addr}",
                 ),
             }
             .fail();
@@ -262,12 +262,12 @@ impl TryFrom<StartCommand> for FrontendOptions {
         let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);
 
         if let Some(mut mysql_options) = opts.mysql_options {
-            mysql_options.tls = Arc::new(tls_option.clone());
+            mysql_options.tls = tls_option.clone();
             opts.mysql_options = Some(mysql_options);
         }
 
         if let Some(mut postgres_options) = opts.postgres_options {
-            postgres_options.tls = Arc::new(tls_option);
+            postgres_options.tls = tls_option;
             opts.postgres_options = Some(postgres_options);
         }
 
@@ -1,8 +1,8 @@
 [package]
 name = "common-base"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 bitvec = "1.0"

@@ -1,8 +1,8 @@
 [package]
 name = "common-catalog"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 async-trait = "0.1"

@@ -1,8 +1,8 @@
 [package]
 name = "common-error"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 snafu = { version = "0.7", features = ["backtraces"] }
@@ -131,7 +131,7 @@ mod tests {
 
         assert!(ErrorCompat::backtrace(&err).is_some());
 
-        let msg = format!("{:?}", err);
+        let msg = format!("{err:?}");
         assert!(msg.contains("\nBacktrace:\n"));
         let fmt_msg = format!("{:?}", DebugFormat::new(&err));
         assert_eq!(msg, fmt_msg);
@@ -151,7 +151,7 @@ mod tests {
         assert!(err.as_any().downcast_ref::<MockError>().is_some());
         assert!(err.source().is_some());
 
-        let msg = format!("{:?}", err);
+        let msg = format!("{err:?}");
         assert!(msg.contains("\nBacktrace:\n"));
         assert!(msg.contains("Caused by"));
 

@@ -31,11 +31,11 @@ impl<'a, E: ErrorExt + ?Sized> fmt::Debug for DebugFormat<'a, E> {
         write!(f, "{}.", self.0)?;
         if let Some(source) = self.0.source() {
             // Source error use debug format for more verbose info.
-            write!(f, " Caused by: {:?}", source)?;
+            write!(f, " Caused by: {source:?}")?;
         }
         if let Some(backtrace) = self.0.backtrace_opt() {
             // Add a newline to separate causes and backtrace.
-            write!(f, "\nBacktrace:\n{}", backtrace)?;
+            write!(f, "\nBacktrace:\n{backtrace}")?;
         }
 
         Ok(())

@@ -51,6 +51,7 @@ pub enum StatusCode {
     TableNotFound = 4001,
     TableColumnNotFound = 4002,
     TableColumnExists = 4003,
+    DatabaseNotFound = 4004,
     // ====== End of catalog related status code =======
 
     // ====== Begin of storage related status code =====
@@ -86,7 +87,7 @@ impl StatusCode {
 impl fmt::Display for StatusCode {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         // The current debug format is suitable to display.
-        write!(f, "{:?}", self)
+        write!(f, "{self:?}")
     }
 }
 
@@ -95,7 +96,7 @@ mod tests {
     use super::*;
 
     fn assert_status_code_display(code: StatusCode, msg: &str) {
-        let code_msg = format!("{}", code);
+        let code_msg = format!("{code}");
         assert_eq!(msg, code_msg);
     }
 
@@ -1,8 +1,8 @@
 [package]
 name = "common-function-macro"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [lib]
 proc-macro = true

@@ -1,8 +1,8 @@
 [package]
-edition = "2021"
 name = "common-function"
-version = "0.1.0"
-license = "Apache-2.0"
+edition.workspace = true
+version.workspace = true
+license.workspace = true
 
 [dependencies]
 arc-swap = "1.0"
@@ -11,7 +11,7 @@ common-error = { path = "../error" }
 common-function-macro = { path = "../function-macro" }
 common-query = { path = "../query" }
 common-time = { path = "../time" }
-datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
+datafusion.workspace = true
 datatypes = { path = "../../datatypes" }
 libc = "0.2"
 num = "0.4"
@@ -1,69 +0,0 @@
-// Copyright 2022 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::any::Any;
-
-use common_error::prelude::*;
-pub use common_query::error::{Error, Result};
-use datatypes::error::Error as DataTypeError;
-
-#[derive(Debug, Snafu)]
-#[snafu(visibility(pub))]
-pub enum InnerError {
-    #[snafu(display("Fail to get scalar vector, {}", source))]
-    GetScalarVector {
-        source: DataTypeError,
-        backtrace: Backtrace,
-    },
-}
-
-impl ErrorExt for InnerError {
-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-}
-
-impl From<InnerError> for Error {
-    fn from(err: InnerError) -> Self {
-        Self::new(err)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use snafu::GenerateImplicitData;
-
-    use super::*;
-
-    fn raise_datatype_error() -> std::result::Result<(), DataTypeError> {
-        Err(DataTypeError::Conversion {
-            from: "test".to_string(),
-            backtrace: Backtrace::generate(),
-        })
-    }
-
-    #[test]
-    fn test_get_scalar_vector_error() {
-        let err: Error = raise_datatype_error()
-            .context(GetScalarVectorSnafu)
-            .err()
-            .unwrap()
-            .into();
-        assert!(err.backtrace_opt().is_some());
-    }
-}
@@ -12,5 +12,4 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub mod error;
 pub mod scalars;

@@ -23,6 +23,5 @@ pub(crate) mod test;
 mod timestamp;
 pub mod udf;
 
-pub use aggregate::MedianAccumulatorCreator;
 pub use function::{Function, FunctionRef};
 pub use function_registry::{FunctionRegistry, FUNCTION_REGISTRY};

@@ -16,7 +16,6 @@ mod argmax;
 mod argmin;
 mod diff;
 mod mean;
-mod median;
 mod percentile;
 mod polyval;
 mod scipy_stats_norm_cdf;
@@ -29,7 +28,6 @@ pub use argmin::ArgminAccumulatorCreator;
 use common_query::logical_plan::AggregateFunctionCreatorRef;
 pub use diff::DiffAccumulatorCreator;
 pub use mean::MeanAccumulatorCreator;
-pub use median::MedianAccumulatorCreator;
 pub use percentile::PercentileAccumulatorCreator;
 pub use polyval::PolyvalAccumulatorCreator;
 pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
@@ -88,7 +86,6 @@ impl AggregateFunctions {
             };
         }
 
-        register_aggr_func!("median", 1, MedianAccumulatorCreator);
         register_aggr_func!("diff", 1, DiffAccumulatorCreator);
         register_aggr_func!("mean", 1, MeanAccumulatorCreator);
         register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
@@ -20,24 +20,22 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::vectors::ConstantVector;
+use datatypes::types::{LogicalPrimitiveType, WrapperType};
+use datatypes::vectors::{ConstantVector, Helper};
 use datatypes::with_match_primitive_type_id;
 use snafu::ensure;
 
 // https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
 // return the index of the max value
 #[derive(Debug, Default)]
-pub struct Argmax<T>
-where
-    T: Primitive + PartialOrd,
-{
+pub struct Argmax<T> {
     max: Option<T>,
     n: u64,
 }
 
 impl<T> Argmax<T>
 where
-    T: Primitive + PartialOrd,
+    T: PartialOrd + Copy,
 {
     fn update(&mut self, value: T, index: u64) {
         if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
@@ -49,8 +47,7 @@ where
 
 impl<T> Accumulator for Argmax<T>
 where
-    T: Primitive + PartialOrd,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType + PartialOrd,
 {
     fn state(&self) -> Result<Vec<Value>> {
         match self.max {
@@ -66,10 +63,10 @@ where
 
         let column = &values[0];
         let column: &<T as Scalar>::VectorType = if column.is_const() {
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         for (i, v) in column.iter_data().enumerate() {
             if let Some(value) = v {
@@ -93,8 +90,8 @@ where
 
         let max = &states[0];
         let index = &states[1];
-        let max: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(max) };
-        let index: &<u64 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
+        let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
+        let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
         index
             .iter_data()
             .flatten()
@@ -122,7 +119,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
         with_match_primitive_type_id!(
            input_type.logical_type_id(),
            |$S| {
-                Ok(Box::new(Argmax::<$S>::default()))
+                Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
            },
            {
                let err_msg = format!(
@@ -154,7 +151,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
     use super::*;
     #[test]
@@ -166,21 +163,19 @@ mod test {
|
|||||||
|
|
||||||
// test update one not-null value
|
// test update one not-null value
|
||||||
let mut argmax = Argmax::<i32>::default();
|
let mut argmax = Argmax::<i32>::default();
|
||||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
|
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
|
||||||
assert!(argmax.update_batch(&v).is_ok());
|
assert!(argmax.update_batch(&v).is_ok());
|
||||||
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
|
assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
|
||||||
|
|
||||||
// test update one null value
|
// test update one null value
|
||||||
let mut argmax = Argmax::<i32>::default();
|
let mut argmax = Argmax::<i32>::default();
|
||||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
|
||||||
Option::<i32>::None,
|
|
||||||
]))];
|
|
||||||
assert!(argmax.update_batch(&v).is_ok());
|
assert!(argmax.update_batch(&v).is_ok());
|
||||||
assert_eq!(Value::Null, argmax.evaluate().unwrap());
|
assert_eq!(Value::Null, argmax.evaluate().unwrap());
|
||||||
|
|
||||||
// test update no null-value batch
|
// test update no null-value batch
|
||||||
let mut argmax = Argmax::<i32>::default();
|
let mut argmax = Argmax::<i32>::default();
|
||||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||||
Some(-1i32),
|
Some(-1i32),
|
||||||
Some(1),
|
Some(1),
|
||||||
Some(3),
|
Some(3),
|
||||||
@@ -190,7 +185,7 @@ mod test {
|
|||||||
|
|
||||||
// test update null-value batch
|
// test update null-value batch
|
||||||
let mut argmax = Argmax::<i32>::default();
|
let mut argmax = Argmax::<i32>::default();
|
||||||
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
|
let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
|
||||||
Some(-2i32),
|
Some(-2i32),
|
||||||
None,
|
None,
|
||||||
Some(4),
|
Some(4),
|
||||||
@@ -201,7 +196,7 @@ mod test {
|
|||||||
// test update with constant vector
|
// test update with constant vector
|
||||||
let mut argmax = Argmax::<i32>::default();
|
let mut argmax = Argmax::<i32>::default();
|
||||||
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
|
||||||
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
|
Arc::new(Int32Vector::from_vec(vec![4])),
|
||||||
10,
|
10,
|
||||||
))];
|
))];
|
||||||
assert!(argmax.update_batch(&v).is_ok());
|
assert!(argmax.update_batch(&v).is_ok());
|
||||||
|
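The argmax hunks show the refactor's recurring shape in one place: the old `Primitive` bound and its `for<'a> T: Scalar<RefType<'a> = T>` clause give way to `WrapperType` (or plain `PartialOrd + Copy` where nothing more is needed), `VectorHelper` becomes `Helper`, and tests swap `PrimitiveVector::<i32>` for the concrete `Int32Vector`. The scan itself is unchanged; a minimal sketch of it in plain std Rust (the struct below is an illustrative stand-in, not the crate's type):

use std::cmp::Ordering;

#[derive(Debug, Default)]
struct Argmax<T> {
    max: Option<T>,
    n: u64,
}

impl<T: PartialOrd + Copy> Argmax<T> {
    fn update(&mut self, value: T, index: u64) {
        // `None` orders below `Some(_)`, so the first value always seeds the max,
        // and strict `Less` keeps the earliest index on ties, as numpy.argmax does.
        if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
            self.max = Some(value);
            self.n = index;
        }
    }
}

fn main() {
    let mut argmax = Argmax::<i32>::default();
    for (i, v) in [-1i32, 1, 3].into_iter().enumerate() {
        argmax.update(v, i as u64);
    }
    assert_eq!(argmax.n, 2); // the maximum, 3, sits at index 2
}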
@@ -20,23 +20,20 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::vectors::ConstantVector;
+use datatypes::vectors::{ConstantVector, Helper};
 use datatypes::with_match_primitive_type_id;
 use snafu::ensure;
 
 // // https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
 #[derive(Debug, Default)]
-pub struct Argmin<T>
-where
-    T: Primitive + PartialOrd,
-{
+pub struct Argmin<T> {
     min: Option<T>,
     n: u32,
 }
 
 impl<T> Argmin<T>
 where
-    T: Primitive + PartialOrd,
+    T: Copy + PartialOrd,
 {
     fn update(&mut self, value: T, index: u32) {
         match self.min {
@@ -56,8 +53,7 @@ where
 
 impl<T> Accumulator for Argmin<T>
 where
-    T: Primitive + PartialOrd,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType + PartialOrd,
 {
     fn state(&self) -> Result<Vec<Value>> {
         match self.min {
@@ -75,10 +71,10 @@ where
 
         let column = &values[0];
         let column: &<T as Scalar>::VectorType = if column.is_const() {
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         for (i, v) in column.iter_data().enumerate() {
             if let Some(value) = v {
@@ -102,8 +98,8 @@ where
 
         let min = &states[0];
         let index = &states[1];
-        let min: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(min) };
-        let index: &<u32 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
+        let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
+        let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
         index
             .iter_data()
             .flatten()
@@ -131,7 +127,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
             with_match_primitive_type_id!(
                 input_type.logical_type_id(),
                 |$S| {
-                    Ok(Box::new(Argmin::<$S>::default()))
+                    Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
                 },
                 {
                     let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
    use super::*;
    #[test]
@@ -175,21 +171,19 @@ mod test {
 
         // test update one not-null value
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
         assert!(argmin.update_batch(&v).is_ok());
         assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
 
         // test update one null value
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
         assert!(argmin.update_batch(&v).is_ok());
         assert_eq!(Value::Null, argmin.evaluate().unwrap());
 
         // test update no null-value batch
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-1i32),
             Some(1),
             Some(3),
@@ -199,7 +193,7 @@ mod test {
 
         // test update null-value batch
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-2i32),
             None,
             Some(4),
@@ -210,7 +204,7 @@ mod test {
         // test update with constant vector
         let mut argmin = Argmin::<i32>::default();
         let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
         assert!(argmin.update_batch(&v).is_ok());
@@ -22,40 +22,32 @@ use common_query::error::{
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::types::PrimitiveType;
 use datatypes::value::ListValue;
-use datatypes::vectors::{ConstantVector, ListVector};
+use datatypes::vectors::{ConstantVector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};
 
 // https://numpy.org/doc/stable/reference/generated/numpy.diff.html
+// I is the input type, O is the output type.
 #[derive(Debug, Default)]
-pub struct Diff<T, SubT>
-where
-    T: Primitive + AsPrimitive<SubT>,
-    SubT: Primitive + std::ops::Sub<Output = SubT>,
-{
-    values: Vec<T>,
-    _phantom: PhantomData<SubT>,
+pub struct Diff<I, O> {
+    values: Vec<I>,
+    _phantom: PhantomData<O>,
 }
 
-impl<T, SubT> Diff<T, SubT>
-where
-    T: Primitive + AsPrimitive<SubT>,
-    SubT: Primitive + std::ops::Sub<Output = SubT>,
-{
-    fn push(&mut self, value: T) {
+impl<I, O> Diff<I, O> {
+    fn push(&mut self, value: I) {
         self.values.push(value);
     }
 }
 
-impl<T, SubT> Accumulator for Diff<T, SubT>
+impl<I, O> Accumulator for Diff<I, O>
 where
-    T: Primitive + AsPrimitive<SubT>,
-    for<'a> T: Scalar<RefType<'a> = T>,
-    SubT: Primitive + std::ops::Sub<Output = SubT>,
-    for<'a> SubT: Scalar<RefType<'a> = SubT>,
+    I: WrapperType,
+    O: WrapperType,
+    I::Native: AsPrimitive<O::Native>,
+    O::Native: std::ops::Sub<Output = O::Native>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self
@@ -65,7 +57,7 @@ where
             .collect::<Vec<Value>>();
         Ok(vec![Value::List(ListValue::new(
             Some(Box::new(nums)),
-            T::default().into().data_type(),
+            I::LogicalType::build_data_type(),
         ))])
     }
 
@@ -78,12 +70,12 @@ where
 
         let column = &values[0];
         let mut len = 1;
-        let column: &<T as Scalar>::VectorType = if column.is_const() {
+        let column: &<I as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         (0..len).for_each(|_| {
             for v in column.iter_data().flatten() {
@@ -109,8 +101,9 @@ where
                 ),
             })?;
         for state in states.values_iter() {
-            let state = state.context(FromScalarValueSnafu)?;
-            self.update_batch(&[state])?
+            if let Some(state) = state.context(FromScalarValueSnafu)? {
+                self.update_batch(&[state])?;
+            }
         }
         Ok(())
     }
@@ -122,11 +115,14 @@ where
         let diff = self
             .values
             .windows(2)
-            .map(|x| (x[1].as_() - x[0].as_()).into())
+            .map(|x| {
+                let native = x[1].into_native().as_() - x[0].into_native().as_();
+                O::from_native(native).into()
+            })
             .collect::<Vec<Value>>();
         let diff = Value::List(ListValue::new(
             Some(Box::new(diff)),
-            SubT::default().into().data_type(),
+            O::LogicalType::build_data_type(),
         ));
         Ok(diff)
     }
@@ -143,7 +139,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
             with_match_primitive_type_id!(
                 input_type.logical_type_id(),
                 |$S| {
-                    Ok(Box::new(Diff::<$S,<$S as Primitive>::LargestType>::default()))
+                    Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
                 },
                 {
                     let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
         with_match_primitive_type_id!(
             input_types[0].logical_type_id(),
             |$S| {
-                Ok(ConcreteDataType::list_datatype(PrimitiveType::<<$S as Primitive>::LargestType>::default().into()))
+                Ok(ConcreteDataType::list_datatype($S::default().into()))
             },
             {
                 unreachable!()
@@ -177,7 +173,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
         with_match_primitive_type_id!(
             input_types[0].logical_type_id(),
             |$S| {
-                Ok(vec![ConcreteDataType::list_datatype(PrimitiveType::<$S>::default().into())])
+                Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
             },
             {
                 unreachable!()
@@ -188,9 +184,10 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
    use super::*;
+
    #[test]
    fn test_update_batch() {
        // test update empty batch, expect not updating anything
@@ -201,21 +198,19 @@ mod test {
 
         // test update one not-null value
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
         assert!(diff.update_batch(&v).is_ok());
         assert_eq!(Value::Null, diff.evaluate().unwrap());
 
         // test update one null value
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
         assert!(diff.update_batch(&v).is_ok());
         assert_eq!(Value::Null, diff.evaluate().unwrap());
 
         // test update no null-value batch
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-1i32),
             Some(1),
             Some(2),
@@ -232,7 +227,7 @@ mod test {
 
         // test update null-value batch
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-2i32),
             None,
             Some(3),
@@ -251,7 +246,7 @@ mod test {
         // test update with constant vector
         let mut diff = Diff::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+            Arc::new(Int32Vector::from_vec(vec![4])),
            4,
        ))];
         let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
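The `Diff` rewrite above splits the old `T`/`SubT` pair into an input wrapper `I` and an output wrapper `O`, and subtracts on the native representations so a narrow input such as `i32` accumulates into a wider `i64` output. The recurrence itself is numpy.diff's `out[i] = a[i+1] - a[i]`; a standalone sketch over plain slices, assuming only the `num_traits` crate the file already depends on:

use num_traits::AsPrimitive;

// Each output element is values[i + 1] - values[i], widened from I to O
// (e.g. i32 -> i64) before subtracting so the difference cannot overflow.
fn diff<I, O>(values: &[I]) -> Vec<O>
where
    I: AsPrimitive<O>,
    O: Copy + std::ops::Sub<Output = O> + 'static,
{
    values.windows(2).map(|w| w[1].as_() - w[0].as_()).collect()
}

fn main() {
    let diffs: Vec<i64> = diff(&[-1i32, 1, 2]);
    assert_eq!(diffs, vec![2, 1]); // matches the test batch above
}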
@@ -22,16 +22,14 @@ use common_query::error::{
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::vectors::{ConstantVector, Float64Vector, UInt64Vector};
+use datatypes::types::WrapperType;
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt};
 
 #[derive(Debug, Default)]
-pub struct Mean<T>
-where
-    T: Primitive + AsPrimitive<f64>,
-{
+pub struct Mean<T> {
     sum: f64,
     n: u64,
     _phantom: PhantomData<T>,
@@ -39,11 +37,12 @@ where
 
 impl<T> Mean<T>
 where
-    T: Primitive + AsPrimitive<f64>,
+    T: WrapperType,
+    T::Native: AsPrimitive<f64>,
 {
     #[inline(always)]
     fn push(&mut self, value: T) {
-        self.sum += value.as_();
+        self.sum += value.into_native().as_();
         self.n += 1;
     }
 
@@ -56,8 +55,8 @@ where
 
 impl<T> Accumulator for Mean<T>
 where
-    T: Primitive + AsPrimitive<f64>,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType,
+    T::Native: AsPrimitive<f64>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         Ok(vec![self.sum.into(), self.n.into()])
@@ -73,10 +72,10 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         (0..len).for_each(|_| {
             for v in column.iter_data().flatten() {
@@ -150,7 +149,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
             with_match_primitive_type_id!(
                 input_type.logical_type_id(),
                 |$S| {
-                    Ok(Box::new(Mean::<$S>::default()))
+                    Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
                 },
                 {
                     let err_msg = format!(
@@ -182,7 +181,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
    use super::*;
    #[test]
@@ -194,21 +193,19 @@ mod test {
 
         // test update one not-null value
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
         assert!(mean.update_batch(&v).is_ok());
         assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());
 
         // test update one null value
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
         assert!(mean.update_batch(&v).is_ok());
         assert_eq!(Value::Null, mean.evaluate().unwrap());
 
         // test update no null-value batch
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-1i32),
             Some(1),
             Some(2),
@@ -218,7 +215,7 @@ mod test {
 
         // test update null-value batch
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-2i32),
             None,
             Some(3),
@@ -230,7 +227,7 @@ mod test {
         // test update with constant vector
         let mut mean = Mean::<i32>::default();
         let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+            Arc::new(Int32Vector::from_vec(vec![4])),
            10,
        ))];
         assert!(mean.update_batch(&v).is_ok());
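`Mean` deliberately keeps only the pair `(sum, n)` as its state, which is what makes partial accumulators mergeable: DataFusion can add the sums and counts from different partitions and divide once at the end. A minimal, non-generic sketch of that idea (not the crate's accumulator):

#[derive(Debug, Default)]
struct Mean {
    sum: f64,
    n: u64,
}

impl Mean {
    fn push(&mut self, value: impl Into<f64>) {
        self.sum += value.into();
        self.n += 1;
    }

    // Merging another partial state is just adding its (sum, n).
    fn merge(&mut self, other: &Mean) {
        self.sum += other.sum;
        self.n += other.n;
    }

    fn evaluate(&self) -> Option<f64> {
        (self.n > 0).then(|| self.sum / self.n as f64)
    }
}

fn main() {
    let mut mean = Mean::default();
    [-1i32, 1, 2].into_iter().for_each(|v| mean.push(v));
    assert_eq!(mean.evaluate(), Some(2.0 / 3.0));
}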
@@ -1,289 +0,0 @@
-// Copyright 2022 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::cmp::Reverse;
-use std::collections::BinaryHeap;
-use std::sync::Arc;
-
-use common_function_macro::{as_aggr_func_creator, AggrFuncTypeStore};
-use common_query::error::{
-    CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
-};
-use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
-use common_query::prelude::*;
-use datatypes::prelude::*;
-use datatypes::types::OrdPrimitive;
-use datatypes::value::ListValue;
-use datatypes::vectors::{ConstantVector, ListVector};
-use datatypes::with_match_primitive_type_id;
-use num::NumCast;
-use snafu::{ensure, OptionExt, ResultExt};
-
-// This median calculation algorithm's details can be found at
-// https://leetcode.cn/problems/find-median-from-data-stream/
-//
-// Basically, it uses two heaps, a maximum heap and a minimum. The maximum heap stores numbers that
-// are not greater than the median, and the minimum heap stores the greater. In a streaming of
-// numbers, when a number is arrived, we adjust the heaps' tops, so that either one top is the
-// median or both tops can be averaged to get the median.
-//
-// The time complexity to update the median is O(logn), O(1) to get the median; and the space
-// complexity is O(n). (Ignore the costs for heap expansion.)
-//
-// From the point of algorithm, [quick select](https://en.wikipedia.org/wiki/Quickselect) might be
-// better. But to use quick select here, we need a mutable self in the final calculation(`evaluate`)
-// to swap stored numbers in the states vector. Though we can make our `evaluate` received
-// `&mut self`, DataFusion calls our accumulator with `&self` (see `DfAccumulatorAdaptor`). That
-// means we have to introduce some kinds of interior mutability, and the overhead is not neglectable.
-//
-// TODO(LFC): Use quick select to get median when we can modify DataFusion's code, and benchmark with two-heap algorithm.
-#[derive(Debug, Default)]
-pub struct Median<T>
-where
-    T: Primitive,
-{
-    greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
-    not_greater: BinaryHeap<OrdPrimitive<T>>,
-}
-
-impl<T> Median<T>
-where
-    T: Primitive,
-{
-    fn push(&mut self, value: T) {
-        let value = OrdPrimitive::<T>(value);
-
-        if self.not_greater.is_empty() {
-            self.not_greater.push(value);
-            return;
-        }
-        // The `unwrap`s below are safe because there are `push`s before them.
-        if value <= *self.not_greater.peek().unwrap() {
-            self.not_greater.push(value);
-            if self.not_greater.len() > self.greater.len() + 1 {
-                self.greater.push(Reverse(self.not_greater.pop().unwrap()));
-            }
-        } else {
-            self.greater.push(Reverse(value));
-            if self.greater.len() > self.not_greater.len() {
-                self.not_greater.push(self.greater.pop().unwrap().0);
-            }
-        }
-    }
-}
-
-// UDAFs are built using the trait `Accumulator`, that offers DataFusion the necessary functions
-// to use them.
-impl<T> Accumulator for Median<T>
-where
-    T: Primitive,
-    for<'a> T: Scalar<RefType<'a> = T>,
-{
-    // This function serializes our state to `ScalarValue`, which DataFusion uses to pass this
-    // state between execution stages. Note that this can be arbitrary data.
-    //
-    // The `ScalarValue`s returned here will be passed in as argument `states: &[VectorRef]` to
-    // `merge_batch` function.
-    fn state(&self) -> Result<Vec<Value>> {
-        let nums = self
-            .greater
-            .iter()
-            .map(|x| &x.0)
-            .chain(self.not_greater.iter())
-            .map(|&n| n.into())
-            .collect::<Vec<Value>>();
-        Ok(vec![Value::List(ListValue::new(
-            Some(Box::new(nums)),
-            T::default().into().data_type(),
-        ))])
-    }
-
-    // DataFusion calls this function to update the accumulator's state for a batch of inputs rows.
-    // It is expected this function to update the accumulator's state.
-    fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
-        if values.is_empty() {
-            return Ok(());
-        }
-
-        ensure!(values.len() == 1, InvalidInputStateSnafu);
-
-        // This is a unary accumulator, so only one column is provided.
-        let column = &values[0];
-        let mut len = 1;
-        let column: &<T as Scalar>::VectorType = if column.is_const() {
-            len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
-        } else {
-            unsafe { VectorHelper::static_cast(column) }
-        };
-        (0..len).for_each(|_| {
-            for v in column.iter_data().flatten() {
-                self.push(v);
-            }
-        });
-        Ok(())
-    }
-
-    // DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
-    // merge states from other accumulators (returned by `state()` method).
-    fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
-        if states.is_empty() {
-            return Ok(());
-        }
-
-        // The states here are returned by the `state` method. Since we only returned a vector
-        // with one value in that method, `states[0]` is fine.
-        let states = &states[0];
-        let states = states
-            .as_any()
-            .downcast_ref::<ListVector>()
-            .with_context(|| DowncastVectorSnafu {
-                err_msg: format!(
-                    "expect ListVector, got vector type {}",
-                    states.vector_type_name()
-                ),
-            })?;
-        for state in states.values_iter() {
-            let state = state.context(FromScalarValueSnafu)?;
-            // merging state is simply accumulate stored numbers from others', so just call update
-            self.update_batch(&[state])?
-        }
-        Ok(())
-    }
-
-    // DataFusion expects this function to return the final value of this aggregator.
-    fn evaluate(&self) -> Result<Value> {
-        if self.not_greater.is_empty() {
-            assert!(
-                self.greater.is_empty(),
-                "not expected in two-heap median algorithm, there must be a bug when implementing it"
-            );
-            return Ok(Value::Null);
-        }
-
-        // unwrap is safe because we checked not_greater heap's len above
-        let not_greater = *self.not_greater.peek().unwrap();
-        let median = if self.not_greater.len() > self.greater.len() {
-            not_greater.into()
-        } else {
-            // unwrap is safe because greater heap len >= not_greater heap len, which is > 0
-            let greater = self.greater.peek().unwrap();
-
-            // the following three NumCast's `unwrap`s are safe because T is primitive
-            let not_greater_v: f64 = NumCast::from(not_greater.as_primitive()).unwrap();
-            let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
-            let median: T = NumCast::from((not_greater_v + greater_v) / 2.0).unwrap();
-            median.into()
-        };
-        Ok(median)
-    }
-}
-
-#[as_aggr_func_creator]
-#[derive(Debug, Default, AggrFuncTypeStore)]
-pub struct MedianAccumulatorCreator {}
-
-impl AggregateFunctionCreator for MedianAccumulatorCreator {
-    fn creator(&self) -> AccumulatorCreatorFunction {
-        let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
-            let input_type = &types[0];
-            with_match_primitive_type_id!(
-                input_type.logical_type_id(),
-                |$S| {
-                    Ok(Box::new(Median::<$S>::default()))
-                },
-                {
-                    let err_msg = format!(
-                        "\"MEDIAN\" aggregate function not support data type {:?}",
-                        input_type.logical_type_id(),
-                    );
-                    CreateAccumulatorSnafu { err_msg }.fail()?
-                }
-            )
-        });
-        creator
-    }
-
-    fn output_type(&self) -> Result<ConcreteDataType> {
-        let input_types = self.input_types()?;
-        ensure!(input_types.len() == 1, InvalidInputStateSnafu);
-        // unwrap is safe because we have checked input_types len must equals 1
-        Ok(input_types.into_iter().next().unwrap())
-    }
-
-    fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
-        Ok(vec![ConcreteDataType::list_datatype(self.output_type()?)])
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use datatypes::vectors::PrimitiveVector;
-
-    use super::*;
-    #[test]
-    fn test_update_batch() {
-        // test update empty batch, expect not updating anything
-        let mut median = Median::<i32>::default();
-        assert!(median.update_batch(&[]).is_ok());
-        assert!(median.not_greater.is_empty());
-        assert!(median.greater.is_empty());
-        assert_eq!(Value::Null, median.evaluate().unwrap());
-
-        // test update one not-null value
-        let mut median = Median::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
-        assert!(median.update_batch(&v).is_ok());
-        assert_eq!(Value::Int32(42), median.evaluate().unwrap());
-
-        // test update one null value
-        let mut median = Median::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
-        assert!(median.update_batch(&v).is_ok());
-        assert_eq!(Value::Null, median.evaluate().unwrap());
-
-        // test update no null-value batch
-        let mut median = Median::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Some(-1i32),
-            Some(1),
-            Some(2),
-        ]))];
-        assert!(median.update_batch(&v).is_ok());
-        assert_eq!(Value::Int32(1), median.evaluate().unwrap());
-
-        // test update null-value batch
-        let mut median = Median::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Some(-2i32),
-            None,
-            Some(3),
-            Some(4),
-        ]))];
-        assert!(median.update_batch(&v).is_ok());
-        assert_eq!(Value::Int32(3), median.evaluate().unwrap());
-
-        // test update with constant vector
-        let mut median = Median::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
-            10,
-        ))];
-        assert!(median.update_batch(&v).is_ok());
-        assert_eq!(Value::Int32(4), median.evaluate().unwrap());
-    }
-}
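The header comment of the removed file explains the two-heap streaming median at length; here is a distilled, runnable sketch of that algorithm over concrete `i32` values, minus the DataFusion plumbing:

use std::cmp::Reverse;
use std::collections::BinaryHeap;

// Two-heap streaming median: `not_greater` is a max-heap of values not greater
// than the median, `greater` a min-heap (via `Reverse`) of the rest. Keeping
// their sizes within one of each other leaves the median at the heap tops,
// giving O(log n) inserts and O(1) reads, as the removed comment describes.
#[derive(Default)]
struct Median {
    greater: BinaryHeap<Reverse<i32>>,
    not_greater: BinaryHeap<i32>,
}

impl Median {
    fn push(&mut self, value: i32) {
        let goes_high = matches!(self.not_greater.peek(), Some(&top) if value > top);
        if goes_high {
            self.greater.push(Reverse(value));
        } else {
            self.not_greater.push(value);
        }
        // Rebalance: `not_greater` may hold at most one extra element.
        if self.not_greater.len() > self.greater.len() + 1 {
            self.greater.push(Reverse(self.not_greater.pop().unwrap()));
        } else if self.greater.len() > self.not_greater.len() {
            self.not_greater.push(self.greater.pop().unwrap().0);
        }
    }

    fn evaluate(&self) -> Option<f64> {
        let lo = *self.not_greater.peek()?;
        Some(if self.not_greater.len() > self.greater.len() {
            lo as f64
        } else {
            (lo as f64 + self.greater.peek().unwrap().0 as f64) / 2.0
        })
    }
}

fn main() {
    let mut median = Median::default();
    [-2, 3, 4].into_iter().for_each(|v| median.push(v));
    assert_eq!(median.evaluate(), Some(3.0)); // matches the removed test's expectation
}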
@@ -26,7 +26,7 @@ use common_query::prelude::*;
 use datatypes::prelude::*;
 use datatypes::types::OrdPrimitive;
 use datatypes::value::{ListValue, OrderedFloat};
-use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num::NumCast;
 use snafu::{ensure, OptionExt, ResultExt};
@@ -44,15 +44,15 @@ use snafu::{ensure, OptionExt, ResultExt};
 // This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
 // If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
 // i+g = (q-alpha)/(n-alpha-beta+1)
-// Below, ‘q’ is the quantile value, ‘n’ is the sample size and alpha and beta are constants. The following formula gives an interpolation “i + g” of where the quantile would be in the sorted sample.
-// With ‘i’ being the floor and ‘g’ the fractional part of the result.
+// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
+// With 'i' being the floor and 'g' the fractional part of the result.
 // the default method is linear where
 // alpha = 1
 // beta = 1
 #[derive(Debug, Default)]
 pub struct Percentile<T>
 where
-    T: Primitive,
+    T: WrapperType,
 {
     greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
     not_greater: BinaryHeap<OrdPrimitive<T>>,
@@ -62,7 +62,7 @@ where
 
 impl<T> Percentile<T>
 where
-    T: Primitive,
+    T: WrapperType,
 {
     fn push(&mut self, value: T) {
         let value = OrdPrimitive::<T>(value);
@@ -93,8 +93,7 @@ where
 
 impl<T> Accumulator for Percentile<T>
 where
-    T: Primitive,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self
@@ -107,7 +106,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.p.into(),
         ])
@@ -129,14 +128,14 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
 
         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
             err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
         })?;
         // `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
@@ -209,10 +208,11 @@ where
                 ),
             })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
                 for v in column.iter_data().flatten() {
                     self.push(v);
                 }
+            }
         }
         Ok(())
@@ -259,7 +259,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
             with_match_primitive_type_id!(
                 input_type.logical_type_id(),
                 |$S| {
-                    Ok(Box::new(Percentile::<$S>::default()))
+                    Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
                 },
                 {
                     let err_msg = format!(
@@ -292,7 +292,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::{Float64Vector, Int32Vector};
 
    use super::*;
    #[test]
@@ -307,8 +307,8 @@ mod test {
         // test update one not-null value
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
+            Arc::new(Int32Vector::from(vec![Some(42)])),
+            Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
         ];
         assert!(percentile.update_batch(&v).is_ok());
         assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
@@ -316,8 +316,8 @@ mod test {
         // test update one null value
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
+            Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
+            Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
         ];
         assert!(percentile.update_batch(&v).is_ok());
         assert_eq!(Value::Null, percentile.evaluate().unwrap());
@@ -325,12 +325,8 @@ mod test {
         // test update no null-value batch
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(100.0_f64),
                 Some(100.0_f64),
                 Some(100.0_f64),
@@ -342,13 +338,8 @@ mod test {
         // test update null-value batch
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-2i32),
-                None,
-                Some(3),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(100.0_f64),
                 Some(100.0_f64),
                 Some(100.0_f64),
@@ -362,13 +353,10 @@ mod test {
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
             Arc::new(ConstantVector::new(
-                Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+                Arc::new(Int32Vector::from_vec(vec![4])),
                 2,
             )),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
-                Some(100.0_f64),
-                Some(100.0_f64),
-            ])),
+            Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
         ];
         assert!(percentile.update_batch(&v).is_ok());
         assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
@@ -376,12 +364,8 @@ mod test {
         // test left border
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(0.0_f64),
                 Some(0.0_f64),
                 Some(0.0_f64),
@@ -393,12 +377,8 @@ mod test {
         // test medium
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(50.0_f64),
                 Some(50.0_f64),
                 Some(50.0_f64),
@@ -410,12 +390,8 @@ mod test {
         // test right border
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(100.0_f64),
                 Some(100.0_f64),
                 Some(100.0_f64),
@@ -431,12 +407,8 @@ mod test {
         // >> 6.400000000000
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(10i32),
-                Some(7),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(40.0_f64),
                 Some(40.0_f64),
                 Some(40.0_f64),
@@ -451,12 +423,8 @@ mod test {
         // >> 9.7000000000000011
         let mut percentile = Percentile::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(10i32),
-                Some(7),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(95.0_f64),
                 Some(95.0_f64),
                 Some(95.0_f64),
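The comment block in this file quotes numpy's interpolation rule; with the default linear method (alpha = beta = 1) the fractional index collapses to i + g = p/100 * (n - 1) into the sorted sample. A small illustrative helper (not the accumulator's code) reproduces the numpy values the test comments cite:

// Linear-interpolated percentile: index i + g = p/100 * (n - 1) into the sorted
// sample, result = sorted[i] + g * (sorted[i + 1] - sorted[i]).
fn percentile(values: &mut [f64], p: f64) -> f64 {
    values.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let rank = p / 100.0 * (values.len() - 1) as f64;
    let (i, g) = (rank.floor() as usize, rank.fract());
    if g == 0.0 {
        values[i]
    } else {
        values[i] + g * (values[i + 1] - values[i])
    }
}

fn main() {
    // numpy.percentile([10, 7, 4], 40) == 6.4 and
    // numpy.percentile([10, 7, 4], 95) == 9.7000000000000011, per the test comments.
    assert!((percentile(&mut [10.0, 7.0, 4.0], 40.0) - 6.4).abs() < 1e-9);
    assert!((percentile(&mut [10.0, 7.0, 4.0], 95.0) - 9.7).abs() < 1e-9);
}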
|||||||
@@ -23,9 +23,9 @@ use common_query::error::{
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::types::PrimitiveType;
+use datatypes::types::{LogicalPrimitiveType, WrapperType};
 use datatypes::value::ListValue;
-use datatypes::vectors::{ConstantVector, Int64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};

@@ -34,8 +34,10 @@ use snafu::{ensure, OptionExt, ResultExt};
 #[derive(Debug, Default)]
 pub struct Polyval<T, PolyT>
 where
-    T: Primitive + AsPrimitive<PolyT>,
-    PolyT: Primitive + std::ops::Mul<Output = PolyT>,
+    T: WrapperType,
+    T::Native: AsPrimitive<PolyT::Native>,
+    PolyT: WrapperType,
+    PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
 {
     values: Vec<T>,
     // DataFusion casts constant in into i64 type.

@@ -45,8 +47,10 @@ where

 impl<T, PolyT> Polyval<T, PolyT>
 where
-    T: Primitive + AsPrimitive<PolyT>,
-    PolyT: Primitive + std::ops::Mul<Output = PolyT>,
+    T: WrapperType,
+    T::Native: AsPrimitive<PolyT::Native>,
+    PolyT: WrapperType,
+    PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
 {
     fn push(&mut self, value: T) {
         self.values.push(value);

@@ -55,11 +59,11 @@ where

 impl<T, PolyT> Accumulator for Polyval<T, PolyT>
 where
-    T: Primitive + AsPrimitive<PolyT>,
-    PolyT: Primitive + std::ops::Mul<Output = PolyT> + std::iter::Sum<PolyT>,
-    for<'a> T: Scalar<RefType<'a> = T>,
-    for<'a> PolyT: Scalar<RefType<'a> = PolyT>,
-    i64: AsPrimitive<PolyT>,
+    T: WrapperType,
+    T::Native: AsPrimitive<PolyT::Native>,
+    PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
+    PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
+    i64: AsPrimitive<<PolyT as WrapperType>::Native>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self

@@ -70,7 +74,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.x.into(),
         ])

@@ -91,10 +95,10 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         (0..len).for_each(|_| {
             for v in column.iter_data().flatten() {

@@ -103,7 +107,7 @@ where
         });

         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<i64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
             err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
         })?;
         // `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`

@@ -172,12 +176,14 @@ where
             ),
         })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
                 for v in column.iter_data().flatten() {
                     self.push(v);
                 }
+            }
         }

         Ok(())
     }

@@ -196,7 +202,7 @@ where
             .values
             .iter()
             .enumerate()
-            .map(|(i, &value)| value.as_() * (x.pow((len - 1 - i) as u32)).as_())
+            .map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
             .sum();
         Ok(polyval.into())
     }

@@ -213,7 +219,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(Polyval::<$S,<$S as Primitive>::LargestType>::default()))
+                Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(

@@ -234,7 +240,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type,
             |$S| {
-                Ok(PrimitiveType::<<$S as Primitive>::LargestType>::default().into())
+                Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
             },
             {
                 unreachable!()

@@ -254,7 +260,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {

 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;

     use super::*;
     #[test]

@@ -268,8 +274,8 @@ mod test {
         // test update one not-null value
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Some(3)])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
+            Arc::new(Int32Vector::from(vec![Some(3)])),
+            Arc::new(Int64Vector::from(vec![Some(2_i64)])),
         ];
         assert!(polyval.update_batch(&v).is_ok());
         assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());

@@ -277,8 +283,8 @@ mod test {
         // test update one null value
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
+            Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
+            Arc::new(Int64Vector::from(vec![Some(2_i64)])),
         ];
         assert!(polyval.update_batch(&v).is_ok());
         assert_eq!(Value::Null, polyval.evaluate().unwrap());

@@ -286,12 +292,8 @@ mod test {
         // test update no null-value batch
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(3),
-                Some(0),
-                Some(1),
-            ])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
+            Arc::new(Int64Vector::from(vec![
                 Some(2_i64),
                 Some(2_i64),
                 Some(2_i64),

@@ -303,13 +305,8 @@ mod test {
         // test update null-value batch
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(3),
-                Some(0),
-                None,
-                Some(1),
-            ])),
-            Arc::new(PrimitiveVector::<i64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
+            Arc::new(Int64Vector::from(vec![
                 Some(2_i64),
                 Some(2_i64),
                 Some(2_i64),

@@ -323,10 +320,10 @@ mod test {
         let mut polyval = Polyval::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![
             Arc::new(ConstantVector::new(
-                Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+                Arc::new(Int32Vector::from_vec(vec![4])),
                 2,
             )),
-            Arc::new(PrimitiveVector::<i64>::from(vec![Some(5_i64), Some(5_i64)])),
+            Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
         ];
         assert!(polyval.update_batch(&v).is_ok());
         assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());
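The recurring bound rewrite in this file, `T: Primitive + AsPrimitive<PolyT>` becoming `T: WrapperType` with the numeric bounds moved onto `T::Native`, is the heart of the change: the wrapper type exposes its raw machine type through an associated `Native` type, and conversions happen via `into_native()`. A self-contained model of that split; the `WrapperType` trait below is a toy stand-in for illustration, not the datatypes crate's real definition.

```rust
use num_traits::AsPrimitive;

// Toy stand-in for the crate's WrapperType: a logical value type that
// exposes the raw machine type it wraps.
trait WrapperType: Copy {
    type Native: Copy + 'static;
    fn into_native(self) -> Self::Native;
}

// In this toy model the plain integer types wrap themselves.
impl WrapperType for i32 {
    type Native = i32;
    fn into_native(self) -> i32 { self }
}
impl WrapperType for i64 {
    type Native = i64;
    fn into_native(self) -> i64 { self }
}

// polyval([a2, a1, a0], x) = a2*x^2 + a1*x + a0, mirroring the evaluate()
// body in the diff: each coefficient is lowered to its native type, then
// widened to PolyT::Native before multiplying.
fn polyval<T, PolyT>(coeffs: &[T], x: i64) -> PolyT::Native
where
    T: WrapperType,
    T::Native: AsPrimitive<PolyT::Native>,
    PolyT: WrapperType,
    PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
    i64: AsPrimitive<PolyT::Native>,
{
    let len = coeffs.len();
    coeffs
        .iter()
        .enumerate()
        .map(|(i, &c)| c.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
        .sum()
}

fn main() {
    // 3*2^2 + 0*2 + 1 = 13
    let v: i64 = polyval::<i32, i64>(&[3, 0, 1], 2);
    assert_eq!(v, 13);
}
```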
@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
 use datatypes::value::{ListValue, OrderedFloat};
-use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};

@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
 // https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html

 #[derive(Debug, Default)]
-pub struct ScipyStatsNormCdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+pub struct ScipyStatsNormCdf<T> {
     values: Vec<T>,
     x: Option<f64>,
 }

-impl<T> ScipyStatsNormCdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+impl<T> ScipyStatsNormCdf<T> {
     fn push(&mut self, value: T) {
         self.values.push(value);
     }

@@ -52,8 +46,8 @@ where

 impl<T> Accumulator for ScipyStatsNormCdf<T>
 where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType + std::iter::Sum<T>,
+    T::Native: AsPrimitive<f64>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self

@@ -64,7 +58,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.x.into(),
         ])

@@ -86,14 +80,14 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };

         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
             err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a positive integer",
         })?;
         let first = x.get(0);

@@ -160,19 +154,19 @@ where
             ),
         })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
                 for v in column.iter_data().flatten() {
                     self.push(v);
                 }
+            }
         }
         Ok(())
     }

     fn evaluate(&self) -> Result<Value> {
-        let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
-        let mean = values.clone().mean();
-        let std_dev = values.std_dev();
+        let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
+        let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
         if mean.is_nan() || std_dev.is_nan() {
             Ok(Value::Null)
         } else {

@@ -198,7 +192,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(ScipyStatsNormCdf::<$S>::default()))
+                Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(

@@ -230,7 +224,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {

 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::{Float64Vector, Int32Vector};

     use super::*;
     #[test]

@@ -244,12 +238,8 @@ mod test {
         // test update no null-value batch
         let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 Some(2.0_f64),
                 Some(2.0_f64),

@@ -264,13 +254,8 @@ mod test {
         // test update null-value batch
         let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-2i32),
-                None,
-                Some(3),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 None,
                 Some(2.0_f64),
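For reference, what SCIPYSTATSNORMCDF computes can be reproduced with statrs directly, the same crate behind the `Statistics` import: fit a normal distribution to the column's mean and standard deviation, then evaluate the CDF at `x`. A sketch assuming statrs 0.16-style trait names (`ContinuousCDF`):

```rust
use statrs::distribution::{ContinuousCDF, Normal};
use statrs::statistics::Statistics;

fn norm_cdf(values: &[f64], x: f64) -> Option<f64> {
    let mean = values.iter().mean();
    let std_dev = values.iter().std_dev();
    if mean.is_nan() || std_dev.is_nan() {
        return None; // mirrors the Value::Null branch in evaluate()
    }
    Normal::new(mean, std_dev).ok().map(|n| n.cdf(x))
}

fn main() {
    // Same inputs as the "no null-value batch" test: values and x = 2.0.
    println!("{:?}", norm_cdf(&[-1.0, 1.0, 2.0], 2.0));
}
```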
@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
 use datatypes::value::{ListValue, OrderedFloat};
-use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};

@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
 // https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html

 #[derive(Debug, Default)]
-pub struct ScipyStatsNormPdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+pub struct ScipyStatsNormPdf<T> {
     values: Vec<T>,
     x: Option<f64>,
 }

-impl<T> ScipyStatsNormPdf<T>
-where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-{
+impl<T> ScipyStatsNormPdf<T> {
     fn push(&mut self, value: T) {
         self.values.push(value);
     }

@@ -52,8 +46,8 @@ where

 impl<T> Accumulator for ScipyStatsNormPdf<T>
 where
-    T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType,
+    T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self

@@ -64,7 +58,7 @@ where
         Ok(vec![
             Value::List(ListValue::new(
                 Some(Box::new(nums)),
-                T::default().into().data_type(),
+                T::LogicalType::build_data_type(),
             )),
             self.x.into(),
         ])

@@ -86,14 +80,14 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
        };

         let x = &values[1];
-        let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
+        let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
             err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a positive integer",
         })?;
         let first = x.get(0);

@@ -160,19 +154,20 @@ where
             ),
         })?;
         for value in values.values_iter() {
-            let value = value.context(FromScalarValueSnafu)?;
-            let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
+            if let Some(value) = value.context(FromScalarValueSnafu)? {
+                let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
                 for v in column.iter_data().flatten() {
                     self.push(v);
                 }
+            }
         }
         Ok(())
     }

     fn evaluate(&self) -> Result<Value> {
-        let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
-        let mean = values.clone().mean();
-        let std_dev = values.std_dev();
+        let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
+        let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
         if mean.is_nan() || std_dev.is_nan() {
             Ok(Value::Null)
         } else {

@@ -198,7 +193,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(ScipyStatsNormPdf::<$S>::default()))
+                Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(

@@ -230,7 +225,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {

 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::{Float64Vector, Int32Vector};

     use super::*;
     #[test]

@@ -244,12 +239,8 @@ mod test {
         // test update no null-value batch
         let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-1i32),
-                Some(1),
-                Some(2),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 Some(2.0_f64),
                 Some(2.0_f64),

@@ -264,13 +255,8 @@ mod test {
         // test update null-value batch
         let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
         let v: Vec<VectorRef> = vec![
-            Arc::new(PrimitiveVector::<i32>::from(vec![
-                Some(-2i32),
-                None,
-                Some(3),
-                Some(4),
-            ])),
-            Arc::new(PrimitiveVector::<f64>::from(vec![
+            Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
+            Arc::new(Float64Vector::from(vec![
                 Some(2.0_f64),
                 None,
                 Some(2.0_f64),
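SCIPYSTATSNORMPDF is the density counterpart: the same fitting step, evaluated through statrs' `Continuous` trait instead. Again a hedged sketch against statrs 0.16-style names:

```rust
use statrs::distribution::{Continuous, Normal};
use statrs::statistics::Statistics;

fn norm_pdf(values: &[f64], x: f64) -> Option<f64> {
    let mean = values.iter().mean();
    let std_dev = values.iter().std_dev();
    if mean.is_nan() || std_dev.is_nan() {
        return None;
    }
    Normal::new(mean, std_dev).ok().map(|n| n.pdf(x))
}

fn main() {
    println!("{:?}", norm_pdf(&[-1.0, 1.0, 2.0], 2.0));
}
```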
@@ -14,10 +14,10 @@

 use std::iter;

+use common_query::error::Result;
 use datatypes::prelude::*;
-use datatypes::vectors::ConstantVector;
+use datatypes::vectors::{ConstantVector, Helper};

-use crate::error::Result;
 use crate::scalars::expression::ctx::EvalContext;

 pub fn scalar_binary_op<L: Scalar, R: Scalar, O: Scalar, F>(

@@ -36,10 +36,9 @@ where

     let result = match (l.is_const(), r.is_const()) {
         (false, true) => {
-            let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
-            let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
-            let right: &<R as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(right.inner()) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
+            let right: &ConstantVector = unsafe { Helper::static_cast(r) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
             let b = right.get_data(0);

             let it = left.iter_data().map(|a| f(a, b, ctx));

@@ -47,8 +46,8 @@ where
         }

         (false, false) => {
-            let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
-            let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };

             let it = left
                 .iter_data()

@@ -58,25 +57,22 @@ where
         }

         (true, false) => {
-            let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
-            let left: &<L as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(left.inner()) };
+            let left: &ConstantVector = unsafe { Helper::static_cast(l) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
             let a = left.get_data(0);

-            let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
             let it = right.iter_data().map(|b| f(a, b, ctx));
             <O as Scalar>::VectorType::from_owned_iterator(it)
         }

         (true, true) => {
-            let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
-            let left: &<L as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(left.inner()) };
+            let left: &ConstantVector = unsafe { Helper::static_cast(l) };
+            let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
             let a = left.get_data(0);

-            let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
-            let right: &<R as Scalar>::VectorType =
-                unsafe { VectorHelper::static_cast(right.inner()) };
+            let right: &ConstantVector = unsafe { Helper::static_cast(r) };
+            let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
             let b = right.get_data(0);

             let it = iter::repeat(a)
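Stripped of the vector downcasts, `scalar_binary_op` is an element-wise kernel over nullable values with fast paths for constant operands. A dependency-free sketch of that shape, with illustrative names rather than the real API:

```rust
// General case: walk both columns in lockstep, applying a nullable kernel.
fn binary_op<L: Copy, R: Copy, O, F>(
    left: &[Option<L>],
    right: &[Option<R>],
    f: F,
) -> Vec<Option<O>>
where
    F: Fn(Option<L>, Option<R>) -> Option<O>,
{
    left.iter().zip(right.iter()).map(|(&a, &b)| f(a, b)).collect()
}

// Constant fast path: read the scalar once instead of per row, like the
// ConstantVector branches above.
fn binary_op_const_rhs<L: Copy, R: Copy, O, F>(
    left: &[Option<L>],
    b: Option<R>,
    f: F,
) -> Vec<Option<O>>
where
    F: Fn(Option<L>, Option<R>) -> Option<O>,
{
    left.iter().map(|&a| f(a, b)).collect()
}

fn main() {
    let l = [Some(1i32), None, Some(3)];
    let r = [Some(10i32), Some(20), Some(30)];
    let out = binary_op(&l, &r, |a, b| Some(a? + b?));
    assert_eq!(out, vec![Some(11), None, Some(33)]);

    let plus_c = binary_op_const_rhs(&l, Some(100), |a, b| Some(a? + b?));
    assert_eq!(plus_c, vec![Some(101), None, Some(103)]);
}
```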
@@ -13,8 +13,7 @@
 // limitations under the License.

 use chrono_tz::Tz;
-
-use crate::error::Error;
+use common_query::error::Error;

 pub struct EvalContext {
     _tz: Tz,
@@ -12,10 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use common_query::error::{self, Result};
 use datatypes::prelude::*;
+use datatypes::vectors::Helper;
 use snafu::ResultExt;

-use crate::error::{GetScalarVectorSnafu, Result};
 use crate::scalars::expression::ctx::EvalContext;

 /// TODO: remove the allow_unused when it's used.

@@ -28,7 +29,7 @@ pub fn scalar_unary_op<L: Scalar, O: Scalar, F>(
 where
     F: Fn(Option<L::RefType<'_>>, &mut EvalContext) -> Option<O>,
 {
-    let left = VectorHelper::check_get_scalar::<L>(l).context(GetScalarVectorSnafu)?;
+    let left = Helper::check_get_scalar::<L>(l).context(error::GetScalarVectorSnafu)?;
     let it = left.iter_data().map(|a| f(a, ctx));
     let result = <O as Scalar>::VectorType::from_owned_iterator(it);
@@ -16,12 +16,11 @@ use std::fmt;
 use std::sync::Arc;

 use chrono_tz::Tz;
+use common_query::error::Result;
 use common_query::prelude::Signature;
 use datatypes::data_type::ConcreteDataType;
 use datatypes::vectors::VectorRef;

-use crate::error::Result;
-
 #[derive(Clone)]
 pub struct FunctionContext {
     pub tz: Tz,
@@ -15,15 +15,16 @@
 use std::fmt;
 use std::sync::Arc;

+use common_query::error::Result;
 use common_query::prelude::{Signature, Volatility};
 use datatypes::data_type::DataType;
 use datatypes::prelude::ConcreteDataType;
+use datatypes::types::LogicalPrimitiveType;
 use datatypes::vectors::VectorRef;
 use datatypes::with_match_primitive_type_id;
 use num::traits::Pow;
 use num_traits::AsPrimitive;

-use crate::error::Result;
 use crate::scalars::expression::{scalar_binary_op, EvalContext};
 use crate::scalars::function::{Function, FunctionContext};

@@ -46,7 +47,7 @@ impl Function for PowFunction {
     fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
         with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
             with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
-                let col = scalar_binary_op::<$S, $T, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
+                let col = scalar_binary_op::<<$S as LogicalPrimitiveType>::Native, <$T as LogicalPrimitiveType>::Native, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
                 Ok(Arc::new(col))
             },{
                 unreachable!()
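The `scalar_pow` kernel this hunk now instantiates over `Native` types widens both operands to f64 via `num_traits::AsPrimitive`. A minimal standalone version; the real kernel also takes an `EvalContext`, omitted here:

```rust
use num_traits::AsPrimitive;

// Both operands are widened to f64, matching the `<.., .., f64, _>`
// instantiation of scalar_binary_op above.
fn scalar_pow<S, T>(base: Option<S>, exp: Option<T>) -> Option<f64>
where
    S: AsPrimitive<f64>,
    T: AsPrimitive<f64>,
{
    match (base, exp) {
        (Some(b), Some(e)) => Some(b.as_().powf(e.as_())),
        _ => None,
    }
}

fn main() {
    let p = scalar_pow(Some(2i32), Some(10i64)).unwrap();
    assert!((p - 1024.0).abs() < 1e-9);
    assert_eq!(scalar_pow::<i32, i64>(None, Some(3)), None);
}
```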
@@ -14,10 +14,10 @@

 use std::fmt;

-use arrow::array::Array;
-use common_query::error::{FromArrowArraySnafu, Result, TypeCastSnafu};
+use common_query::error::{self, Result};
 use common_query::prelude::{Signature, Volatility};
-use datatypes::arrow;
+use datatypes::arrow::compute::kernels::{arithmetic, cast};
+use datatypes::arrow::datatypes::DataType;
 use datatypes::prelude::*;
 use datatypes::vectors::{Helper, VectorRef};
 use snafu::ResultExt;

@@ -51,28 +51,21 @@ impl Function for RateFunction {
         let val = &columns[0].to_arrow_array();
         let val_0 = val.slice(0, val.len() - 1);
         let val_1 = val.slice(1, val.len() - 1);
-        let dv = arrow::compute::arithmetics::sub(&*val_1, &*val_0);
+        let dv = arithmetic::subtract_dyn(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
         let ts = &columns[1].to_arrow_array();
         let ts_0 = ts.slice(0, ts.len() - 1);
         let ts_1 = ts.slice(1, ts.len() - 1);
-        let dt = arrow::compute::arithmetics::sub(&*ts_1, &*ts_0);
+        let dt = arithmetic::subtract_dyn(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;

-        fn all_to_f64(array: &dyn Array) -> Result<Box<dyn Array>> {
-            Ok(arrow::compute::cast::cast(
-                array,
-                &arrow::datatypes::DataType::Float64,
-                arrow::compute::cast::CastOptions {
-                    wrapped: true,
-                    partial: true,
-                },
-            )
-            .context(TypeCastSnafu {
-                typ: arrow::datatypes::DataType::Float64,
-            })?)
-        }
-
-        let dv = all_to_f64(&*dv)?;
-        let dt = all_to_f64(&*dt)?;
-        let rate = arrow::compute::arithmetics::div(&*dv, &*dt);
-        let v = Helper::try_into_vector(&rate).context(FromArrowArraySnafu)?;
+        let dv = cast::cast(&dv, &DataType::Float64).context(error::TypeCastSnafu {
+            typ: DataType::Float64,
+        })?;
+        let dt = cast::cast(&dt, &DataType::Float64).context(error::TypeCastSnafu {
+            typ: DataType::Float64,
+        })?;
+        let rate = arithmetic::divide_dyn(&dv, &dt).context(error::ArrowComputeSnafu)?;
+        let v = Helper::try_into_vector(&rate).context(error::FromArrowArraySnafu)?;
         Ok(v)
     }
 }

@@ -81,9 +74,8 @@ impl Function for RateFunction {
 mod tests {
     use std::sync::Arc;

-    use arrow::array::Float64Array;
     use common_query::prelude::TypeSignature;
-    use datatypes::vectors::{Float32Vector, Int64Vector};
+    use datatypes::vectors::{Float32Vector, Float64Vector, Int64Vector};

     use super::*;
     #[test]

@@ -108,9 +100,7 @@ mod tests {
             Arc::new(Int64Vector::from_vec(ts)),
         ];
         let vector = rate.eval(FunctionContext::default(), &args).unwrap();
-        let arr = vector.to_arrow_array();
-        let expect = Arc::new(Float64Array::from_vec(vec![2.0, 3.0]));
-        let res = arrow::compute::comparison::eq(&*arr, &*expect);
-        res.iter().for_each(|x| assert!(matches!(x, Some(true))));
+        let expect: VectorRef = Arc::new(Float64Vector::from_vec(vec![2.0, 3.0]));
+        assert_eq!(expect, vector);
     }
 }
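Numerically, the rewritten RATE body is pairwise deltas of values divided by pairwise deltas of timestamps, everything cast to f64; the arrow kernels (`subtract_dyn`, two `cast`s to Float64, `divide_dyn`) vectorize exactly this. A plain-Rust sketch that mirrors the test's `[2.0, 3.0]` expectation:

```rust
// rate[i] = (v[i+1] - v[i]) / (t[i+1] - t[i]), for consecutive samples.
fn rate(values: &[i64], ts: &[i64]) -> Vec<f64> {
    values
        .windows(2)
        .zip(ts.windows(2))
        .map(|(v, t)| (v[1] - v[0]) as f64 / (t[1] - t[0]) as f64)
        .collect()
}

fn main() {
    // Deltas of values are [2, 3], deltas of timestamps are [1, 1].
    assert_eq!(rate(&[1, 3, 6], &[0, 1, 2]), vec![2.0, 3.0]);
}
```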
@@ -13,7 +13,6 @@
 // limitations under the License.

 mod clip;
-#[allow(unused)]
 mod interp;

 use std::sync::Arc;
@@ -15,14 +15,15 @@
 use std::fmt;
 use std::sync::Arc;

+use common_query::error::Result;
 use common_query::prelude::{Signature, Volatility};
-use datatypes::data_type::{ConcreteDataType, DataType};
-use datatypes::prelude::{Scalar, VectorRef};
-use datatypes::with_match_primitive_type_id;
-use num_traits::AsPrimitive;
+use datatypes::arrow::compute;
+use datatypes::arrow::datatypes::ArrowPrimitiveType;
+use datatypes::data_type::ConcreteDataType;
+use datatypes::prelude::*;
+use datatypes::vectors::PrimitiveVector;
 use paste::paste;

-use crate::error::Result;
 use crate::scalars::expression::{scalar_binary_op, EvalContext};
 use crate::scalars::function::{Function, FunctionContext};

@@ -34,25 +35,32 @@ macro_rules! define_eval {
     ($O: ident) => {
         paste! {
             fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
-                with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
-                    with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
-                        with_match_primitive_type_id!(columns[2].data_type().logical_type_id(), |$R| {
-                            // clip(a, min, max) is equals to min(max(a, min), max)
-                            let col: VectorRef = Arc::new(scalar_binary_op::<$S, $T, $O, _>(&columns[0], &columns[1], scalar_max, &mut EvalContext::default())?);
-                            let col = scalar_binary_op::<$O, $R, $O, _>(&col, &columns[2], scalar_min, &mut EvalContext::default())?;
-                            Ok(Arc::new(col))
-                        }, {
-                            unreachable!()
-                        })
-                    }, {
-                        unreachable!()
-                    })
-                }, {
-                    unreachable!()
-                })
+                fn cast_vector(input: &VectorRef) -> VectorRef {
+                    Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(
+                        compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap()
+                    ).unwrap()) as _
+                }
+                let operator_1 = cast_vector(&columns[0]);
+                let operator_2 = cast_vector(&columns[1]);
+                let operator_3 = cast_vector(&columns[2]);
+
+                // clip(a, min, max) is equals to min(max(a, min), max)
+                let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>(
+                    &operator_1,
+                    &operator_2,
+                    scalar_max,
+                    &mut EvalContext::default(),
+                )?);
+                let col = scalar_binary_op::<$O, $O, $O, _>(
+                    &col,
+                    &operator_3,
+                    scalar_min,
+                    &mut EvalContext::default(),
+                )?;
+                Ok(Arc::new(col))
             }
         }
-    }
+    };
 }

 define_eval!(i64);

@@ -108,27 +116,23 @@ pub fn max<T: PartialOrd>(input: T, max: T) -> T {
 }

 #[inline]
-fn scalar_min<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
+fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
 where
-    S: AsPrimitive<O>,
-    T: AsPrimitive<O>,
     O: Scalar + Copy + PartialOrd,
 {
     match (left, right) {
-        (Some(left), Some(right)) => Some(min(left.as_(), right.as_())),
+        (Some(left), Some(right)) => Some(min(left, right)),
         _ => None,
     }
 }

 #[inline]
-fn scalar_max<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
+fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
 where
-    S: AsPrimitive<O>,
-    T: AsPrimitive<O>,
     O: Scalar + Copy + PartialOrd,
 {
     match (left, right) {
-        (Some(left), Some(right)) => Some(max(left.as_(), right.as_())),
+        (Some(left), Some(right)) => Some(max(left, right)),
         _ => None,
     }
 }

@@ -143,11 +147,15 @@ impl fmt::Display for ClipFunction {
 mod tests {
     use common_query::prelude::TypeSignature;
     use datatypes::value::Value;
-    use datatypes::vectors::{ConstantVector, Float32Vector, Int32Vector, UInt32Vector};
+    use datatypes::vectors::{
+        ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
+        UInt32Vector, UInt8Vector,
+    };

     use super::*;

     #[test]
-    fn test_clip_function() {
+    fn test_clip_signature() {
         let clip = ClipFunction::default();

         assert_eq!("clip", clip.name());

@@ -190,16 +198,21 @@ mod tests {
             volatility: Volatility::Immutable
         } if valid_types == ConcreteDataType::numerics()
         ));
+    }
+
+    #[test]
+    fn test_clip_fn_signed() {
+        let clip = ClipFunction::default();

         // eval with signed integers
         let args: Vec<VectorRef> = vec![
             Arc::new(Int32Vector::from_values(0..10)),
             Arc::new(ConstantVector::new(
-                Arc::new(Int32Vector::from_vec(vec![3])),
+                Arc::new(Int8Vector::from_vec(vec![3])),
                 10,
             )),
             Arc::new(ConstantVector::new(
-                Arc::new(Int32Vector::from_vec(vec![6])),
+                Arc::new(Int16Vector::from_vec(vec![6])),
                 10,
             )),
         ];

@@ -217,16 +230,21 @@ mod tests {
             assert!(matches!(vector.get(i), Value::Int64(v) if v == 6));
         }
+    }
+
+    #[test]
+    fn test_clip_fn_unsigned() {
+        let clip = ClipFunction::default();

         // eval with unsigned integers
         let args: Vec<VectorRef> = vec![
-            Arc::new(UInt32Vector::from_values(0..10)),
+            Arc::new(UInt8Vector::from_values(0..10)),
             Arc::new(ConstantVector::new(
                 Arc::new(UInt32Vector::from_vec(vec![3])),
                 10,
             )),
             Arc::new(ConstantVector::new(
-                Arc::new(UInt32Vector::from_vec(vec![6])),
+                Arc::new(UInt16Vector::from_vec(vec![6])),
                 10,
             )),
         ];

@@ -244,12 +262,17 @@ mod tests {
             assert!(matches!(vector.get(i), Value::UInt64(v) if v == 6));
         }
+    }
+
+    #[test]
+    fn test_clip_fn_float() {
+        let clip = ClipFunction::default();

         // eval with floats
         let args: Vec<VectorRef> = vec![
-            Arc::new(Int32Vector::from_values(0..10)),
+            Arc::new(Int8Vector::from_values(0..10)),
             Arc::new(ConstantVector::new(
-                Arc::new(Int32Vector::from_vec(vec![3])),
+                Arc::new(UInt32Vector::from_vec(vec![3])),
                 10,
             )),
             Arc::new(ConstantVector::new(
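The macro comment states the identity this file leans on: clip(a, min, max) equals min(max(a, min), max). A scalar sketch over nullable values, matching the new `scalar_min`/`scalar_max` signatures where all three operands share one type after the up-front cast:

```rust
// Nullable clip over a single shared type O, as in the simplified kernels.
fn clip<O: PartialOrd + Copy>(a: Option<O>, lo: Option<O>, hi: Option<O>) -> Option<O> {
    let max = |x: O, y: O| if x > y { x } else { y };
    let min = |x: O, y: O| if x < y { x } else { y };
    match (a, lo, hi) {
        (Some(a), Some(lo), Some(hi)) => Some(min(max(a, lo), hi)),
        _ => None,
    }
}

fn main() {
    // 0..10 clipped to [3, 6] pins both ends, like the signed test expects.
    let out: Vec<_> = (0..10).map(|v| clip(Some(v), Some(3), Some(6))).collect();
    assert_eq!(out[0], Some(3));
    assert_eq!(out[4], Some(4));
    assert_eq!(out[9], Some(6));
}
```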
@@ -14,41 +14,18 @@

 use std::sync::Arc;

-use datatypes::arrow::array::PrimitiveArray;
-use datatypes::arrow::compute::cast::primitive_to_primitive;
-use datatypes::arrow::datatypes::DataType::Float64;
+use common_query::error::{self, Result};
+use datatypes::arrow::compute::cast;
+use datatypes::arrow::datatypes::DataType as ArrowDataType;
 use datatypes::data_type::DataType;
 use datatypes::prelude::ScalarVector;
-use datatypes::type_id::LogicalTypeId;
 use datatypes::value::Value;
-use datatypes::vectors::{Float64Vector, PrimitiveVector, Vector, VectorRef};
-use datatypes::{arrow, with_match_primitive_type_id};
-use snafu::{ensure, Snafu};
-
-#[derive(Debug, Snafu)]
-pub enum Error {
-    #[snafu(display(
-        "The length of the args is not enough, expect at least: {}, have: {}",
-        expect,
-        actual,
-    ))]
-    ArgsLenNotEnough { expect: usize, actual: usize },
-
-    #[snafu(display("The sample {} is empty", name))]
-    SampleEmpty { name: String },
-
-    #[snafu(display(
-        "The length of the len1: {} don't match the length of the len2: {}",
-        len1,
-        len2,
-    ))]
-    LenNotEquals { len1: usize, len2: usize },
-}
-
-pub type Result<T> = std::result::Result<T, Error>;
+use datatypes::vectors::{Float64Vector, Vector, VectorRef};
+use datatypes::with_match_primitive_type_id;
+use snafu::{ensure, ResultExt};

 /* search the biggest number that smaller than x in xp */
-fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize {
+fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
     for i in 0..xp.len() {
         if x < xp.get(i) {
             return i - 1;

@@ -58,7 +35,7 @@ fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize
 }

 /* search the biggest number that smaller than x in xp */
-fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usize {
+fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
     let mut left = 0;
     let mut right = xp.len();
     /* If len <= 4 use linear search. */

@@ -77,27 +54,33 @@ fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usiz
     left - 1
 }

-fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<PrimitiveVector<f64>> {
+fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
     with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
         let tmp = arg.to_arrow_array();
-        let from = tmp.as_any().downcast_ref::<PrimitiveArray<$S>>().expect("cast failed");
-        let array = primitive_to_primitive(from, &Float64);
-        Ok(PrimitiveVector::new(array))
+        let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
+            typ: ArrowDataType::Float64,
+        })?;
+        // Safety: array has been cast to Float64Array.
+        Ok(Float64Vector::try_from_arrow_array(array).unwrap())
    },{
        unreachable!()
    })
 }

 /// https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491
+#[allow(unused)]
 pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
     let mut left = None;
     let mut right = None;

     ensure!(
         args.len() >= 3,
-        ArgsLenNotEnoughSnafu {
-            expect: 3_usize,
-            actual: args.len()
+        error::InvalidFuncArgsSnafu {
+            err_msg: format!(
+                "The length of the args is not enough, expect at least: {}, have: {}",
+                3,
+                args.len()
+            ),
         }
     );

@@ -109,9 +92,12 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
     if args.len() > 3 {
         ensure!(
             args.len() == 5,
-            ArgsLenNotEnoughSnafu {
-                expect: 5_usize,
-                actual: args.len()
+            error::InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not enough, expect at least: {}, have: {}",
+                    5,
+                    args.len()
+                ),
             }
         );

@@ -123,14 +109,32 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
         .get_data(0);
     }

-    ensure!(x.len() != 0, SampleEmptySnafu { name: "x" });
-    ensure!(xp.len() != 0, SampleEmptySnafu { name: "xp" });
-    ensure!(fp.len() != 0, SampleEmptySnafu { name: "fp" });
+    ensure!(
+        x.len() != 0,
+        error::InvalidFuncArgsSnafu {
+            err_msg: "The sample x is empty",
+        }
+    );
+    ensure!(
+        xp.len() != 0,
+        error::InvalidFuncArgsSnafu {
+            err_msg: "The sample xp is empty",
+        }
+    );
+    ensure!(
+        fp.len() != 0,
+        error::InvalidFuncArgsSnafu {
+            err_msg: "The sample fp is empty",
+        }
+    );
     ensure!(
         xp.len() == fp.len(),
-        LenNotEqualsSnafu {
-            len1: xp.len(),
-            len2: fp.len(),
+        error::InvalidFuncArgsSnafu {
+            err_msg: format!(
+                "The length of the len1: {} don't match the length of the len2: {}",
+                xp.len(),
+                fp.len()
+            ),
         }
     );

@@ -147,7 +151,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {

     let res;
     if xp.len() == 1 {
-        res = x
+        let datas = x
             .iter_data()
             .map(|x| {
                 if Value::from(x) < xp.get(0) {

@@ -158,7 +162,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
                     fp.get_data(0)
                 }
             })
-            .collect::<Float64Vector>();
+            .collect::<Vec<_>>();
+        res = Float64Vector::from(datas);
     } else {
         let mut j = 0;
         /* only pre-calculate slopes if there are relatively few of them. */

@@ -185,7 +190,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
             }
             slopes = Some(slopes_tmp);
         }
-        res = x
+        let datas = x
             .iter_data()
             .map(|x| match x {
                 Some(xi) => {

@@ -248,7 +253,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
                 }
                 _ => None,
             })
-            .collect::<Float64Vector>();
+            .collect::<Vec<_>>();
+        res = Float64Vector::from(datas);
     }
     Ok(Arc::new(res) as _)
 }

@@ -257,8 +263,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
 mod tests {
     use std::sync::Arc;

-    use datatypes::prelude::ScalarVectorBuilder;
-    use datatypes::vectors::{Int32Vector, Int64Vector, PrimitiveVectorBuilder};
+    use datatypes::vectors::{Int32Vector, Int64Vector};

     use super::*;
     #[test]

@@ -338,15 +343,11 @@ mod tests {
             Arc::new(Int64Vector::from_vec(fp.clone())),
         ];
         let vector = interp(&args).unwrap();
-        assert!(matches!(vector.get(0), Value::Float64(v) if v==x[0] as f64));
+        assert!(matches!(vector.get(0), Value::Float64(v) if v == x[0]));

         // x=None output:Null
-        let input = [None, Some(0.0), Some(0.3)];
-        let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
-        for v in input {
-            builder.push(v);
-        }
-        let x = builder.finish();
+        let input = vec![None, Some(0.0), Some(0.3)];
+        let x = Float64Vector::from(input);
         let args: Vec<VectorRef> = vec![
             Arc::new(x),
             Arc::new(Int64Vector::from_vec(xp)),
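The ported `interp` follows numpy's: for each query x, find the bracketing pair (xp[j], xp[j+1]) and linearly interpolate fp, clamping to fp's endpoints outside the range. A compact model of the happy path; the diff's version additionally handles nulls and the optional left/right fills:

```rust
// numpy-style 1-D linear interpolation over ascending xp.
fn interp(x: &[f64], xp: &[f64], fp: &[f64]) -> Vec<f64> {
    assert!(!xp.is_empty() && xp.len() == fp.len());
    x.iter()
        .map(|&xi| {
            if xi <= xp[0] {
                return fp[0]; // clamp left
            }
            if xi >= xp[xp.len() - 1] {
                return fp[fp.len() - 1]; // clamp right
            }
            // Largest j with xp[j] <= xi; xp is assumed ascending.
            let j = xp.partition_point(|&v| v <= xi) - 1;
            let slope = (fp[j + 1] - fp[j]) / (xp[j + 1] - xp[j]);
            fp[j] + slope * (xi - xp[j])
        })
        .collect()
}

fn main() {
    // Classic numpy docs example: interp(2.5, [1,2,3], [3,2,0]) == 1.0.
    let (xp, fp) = ([1.0, 2.0, 3.0], [3.0, 2.0, 0.0]);
    assert_eq!(interp(&[2.5], &xp, &fp), vec![1.0]);
    assert_eq!(interp(&[0.0, 4.0], &xp, &fp), vec![3.0, 0.0]);
}
```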
@@ -15,11 +15,11 @@
 use std::fmt;
 use std::sync::Arc;

+use common_query::error::Result;
 use common_query::prelude::{Signature, Volatility};
 use datatypes::data_type::ConcreteDataType;
 use datatypes::prelude::VectorRef;

-use crate::error::Result;
 use crate::scalars::expression::{scalar_binary_op, EvalContext};
 use crate::scalars::function::{Function, FunctionContext};
@@ -17,16 +17,17 @@
 use std::fmt;
 use std::sync::Arc;
 
-use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
+use common_query::error::{
+    ArrowComputeSnafu, IntoVectorSnafu, Result, TypeCastSnafu, UnsupportedInputDataTypeSnafu,
+};
 use common_query::prelude::{Signature, Volatility};
-use datatypes::arrow::compute::arithmetics;
-use datatypes::arrow::datatypes::DataType as ArrowDatatype;
-use datatypes::arrow::scalar::PrimitiveScalar;
+use datatypes::arrow::compute;
+use datatypes::arrow::datatypes::{DataType as ArrowDatatype, Int64Type};
+use datatypes::data_type::DataType;
 use datatypes::prelude::ConcreteDataType;
-use datatypes::vectors::{TimestampVector, VectorRef};
+use datatypes::vectors::{TimestampMillisecondVector, VectorRef};
 use snafu::ResultExt;
 
-use crate::error::Result;
 use crate::scalars::function::{Function, FunctionContext};
 
 #[derive(Clone, Debug, Default)]
@@ -40,7 +41,7 @@ impl Function for FromUnixtimeFunction {
     }
 
     fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
-        Ok(ConcreteDataType::timestamp_millis_datatype())
+        Ok(ConcreteDataType::timestamp_millisecond_datatype())
     }
 
     fn signature(&self) -> Signature {
@@ -56,14 +57,18 @@ impl Function for FromUnixtimeFunction {
             ConcreteDataType::Int64(_) => {
                 let array = columns[0].to_arrow_array();
                 // Our timestamp vector's time unit is millisecond
-                let array = arithmetics::mul_scalar(
-                    &*array,
-                    &PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
-                );
+                let array = compute::multiply_scalar_dyn::<Int64Type>(&array, 1000i64)
+                    .context(ArrowComputeSnafu)?;
 
+                let arrow_datatype = &self.return_type(&[]).unwrap().as_arrow_type();
                 Ok(Arc::new(
-                    TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
-                        data_type: ArrowDatatype::Int64,
+                    TimestampMillisecondVector::try_from_arrow_array(
+                        compute::cast(&array, arrow_datatype).context(TypeCastSnafu {
+                            typ: ArrowDatatype::Int64,
+                        })?,
+                    )
+                    .context(IntoVectorSnafu {
+                        data_type: arrow_datatype.clone(),
                     })?,
                 ))
             }
@@ -71,8 +76,7 @@ impl Function for FromUnixtimeFunction {
                 function: NAME,
                 datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
             }
-            .fail()
-            .map_err(|e| e.into()),
+            .fail(),
         }
     }
 }
@@ -96,7 +100,7 @@ mod tests {
         let f = FromUnixtimeFunction::default();
         assert_eq!("from_unixtime", f.name());
         assert_eq!(
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             f.return_type(&[]).unwrap()
         );
 
@@ -19,7 +19,8 @@ use common_query::prelude::{
     ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
 };
 use datatypes::error::Error as DataTypeError;
-use datatypes::prelude::{ConcreteDataType, VectorHelper};
+use datatypes::prelude::*;
+use datatypes::vectors::Helper;
 use snafu::ResultExt;
 
 use crate::scalars::function::{FunctionContext, FunctionRef};
@@ -47,7 +48,7 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
         let args: Result<Vec<_>, DataTypeError> = args
             .iter()
             .map(|arg| match arg {
-                ColumnarValue::Scalar(v) => VectorHelper::try_from_scalar_value(v.clone(), rows),
+                ColumnarValue::Scalar(v) => Helper::try_from_scalar_value(v.clone(), rows),
                 ColumnarValue::Vector(v) => Ok(v.clone()),
             })
             .collect();
@@ -126,12 +127,7 @@ mod tests {
 
                 assert_eq!(4, vec.len());
                 for i in 0..4 {
-                    assert_eq!(
-                        i == 0 || i == 3,
-                        vec.get_data(i).unwrap(),
-                        "failed at {}",
-                        i
-                    )
+                    assert_eq!(i == 0 || i == 3, vec.get_data(i).unwrap(), "Failed at {i}",)
                 }
             }
             _ => unreachable!(),
@@ -1,8 +1,8 @@
 [package]
 name = "common-grpc-expr"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 api = { path = "../../api" }
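The three `*.workspace = true` keys above rely on Cargo's workspace inheritance: the concrete values live once in the workspace root and members opt in per key. A sketch of the matching root manifest (the member path and values here are illustrative, not taken from this diff):

    [workspace]
    members = ["src/common/grpc-expr"]

    # Values inherited by member crates that declare e.g. `version.workspace = true`.
    [workspace.package]
    version = "0.1.0"
    edition = "2021"
    license = "Apache-2.0"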
@@ -15,7 +15,7 @@
 use std::sync::Arc;
 
 use api::v1::alter_expr::Kind;
-use api::v1::{AlterExpr, CreateExpr, DropColumns};
+use api::v1::{AlterExpr, CreateTableExpr, DropColumns};
 use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
 use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
 use snafu::{ensure, OptionExt, ResultExt};
@@ -29,6 +29,16 @@ use crate::error::{
 
 /// Convert an [`AlterExpr`] to an optional [`AlterTableRequest`]
 pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest>> {
+    let catalog_name = if expr.catalog_name.is_empty() {
+        None
+    } else {
+        Some(expr.catalog_name)
+    };
+    let schema_name = if expr.schema_name.is_empty() {
+        None
+    } else {
+        Some(expr.schema_name)
+    };
     match expr.kind {
         Some(Kind::AddColumns(add_columns)) => {
             let add_column_requests = add_columns
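The new `is_empty()` checks encode the convention this series moves to: the proto fields become plain proto3 `string`s whose default `""` means "absent", so the boundary code maps empty back to `Option`. A self-contained sketch of that mapping:

    // Hypothetical helper illustrating the empty-string-means-absent convention.
    fn empty_to_option(s: String) -> Option<String> {
        if s.is_empty() {
            None
        } else {
            Some(s)
        }
    }

    fn main() {
        assert_eq!(None, empty_to_option(String::new()));
        assert_eq!(Some("monitor".to_string()), empty_to_option("monitor".to_string()));
    }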
@@ -57,8 +67,8 @@ pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest
             };
 
             let request = AlterTableRequest {
-                catalog_name: expr.catalog_name,
-                schema_name: expr.schema_name,
+                catalog_name,
+                schema_name,
                 table_name: expr.table_name,
                 alter_kind,
             };
@@ -70,8 +80,8 @@ pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest
             };
 
             let request = AlterTableRequest {
-                catalog_name: expr.catalog_name,
-                schema_name: expr.schema_name,
+                catalog_name,
+                schema_name,
                 table_name: expr.table_name,
                 alter_kind,
             };
@@ -81,7 +91,7 @@ pub fn alter_expr_to_request(expr: AlterExpr) -> Result<Option<AlterTableRequest
     }
 }
 
-pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
+pub fn create_table_schema(expr: &CreateTableExpr) -> Result<SchemaRef> {
     let column_schemas = expr
         .column_defs
         .iter()
@@ -96,7 +106,7 @@ pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
             .iter()
             .any(|column| column.name == expr.time_index),
         MissingTimestampColumnSnafu {
-            msg: format!("CreateExpr: {:?}", expr)
+            msg: format!("CreateExpr: {expr:?}")
         }
     );
 
@@ -119,7 +129,10 @@ pub fn create_table_schema(expr: &CreateExpr) -> Result<SchemaRef> {
     ))
 }
 
-pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<CreateTableRequest> {
+pub fn create_expr_to_request(
+    table_id: TableId,
+    expr: CreateTableExpr,
+) -> Result<CreateTableRequest> {
     let schema = create_table_schema(&expr)?;
     let primary_key_indices = expr
         .primary_keys
@@ -134,12 +147,19 @@ pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<Cre
         })
         .collect::<Result<Vec<usize>>>()?;
 
-    let catalog_name = expr
-        .catalog_name
-        .unwrap_or_else(|| DEFAULT_CATALOG_NAME.to_string());
-    let schema_name = expr
-        .schema_name
-        .unwrap_or_else(|| DEFAULT_SCHEMA_NAME.to_string());
+    let mut catalog_name = expr.catalog_name;
+    if catalog_name.is_empty() {
+        catalog_name = DEFAULT_CATALOG_NAME.to_string();
+    }
+    let mut schema_name = expr.schema_name;
+    if schema_name.is_empty() {
+        schema_name = DEFAULT_SCHEMA_NAME.to_string();
+    }
+    let desc = if expr.desc.is_empty() {
+        None
+    } else {
+        Some(expr.desc)
+    };
 
     let region_ids = if expr.region_ids.is_empty() {
         vec![0]
@@ -152,7 +172,7 @@ pub fn create_expr_to_request(table_id: TableId, expr: CreateExpr) -> Result<Cre
         catalog_name,
         schema_name,
         table_name: expr.table_name,
-        desc: expr.desc,
+        desc,
         schema,
         region_numbers: region_ids,
         primary_key_indices,
@@ -171,8 +191,8 @@ mod tests {
     #[test]
    fn test_alter_expr_to_request() {
        let expr = AlterExpr {
-            catalog_name: None,
-            schema_name: None,
+            catalog_name: "".to_string(),
+            schema_name: "".to_string(),
             table_name: "monitor".to_string(),
 
             kind: Some(Kind::AddColumns(AddColumns {
@@ -181,7 +201,7 @@ mod tests {
                     name: "mem_usage".to_string(),
                     datatype: ColumnDataType::Float64 as i32,
                     is_nullable: false,
-                    default_constraint: None,
+                    default_constraint: vec![],
                 }),
                 is_key: false,
             }],
@@ -208,8 +228,8 @@ mod tests {
     #[test]
     fn test_drop_column_expr() {
         let expr = AlterExpr {
-            catalog_name: Some("test_catalog".to_string()),
-            schema_name: Some("test_schema".to_string()),
+            catalog_name: "test_catalog".to_string(),
+            schema_name: "test_schema".to_string(),
             table_name: "monitor".to_string(),
 
             kind: Some(Kind::DropColumns(DropColumns {
@@ -18,15 +18,15 @@ use std::sync::Arc;
 
 use api::helper::ColumnDataTypeWrapper;
 use api::v1::column::{SemanticType, Values};
-use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateExpr};
+use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateTableExpr};
 use common_base::BitVec;
 use common_time::timestamp::Timestamp;
 use common_time::{Date, DateTime};
-use datatypes::data_type::ConcreteDataType;
+use datatypes::data_type::{ConcreteDataType, DataType};
 use datatypes::prelude::{ValueRef, VectorRef};
 use datatypes::schema::SchemaRef;
 use datatypes::value::Value;
-use datatypes::vectors::VectorBuilder;
+use datatypes::vectors::MutableVector;
 use snafu::{ensure, OptionExt, ResultExt};
 use table::metadata::TableId;
 use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
@@ -45,7 +45,7 @@ fn build_column_def(column_name: &str, datatype: i32, nullable: bool) -> ColumnD
         name: column_name.to_string(),
         datatype,
         is_nullable: nullable,
-        default_constraint: None,
+        default_constraint: vec![],
     }
 }
 
@@ -99,7 +99,7 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
     let column_datatype = wrapper.datatype();
 
     let rows = rows as usize;
-    let mut vector = VectorBuilder::with_capacity(wrapper.into(), rows);
+    let mut vector = ConcreteDataType::from(wrapper).create_mutable_vector(rows);
 
     if let Some(values) = &column.values {
         let values = collect_column_values(column_datatype, values);
@@ -110,21 +110,31 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
 
         for i in 0..rows {
             if let Some(true) = nulls_iter.next() {
-                vector.push_null();
+                vector
+                    .push_value_ref(ValueRef::Null)
+                    .context(CreateVectorSnafu)?;
             } else {
-                let value_ref = values_iter.next().context(InvalidColumnProtoSnafu {
-                    err_msg: format!(
-                        "value not found at position {} of column {}",
-                        i, &column.column_name
-                    ),
-                })?;
-                vector.try_push_ref(value_ref).context(CreateVectorSnafu)?;
+                let value_ref = values_iter
+                    .next()
+                    .with_context(|| InvalidColumnProtoSnafu {
+                        err_msg: format!(
+                            "value not found at position {} of column {}",
+                            i, &column.column_name
+                        ),
+                    })?;
+                vector
+                    .push_value_ref(value_ref)
+                    .context(CreateVectorSnafu)?;
             }
         }
     } else {
-        (0..rows).for_each(|_| vector.push_null());
+        (0..rows).try_for_each(|_| {
+            vector
+                .push_value_ref(ValueRef::Null)
+                .context(CreateVectorSnafu)
+        })?;
     }
-    Ok(vector.finish())
+    Ok(vector.to_vector())
 }
 
 fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Vec<ValueRef> {
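The hunk above swaps the old infallible `VectorBuilder` for the `MutableVector` trait, where every push returns a `Result` and the builder is frozen with `to_vector()`. A self-contained model of that shape (the types are simplified stand-ins, not the real datatypes API):

    // Minimal model of the fallible-push builder pattern.
    #[derive(Debug, Clone, Copy)]
    enum ValueRef {
        Null,
        Int64(i64),
    }

    #[derive(Default)]
    struct Int64Builder {
        data: Vec<Option<i64>>,
    }

    impl Int64Builder {
        // Every push is checked, mirroring `MutableVector::push_value_ref`.
        fn push_value_ref(&mut self, v: ValueRef) -> Result<(), String> {
            match v {
                ValueRef::Null => self.data.push(None),
                ValueRef::Int64(x) => self.data.push(Some(x)),
            }
            Ok(())
        }

        // Freeze the builder into an immutable vector, like `to_vector()`.
        fn to_vector(self) -> Vec<Option<i64>> {
            self.data
        }
    }

    fn main() -> Result<(), String> {
        let mut builder = Int64Builder::default();
        builder.push_value_ref(ValueRef::Int64(42))?;
        builder.push_value_ref(ValueRef::Null)?;
        assert_eq!(vec![Some(42), None], builder.to_vector());
        Ok(())
    }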
@@ -144,7 +154,7 @@ fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Ve
             collect_values!(values.i32_values, |v| ValueRef::from(*v))
         }
         ColumnDataType::Int64 => {
-            collect_values!(values.i64_values, |v| ValueRef::from(*v as i64))
+            collect_values!(values.i64_values, |v| ValueRef::from(*v))
         }
         ColumnDataType::Uint8 => {
             collect_values!(values.u8_values, |v| ValueRef::from(*v as u8))
@@ -156,7 +166,7 @@ fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Ve
             collect_values!(values.u32_values, |v| ValueRef::from(*v))
         }
         ColumnDataType::Uint64 => {
-            collect_values!(values.u64_values, |v| ValueRef::from(*v as u64))
+            collect_values!(values.u64_values, |v| ValueRef::from(*v))
         }
         ColumnDataType::Float32 => collect_values!(values.f32_values, |v| ValueRef::from(*v)),
         ColumnDataType::Float64 => collect_values!(values.f64_values, |v| ValueRef::from(*v)),
@@ -174,9 +184,24 @@ fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Ve
                 DateTime::new(*v)
             ))
         }
-        ColumnDataType::Timestamp => {
-            collect_values!(values.ts_millis_values, |v| ValueRef::Timestamp(
-                Timestamp::from_millis(*v)
+        ColumnDataType::TimestampSecond => {
+            collect_values!(values.ts_second_values, |v| ValueRef::Timestamp(
+                Timestamp::new_second(*v)
+            ))
+        }
+        ColumnDataType::TimestampMillisecond => {
+            collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
+                Timestamp::new_millisecond(*v)
+            ))
+        }
+        ColumnDataType::TimestampMicrosecond => {
+            collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
+                Timestamp::new_microsecond(*v)
+            ))
+        }
+        ColumnDataType::TimestampNanosecond => {
+            collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
+                Timestamp::new_nanosecond(*v)
             ))
         }
     }
@@ -189,7 +214,7 @@ pub fn build_create_expr_from_insertion(
     table_id: Option<TableId>,
     table_name: &str,
     columns: &[Column],
-) -> Result<CreateExpr> {
+) -> Result<CreateTableExpr> {
     let mut new_columns: HashSet<String> = HashSet::default();
     let mut column_defs = Vec::default();
     let mut primary_key_indices = Vec::default();
@@ -238,17 +263,17 @@ pub fn build_create_expr_from_insertion(
         .map(|idx| columns[*idx].column_name.clone())
         .collect::<Vec<_>>();
 
-    let expr = CreateExpr {
-        catalog_name: Some(catalog_name.to_string()),
-        schema_name: Some(schema_name.to_string()),
+    let expr = CreateTableExpr {
+        catalog_name: catalog_name.to_string(),
+        schema_name: schema_name.to_string(),
         table_name: table_name.to_string(),
-        desc: Some("Created on insertion".to_string()),
+        desc: "Created on insertion".to_string(),
         column_defs,
         time_index: timestamp_field_name,
         primary_keys,
         create_if_not_exists: true,
         table_options: Default::default(),
-        table_id,
+        table_id: table_id.map(|id| api::v1::TableId { id }),
         region_ids: vec![0], // TODO:(hl): region id should be allocated by frontend
     };
 
@@ -289,10 +314,7 @@ pub fn insertion_expr_to_request(
                 },
             )?;
             let data_type = &column_schema.data_type;
-            entry.insert(VectorBuilder::with_capacity(
-                data_type.clone(),
-                row_count as usize,
-            ))
+            entry.insert(data_type.create_mutable_vector(row_count as usize))
         }
     };
     add_values_to_builder(vector_builder, values, row_count as usize, null_mask)?;
@@ -300,7 +322,7 @@ pub fn insertion_expr_to_request(
     }
     let columns_values = columns_builders
         .into_iter()
-        .map(|(column_name, mut vector_builder)| (column_name, vector_builder.finish()))
+        .map(|(column_name, mut vector_builder)| (column_name, vector_builder.to_vector()))
         .collect();
 
     Ok(InsertRequest {
@@ -312,7 +334,7 @@ pub fn insertion_expr_to_request(
 }
 
 fn add_values_to_builder(
-    builder: &mut VectorBuilder,
+    builder: &mut Box<dyn MutableVector>,
     values: Values,
     row_count: usize,
     null_mask: Vec<u8>,
@@ -323,9 +345,11 @@ fn add_values_to_builder(
     if null_mask.is_empty() {
         ensure!(values.len() == row_count, IllegalInsertDataSnafu);
 
-        values.iter().for_each(|value| {
-            builder.push(value);
-        });
+        values.iter().try_for_each(|value| {
+            builder
+                .push_value_ref(value.as_value_ref())
+                .context(CreateVectorSnafu)
+        })?;
     } else {
         let null_mask = BitVec::from_vec(null_mask);
         ensure!(
@@ -336,9 +360,13 @@ fn add_values_to_builder(
         let mut idx_of_values = 0;
         for idx in 0..row_count {
             match is_null(&null_mask, idx) {
-                Some(true) => builder.push(&Value::Null),
+                Some(true) => builder
+                    .push_value_ref(ValueRef::Null)
+                    .context(CreateVectorSnafu)?,
                 _ => {
-                    builder.push(&values[idx_of_values]);
+                    builder
+                        .push_value_ref(values[idx_of_values].as_value_ref())
+                        .context(CreateVectorSnafu)?;
                     idx_of_values += 1
                 }
             }
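Both branches consult `is_null` on a `BitVec` built from the wire-format null mask. A sketch of the lookup that implies, assuming (consistently with the null-mask tests elsewhere in this diff) one bit per row, least-significant bit first within each byte:

    // Returns None when idx is past the end of the mask.
    fn is_null(mask: &[u8], idx: usize) -> Option<bool> {
        mask.get(idx / 8).map(|byte| byte & (1 << (idx % 8)) != 0)
    }

    fn main() {
        // Rows 0 and 2 are null: bits 0 and 2 set, i.e. 0b0000_0101.
        let mask = [0b0000_0101u8];
        assert_eq!(Some(true), is_null(&mask, 0));
        assert_eq!(Some(false), is_null(&mask, 1));
        assert_eq!(Some(true), is_null(&mask, 2));
        assert_eq!(None, is_null(&mask, 8));
    }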
@@ -418,9 +446,9 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
             .map(|v| Value::Date(v.into()))
             .collect(),
         ConcreteDataType::Timestamp(_) => values
-            .ts_millis_values
+            .ts_millisecond_values
             .into_iter()
-            .map(|v| Value::Timestamp(Timestamp::from_millis(v)))
+            .map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
             .collect(),
         ConcreteDataType::Null(_) => unreachable!(),
         ConcreteDataType::List(_) => unreachable!(),
@@ -488,9 +516,9 @@ mod tests {
             build_create_expr_from_insertion("", "", table_id, table_name, &insert_batch.0)
                 .unwrap();
 
-        assert_eq!(table_id, create_expr.table_id);
+        assert_eq!(table_id, create_expr.table_id.map(|x| x.id));
         assert_eq!(table_name, create_expr.table_name);
-        assert_eq!(Some("Created on insertion".to_string()), create_expr.desc);
+        assert_eq!("Created on insertion".to_string(), create_expr.desc);
         assert_eq!(
             vec![create_expr.column_defs[0].name.clone()],
             create_expr.primary_keys
@@ -543,7 +571,7 @@ mod tests {
         );
 
         assert_eq!(
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             ConcreteDataType::from(
                 ColumnDataTypeWrapper::try_new(
                     column_defs
@@ -624,8 +652,8 @@ mod tests {
         assert_eq!(Value::Float64(0.1.into()), memory.get(1));
 
         let ts = insert_req.columns_values.get("ts").unwrap();
-        assert_eq!(Value::Timestamp(Timestamp::from_millis(100)), ts.get(0));
-        assert_eq!(Value::Timestamp(Timestamp::from_millis(101)), ts.get(1));
+        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(100)), ts.get(0));
+        assert_eq!(Value::Timestamp(Timestamp::new_millisecond(101)), ts.get(1));
     }
 
     #[test]
@@ -675,8 +703,12 @@ mod tests {
             ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
             ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
             ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
-            ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), true)
-                .with_time_index(true),
+            ColumnSchema::new(
+                "ts",
+                ConcreteDataType::timestamp_millisecond_datatype(),
+                true,
+            )
+            .with_time_index(true),
         ];
 
         Arc::new(
@@ -693,7 +725,7 @@ mod tests {
 
         async fn scan(
            &self,
-            _projection: &Option<Vec<usize>>,
+            _projection: Option<&Vec<usize>>,
             _filters: &[Expr],
             _limit: Option<usize>,
         ) -> TableResult<PhysicalPlanRef> {
@@ -741,7 +773,7 @@ mod tests {
         };
 
         let ts_vals = column::Values {
-            ts_millis_values: vec![100, 101],
+            ts_millisecond_values: vec![100, 101],
             ..Default::default()
         };
         let ts_column = Column {
@@ -749,7 +781,7 @@ mod tests {
             semantic_type: TIMESTAMP_SEMANTIC_TYPE,
             values: Some(ts_vals),
             null_mask: vec![0],
-            datatype: ColumnDataType::Timestamp as i32,
+            datatype: ColumnDataType::TimestampMillisecond as i32,
         };
 
         (
@@ -1,4 +1,3 @@
-#![feature(assert_matches)]
 // Copyright 2022 Greptime Team
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -1,8 +1,8 @@
 [package]
 name = "common-grpc"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 api = { path = "../../api" }
@@ -13,9 +13,7 @@ common-query = { path = "../query" }
 common-recordbatch = { path = "../recordbatch" }
 common-runtime = { path = "../runtime" }
 dashmap = "5.4"
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
-    "simd",
-] }
+datafusion.workspace = true
 datatypes = { path = "../../datatypes" }
 snafu = { version = "0.7", features = ["backtraces"] }
 tokio = { version = "1.0", features = ["full"] }
@@ -26,7 +26,7 @@ async fn do_bench_channel_manager() {
     let join = tokio::spawn(async move {
         for _ in 0..10000 {
             let idx = rand::random::<usize>() % 100;
-            let ret = m_clone.get(format!("{}", idx));
+            let ret = m_clone.get(format!("{idx}"));
             assert!(ret.is_ok());
         }
     });
@@ -120,7 +120,7 @@ impl ChannelManager {
 
     fn build_endpoint(&self, addr: &str) -> Result<Endpoint> {
         let mut endpoint =
-            Endpoint::new(format!("http://{}", addr)).context(error::CreateChannelSnafu)?;
+            Endpoint::new(format!("http://{addr}")).context(error::CreateChannelSnafu)?;
 
         if let Some(dur) = self.config.timeout {
             endpoint = endpoint.timeout(dur);
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::sync::Arc;
-
 use api::helper::ColumnDataTypeWrapper;
 use api::result::{build_err_result, ObjectResultBuilder};
 use api::v1::codec::SelectResult;
@@ -24,9 +22,14 @@ use common_error::prelude::ErrorExt;
 use common_error::status_code::StatusCode;
 use common_query::Output;
 use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
-use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
-use datatypes::arrow_array::{BinaryArray, StringArray};
 use datatypes::schema::SchemaRef;
+use datatypes::types::{TimestampType, WrapperType};
+use datatypes::vectors::{
+    BinaryVector, BooleanVector, DateTimeVector, DateVector, Float32Vector, Float64Vector,
+    Int16Vector, Int32Vector, Int64Vector, Int8Vector, StringVector, TimestampMicrosecondVector,
+    TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
+    UInt32Vector, UInt64Vector, UInt8Vector, VectorRef,
+};
 use snafu::{OptionExt, ResultExt};
 
 use crate::error::{self, ConversionSnafu, Result};
@@ -46,6 +49,7 @@ pub async fn to_object_result(output: std::result::Result<Output, impl ErrorExt>
         Err(e) => build_err_result(&e),
     }
 }
+
 async fn collect(stream: SendableRecordBatchStream) -> Result<ObjectResult> {
     let recordbatches = RecordBatches::try_collect(stream)
         .await
@@ -78,10 +82,7 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
     let schema = record_batches.schema();
     let record_batches = record_batches.take();
 
-    let row_count: usize = record_batches
-        .iter()
-        .map(|r| r.df_recordbatch.num_rows())
-        .sum();
+    let row_count: usize = record_batches.iter().map(|r| r.num_rows()).sum();
 
     let schemas = schema.column_schemas();
     let mut columns = Vec::with_capacity(schemas.len());
@@ -89,9 +90,9 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
     for (idx, column_schema) in schemas.iter().enumerate() {
         let column_name = column_schema.name.clone();
 
-        let arrays: Vec<Arc<dyn Array>> = record_batches
+        let arrays: Vec<_> = record_batches
             .iter()
-            .map(|r| r.df_recordbatch.columns()[idx].clone())
+            .map(|r| r.column(idx).clone())
             .collect();
 
         let column = Column {
@@ -112,7 +113,7 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
     })
 }
 
-pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
+pub fn null_mask(arrays: &[VectorRef], row_count: usize) -> Vec<u8> {
     let null_count: usize = arrays.iter().map(|a| a.null_count()).sum();
 
     if null_count == 0 {
@@ -122,10 +123,12 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
     let mut null_mask = BitVec::with_capacity(row_count);
     for array in arrays {
         let validity = array.validity();
-        if let Some(v) = validity {
-            v.iter().for_each(|x| null_mask.push(!x));
-        } else {
+        if validity.is_all_valid() {
             null_mask.extend_from_bitslice(&BitVec::repeat(false, array.len()));
+        } else {
+            for i in 0..array.len() {
+                null_mask.push(!validity.is_set(i));
+            }
         }
     }
     null_mask.into_vec()
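For reference, the byte layout `null_mask` produces can be reproduced in plain Rust; the assertion below mirrors `test_null_mask` further down in this diff (one bit per row across all vectors, least-significant bit first, a set bit marking a null, and an empty result when nothing is null):

    // Pure-Rust model of the null-mask layout, independent of the datatypes crate.
    fn null_mask(columns: &[Vec<Option<i32>>]) -> Vec<u8> {
        let total_nulls: usize = columns
            .iter()
            .map(|c| c.iter().filter(|v| v.is_none()).count())
            .sum();
        if total_nulls == 0 {
            return Vec::new();
        }
        let mut mask = Vec::new();
        let mut bit = 0usize;
        for column in columns {
            for value in column {
                if bit % 8 == 0 {
                    mask.push(0);
                }
                if value.is_none() {
                    *mask.last_mut().unwrap() |= 1 << (bit % 8);
                }
                bit += 1;
            }
        }
        mask
    }

    fn main() {
        let a1 = vec![None, Some(2), None];
        let a2 = vec![Some(1), Some(2), None, Some(4)];
        // Nulls at global rows 0, 2, and 5 -> 0b0010_0101, as in test_null_mask.
        assert_eq!(vec![0b0010_0101], null_mask(&[a1, a2]));
    }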
@@ -133,7 +136,9 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
 
 macro_rules! convert_arrow_array_to_grpc_vals {
     ($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {{
-        use datatypes::arrow::datatypes::{DataType, TimeUnit};
+        use datatypes::data_type::{ConcreteDataType};
+        use datatypes::prelude::ScalarVector;
+
 
         match $data_type {
             $(
@@ -143,52 +148,114 @@ macro_rules! convert_arrow_array_to_grpc_vals {
                         from: format!("{:?}", $data_type),
                     })?;
                     vals.$field.extend(array
-                        .iter()
+                        .iter_data()
                         .filter_map(|i| i.map($MapFunction))
                         .collect::<Vec<_>>());
                 }
                 return Ok(vals);
             },
             )+
-            _ => unimplemented!(),
+            ConcreteDataType::Null(_) | ConcreteDataType::List(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
         }
     }};
 }
 
-pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
+pub fn values(arrays: &[VectorRef]) -> Result<Values> {
     if arrays.is_empty() {
         return Ok(Values::default());
     }
     let data_type = arrays[0].data_type();
 
     convert_arrow_array_to_grpc_vals!(
-        data_type, arrays,
-
-        (DataType::Boolean, BooleanArray, bool_values, |x| {x}),
-
-        (DataType::Int8, PrimitiveArray<i8>, i8_values, |x| {*x as i32}),
-        (DataType::Int16, PrimitiveArray<i16>, i16_values, |x| {*x as i32}),
-        (DataType::Int32, PrimitiveArray<i32>, i32_values, |x| {*x}),
-        (DataType::Int64, PrimitiveArray<i64>, i64_values, |x| {*x}),
-
-        (DataType::UInt8, PrimitiveArray<u8>, u8_values, |x| {*x as u32}),
-        (DataType::UInt16, PrimitiveArray<u16>, u16_values, |x| {*x as u32}),
-        (DataType::UInt32, PrimitiveArray<u32>, u32_values, |x| {*x}),
-        (DataType::UInt64, PrimitiveArray<u64>, u64_values, |x| {*x}),
-
-        (DataType::Float32, PrimitiveArray<f32>, f32_values, |x| {*x}),
-        (DataType::Float64, PrimitiveArray<f64>, f64_values, |x| {*x}),
-
-        (DataType::Binary, BinaryArray, binary_values, |x| {x.into()}),
-        (DataType::LargeBinary, BinaryArray, binary_values, |x| {x.into()}),
-
-        (DataType::Utf8, StringArray, string_values, |x| {x.into()}),
-        (DataType::LargeUtf8, StringArray, string_values, |x| {x.into()}),
-
-        (DataType::Date32, PrimitiveArray<i32>, date_values, |x| {*x as i32}),
-        (DataType::Date64, PrimitiveArray<i64>, datetime_values,|x| {*x as i64}),
-
-        (DataType::Timestamp(TimeUnit::Millisecond, _), PrimitiveArray<i64>, ts_millis_values, |x| {*x})
+        data_type,
+        arrays,
+        (
+            ConcreteDataType::Boolean(_),
+            BooleanVector,
+            bool_values,
+            |x| { x }
+        ),
+        (ConcreteDataType::Int8(_), Int8Vector, i8_values, |x| {
+            i32::from(x)
+        }),
+        (ConcreteDataType::Int16(_), Int16Vector, i16_values, |x| {
+            i32::from(x)
+        }),
+        (ConcreteDataType::Int32(_), Int32Vector, i32_values, |x| {
+            x
+        }),
+        (ConcreteDataType::Int64(_), Int64Vector, i64_values, |x| {
+            x
+        }),
+        (ConcreteDataType::UInt8(_), UInt8Vector, u8_values, |x| {
+            u32::from(x)
+        }),
+        (ConcreteDataType::UInt16(_), UInt16Vector, u16_values, |x| {
+            u32::from(x)
+        }),
+        (ConcreteDataType::UInt32(_), UInt32Vector, u32_values, |x| {
+            x
+        }),
+        (ConcreteDataType::UInt64(_), UInt64Vector, u64_values, |x| {
+            x
+        }),
+        (
+            ConcreteDataType::Float32(_),
+            Float32Vector,
+            f32_values,
+            |x| { x }
+        ),
+        (
+            ConcreteDataType::Float64(_),
+            Float64Vector,
+            f64_values,
+            |x| { x }
+        ),
+        (
+            ConcreteDataType::Binary(_),
+            BinaryVector,
+            binary_values,
+            |x| { x.into() }
+        ),
+        (
+            ConcreteDataType::String(_),
+            StringVector,
+            string_values,
+            |x| { x.into() }
+        ),
+        (ConcreteDataType::Date(_), DateVector, date_values, |x| {
+            x.val()
+        }),
+        (
+            ConcreteDataType::DateTime(_),
+            DateTimeVector,
+            datetime_values,
+            |x| { x.val() }
+        ),
+        (
+            ConcreteDataType::Timestamp(TimestampType::Second(_)),
+            TimestampSecondVector,
+            ts_second_values,
+            |x| { x.into_native() }
+        ),
+        (
+            ConcreteDataType::Timestamp(TimestampType::Millisecond(_)),
+            TimestampMillisecondVector,
+            ts_millisecond_values,
+            |x| { x.into_native() }
+        ),
+        (
+            ConcreteDataType::Timestamp(TimestampType::Microsecond(_)),
+            TimestampMicrosecondVector,
+            ts_microsecond_values,
+            |x| { x.into_native() }
+        ),
+        (
+            ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)),
+            TimestampNanosecondVector,
+            ts_nanosecond_values,
+            |x| { x.into_native() }
+        )
     )
 }
 
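The macro above pairs a `ConcreteDataType` pattern with a vector type, a target field, and a mapping closure, and expands them into match arms with `$( ... )+`. A self-contained illustration of just the expansion mechanics (the real macro additionally downcasts to `$CastType` and extends the matching field of `Values`):

    enum Kind {
        Bool,
        Int,
        Float,
    }

    // A list of (pattern, handler) tuples expands into match arms.
    macro_rules! describe_kind {
        ($kind:expr, $(($Pat:pat, $name:expr)),+) => {{
            match $kind {
                $(
                    $Pat => $name,
                )+
            }
        }};
    }

    fn main() {
        let kind = Kind::Int;
        let name = describe_kind!(
            kind,
            (Kind::Bool, "bool"),
            (Kind::Int, "int"),
            (Kind::Float, "float")
        );
        assert_eq!("int", name);
    }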
@@ -197,14 +264,10 @@ mod tests {
     use std::sync::Arc;
 
     use common_recordbatch::{RecordBatch, RecordBatches};
-    use datafusion::field_util::SchemaExt;
-    use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
-    use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
-    use datatypes::arrow_array::StringArray;
-    use datatypes::schema::Schema;
-    use datatypes::vectors::{UInt32Vector, VectorRef};
+    use datatypes::data_type::ConcreteDataType;
+    use datatypes::schema::{ColumnSchema, Schema};
 
-    use crate::select::{null_mask, try_convert, values};
+    use super::*;
 
     #[test]
     fn test_convert_record_batches_to_select_result() {
@@ -230,9 +293,8 @@ mod tests {
 
     #[test]
     fn test_convert_arrow_arrays_i32() {
-        let array: PrimitiveArray<i32> =
-            PrimitiveArray::from(vec![Some(1), Some(2), None, Some(3)]);
-        let array: Arc<dyn Array> = Arc::new(array);
+        let array = Int32Vector::from(vec![Some(1), Some(2), None, Some(3)]);
+        let array: VectorRef = Arc::new(array);
 
         let values = values(&[array]).unwrap();
 
@@ -241,14 +303,14 @@ mod tests {
 
     #[test]
     fn test_convert_arrow_arrays_string() {
-        let array = StringArray::from(vec![
+        let array = StringVector::from(vec![
             Some("1".to_string()),
             Some("2".to_string()),
             None,
             Some("3".to_string()),
             None,
         ]);
-        let array: Arc<dyn Array> = Arc::new(array);
+        let array: VectorRef = Arc::new(array);
 
         let values = values(&[array]).unwrap();
 
@@ -257,8 +319,8 @@ mod tests {
 
     #[test]
     fn test_convert_arrow_arrays_bool() {
-        let array = BooleanArray::from(vec![Some(true), Some(false), None, Some(false), None]);
-        let array: Arc<dyn Array> = Arc::new(array);
+        let array = BooleanVector::from(vec![Some(true), Some(false), None, Some(false), None]);
+        let array: VectorRef = Arc::new(array);
 
         let values = values(&[array]).unwrap();
 
@@ -267,43 +329,42 @@ mod tests {
 
     #[test]
     fn test_convert_arrow_arrays_empty() {
-        let array = BooleanArray::from(vec![None, None, None, None, None]);
-        let array: Arc<dyn Array> = Arc::new(array);
+        let array = BooleanVector::from(vec![None, None, None, None, None]);
+        let array: VectorRef = Arc::new(array);
 
         let values = values(&[array]).unwrap();
 
-        assert_eq!(Vec::<bool>::default(), values.bool_values);
+        assert!(values.bool_values.is_empty());
     }
 
     #[test]
     fn test_null_mask() {
-        let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![None, Some(2), None]));
-        let a2: Arc<dyn Array> =
-            Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), None, Some(4)]));
-        let mask = null_mask(&vec![a1, a2], 3 + 4);
+        let a1: VectorRef = Arc::new(Int32Vector::from(vec![None, Some(2), None]));
+        let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), None, Some(4)]));
+        let mask = null_mask(&[a1, a2], 3 + 4);
         assert_eq!(vec![0b0010_0101], mask);
 
-        let empty: Arc<dyn Array> = Arc::new(PrimitiveArray::<i32>::from(vec![None, None, None]));
-        let mask = null_mask(&vec![empty.clone(), empty.clone(), empty], 9);
+        let empty: VectorRef = Arc::new(Int32Vector::from(vec![None, None, None]));
+        let mask = null_mask(&[empty.clone(), empty.clone(), empty], 9);
         assert_eq!(vec![0b1111_1111, 0b0000_0001], mask);
 
-        let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), Some(3)]));
-        let a2: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(4), Some(5), Some(6)]));
-        let mask = null_mask(&vec![a1, a2], 3 + 3);
+        let a1: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), Some(3)]));
+        let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(4), Some(5), Some(6)]));
+        let mask = null_mask(&[a1, a2], 3 + 3);
         assert_eq!(Vec::<u8>::default(), mask);
 
-        let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), Some(3)]));
-        let a2: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(4), Some(5), None]));
-        let mask = null_mask(&vec![a1, a2], 3 + 3);
+        let a1: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), Some(3)]));
+        let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(4), Some(5), None]));
+        let mask = null_mask(&[a1, a2], 3 + 3);
         assert_eq!(vec![0b0010_0000], mask);
     }
 
     fn mock_record_batch() -> RecordBatch {
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![
-            Field::new("c1", DataType::UInt32, false),
-            Field::new("c2", DataType::UInt32, false),
-        ]));
-        let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());
+        let column_schemas = vec![
+            ColumnSchema::new("c1", ConcreteDataType::uint32_datatype(), true),
+            ColumnSchema::new("c2", ConcreteDataType::uint32_datatype(), true),
+        ];
+        let schema = Arc::new(Schema::try_new(column_schemas).unwrap());
 
         let v1 = Arc::new(UInt32Vector::from(vec![Some(1), Some(2), None]));
         let v2 = Arc::new(UInt32Vector::from(vec![Some(1), None, None]));
@@ -45,11 +45,11 @@ impl LinesWriter {
     pub fn write_ts(&mut self, column_name: &str, value: (i64, Precision)) -> Result<()> {
         let (idx, column) = self.mut_column(
             column_name,
-            ColumnDataType::Timestamp,
+            ColumnDataType::TimestampMillisecond,
             SemanticType::Timestamp,
         );
         ensure!(
-            column.datatype == ColumnDataType::Timestamp as i32,
+            column.datatype == ColumnDataType::TimestampMillisecond as i32,
             TypeMismatchSnafu {
                 column_name,
                 expected: "timestamp",
@@ -58,7 +58,9 @@ impl LinesWriter {
         );
         // It is safe to use unwrap here, because values has been initialized in mut_column()
         let values = column.values.as_mut().unwrap();
-        values.ts_millis_values.push(to_ms_ts(value.1, value.0));
+        values
+            .ts_millisecond_values
+            .push(to_ms_ts(value.1, value.0));
         self.null_masks[idx].push(false);
         Ok(())
     }
@@ -224,23 +226,23 @@ impl LinesWriter {
 
 pub fn to_ms_ts(p: Precision, ts: i64) -> i64 {
     match p {
-        Precision::NANOSECOND => ts / 1_000_000,
-        Precision::MICROSECOND => ts / 1000,
-        Precision::MILLISECOND => ts,
-        Precision::SECOND => ts * 1000,
-        Precision::MINUTE => ts * 1000 * 60,
-        Precision::HOUR => ts * 1000 * 60 * 60,
+        Precision::Nanosecond => ts / 1_000_000,
+        Precision::Microsecond => ts / 1000,
+        Precision::Millisecond => ts,
+        Precision::Second => ts * 1000,
+        Precision::Minute => ts * 1000 * 60,
+        Precision::Hour => ts * 1000 * 60 * 60,
     }
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Precision {
-    NANOSECOND,
-    MICROSECOND,
-    MILLISECOND,
-    SECOND,
-    MINUTE,
-    HOUR,
+    Nanosecond,
+    Microsecond,
+    Millisecond,
+    Second,
+    Minute,
+    Hour,
 }
 
 #[cfg(test)]
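Since `to_ms_ts` normalizes every precision to milliseconds, the arithmetic is easy to check standalone: sub-millisecond precisions divide (truncating), coarser precisions multiply. This copy of the table compiles on its own and reproduces the test values used below:

    #[derive(Clone, Copy)]
    enum Precision {
        Nanosecond,
        Microsecond,
        Millisecond,
        Second,
        Minute,
        Hour,
    }

    fn to_ms_ts(p: Precision, ts: i64) -> i64 {
        match p {
            Precision::Nanosecond => ts / 1_000_000,
            Precision::Microsecond => ts / 1000,
            Precision::Millisecond => ts,
            Precision::Second => ts * 1000,
            Precision::Minute => ts * 1000 * 60,
            Precision::Hour => ts * 1000 * 60 * 60,
        }
    }

    fn main() {
        // 100_110_000 ns truncates to 100 ms, matching test_to_ms below.
        assert_eq!(100, to_ms_ts(Precision::Nanosecond, 100_110_000));
        assert_eq!(1000, to_ms_ts(Precision::Second, 1));
    }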
@@ -261,13 +263,13 @@ mod tests {
         writer.write_f64("memory", 0.4).unwrap();
         writer.write_string("name", "name1").unwrap();
         writer
-            .write_ts("ts", (101011000, Precision::MILLISECOND))
+            .write_ts("ts", (101011000, Precision::Millisecond))
             .unwrap();
         writer.commit();
 
         writer.write_tag("host", "host2").unwrap();
         writer
-            .write_ts("ts", (102011001, Precision::MILLISECOND))
+            .write_ts("ts", (102011001, Precision::Millisecond))
             .unwrap();
         writer.write_bool("enable_reboot", true).unwrap();
         writer.write_u64("year_of_service", 2).unwrap();
@@ -278,7 +280,7 @@ mod tests {
         writer.write_f64("cpu", 0.4).unwrap();
         writer.write_u64("cpu_core_num", 16).unwrap();
         writer
-            .write_ts("ts", (103011002, Precision::MILLISECOND))
+            .write_ts("ts", (103011002, Precision::Millisecond))
             .unwrap();
         writer.commit();
 
@@ -321,11 +323,11 @@ mod tests {
 
         let column = &columns[4];
         assert_eq!("ts", column.column_name);
-        assert_eq!(ColumnDataType::Timestamp as i32, column.datatype);
+        assert_eq!(ColumnDataType::TimestampMillisecond as i32, column.datatype);
         assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
         assert_eq!(
             vec![101011000, 102011001, 103011002],
-            column.values.as_ref().unwrap().ts_millis_values
+            column.values.as_ref().unwrap().ts_millisecond_values
         );
         verify_null_mask(&column.null_mask, vec![false, false, false]);
 
@@ -367,16 +369,16 @@ mod tests {
 
     #[test]
     fn test_to_ms() {
-        assert_eq!(100, to_ms_ts(Precision::NANOSECOND, 100110000));
-        assert_eq!(100110, to_ms_ts(Precision::MICROSECOND, 100110000));
-        assert_eq!(100110000, to_ms_ts(Precision::MILLISECOND, 100110000));
+        assert_eq!(100, to_ms_ts(Precision::Nanosecond, 100110000));
+        assert_eq!(100110, to_ms_ts(Precision::Microsecond, 100110000));
+        assert_eq!(100110000, to_ms_ts(Precision::Millisecond, 100110000));
         assert_eq!(
             100110000 * 1000 * 60,
-            to_ms_ts(Precision::MINUTE, 100110000)
+            to_ms_ts(Precision::Minute, 100110000)
         );
         assert_eq!(
             100110000 * 1000 * 60 * 60,
-            to_ms_ts(Precision::HOUR, 100110000)
+            to_ms_ts(Precision::Hour, 100110000)
         );
     }
 }
@@ -1,19 +1,17 @@
 [package]
 name = "common-query"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 async-trait = "0.1"
 common-error = { path = "../error" }
 common-recordbatch = { path = "../recordbatch" }
 common-time = { path = "../time" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
-    "simd",
-] }
-datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
-datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
+datafusion.workspace = true
+datafusion-common.workspace = true
+datafusion-expr.workspace = true
 datatypes = { path = "../../datatypes" }
 snafu = { version = "0.7", features = ["backtraces"] }
 statrs = "0.15"
@@ -23,16 +23,9 @@ use datatypes::error::Error as DataTypeError;
 use datatypes::prelude::ConcreteDataType;
 use statrs::StatsError;
 
-common_error::define_opaque_error!(Error);
-
 #[derive(Debug, Snafu)]
 #[snafu(visibility(pub))]
-pub enum InnerError {
-    #[snafu(display("Fail to cast array to {:?}, source: {}", typ, source))]
-    TypeCast {
-        source: ArrowError,
-        typ: arrow::datatypes::DataType,
-    },
+pub enum Error {
     #[snafu(display("Fail to execute function, source: {}", source))]
     ExecuteFunction {
         source: DataFusionError,
@@ -83,8 +76,8 @@ pub enum InnerError {
         backtrace: Backtrace,
     },
 
-    #[snafu(display("Invalid inputs: {}", err_msg))]
-    InvalidInputs {
+    #[snafu(display("Invalid input type: {}", err_msg))]
+    InvalidInputType {
         #[snafu(backtrace)]
         source: DataTypeError,
         err_msg: String,
@@ -133,37 +126,74 @@ pub enum InnerError {
         #[snafu(backtrace)]
         source: BoxedError,
     },
 
+    #[snafu(display("Failed to cast array to {:?}, source: {}", typ, source))]
+    TypeCast {
+        source: ArrowError,
+        typ: arrow::datatypes::DataType,
+        backtrace: Backtrace,
+    },
+
+    #[snafu(display(
+        "Failed to perform compute operation on arrow arrays, source: {}",
+        source
+    ))]
+    ArrowCompute {
+        source: ArrowError,
+        backtrace: Backtrace,
+    },
+
+    #[snafu(display("Query engine fail to cast value: {}", source))]
+    ToScalarValue {
+        #[snafu(backtrace)]
+        source: DataTypeError,
+    },
+
+    #[snafu(display("Failed to get scalar vector, {}", source))]
+    GetScalarVector {
+        #[snafu(backtrace)]
+        source: DataTypeError,
+    },
+
+    #[snafu(display("Invalid function args: {}", err_msg))]
+    InvalidFuncArgs {
+        err_msg: String,
+        backtrace: Backtrace,
+    },
 }
 
 pub type Result<T> = std::result::Result<T, Error>;
 
-impl ErrorExt for InnerError {
+impl ErrorExt for Error {
     fn status_code(&self) -> StatusCode {
         match self {
-            InnerError::ExecuteFunction { .. }
-            | InnerError::GenerateFunction { .. }
-            | InnerError::CreateAccumulator { .. }
-            | InnerError::DowncastVector { .. }
-            | InnerError::InvalidInputState { .. }
-            | InnerError::InvalidInputCol { .. }
-            | InnerError::BadAccumulatorImpl { .. } => StatusCode::EngineExecuteQuery,
+            Error::ExecuteFunction { .. }
+            | Error::GenerateFunction { .. }
+            | Error::CreateAccumulator { .. }
+            | Error::DowncastVector { .. }
+            | Error::InvalidInputState { .. }
+            | Error::InvalidInputCol { .. }
+            | Error::BadAccumulatorImpl { .. }
+            | Error::ToScalarValue { .. }
+            | Error::GetScalarVector { .. }
+            | Error::ArrowCompute { .. } => StatusCode::EngineExecuteQuery,
 
-            InnerError::InvalidInputs { source, .. }
-            | InnerError::IntoVector { source, .. }
-            | InnerError::FromScalarValue { source }
-            | InnerError::ConvertArrowSchema { source }
-            | InnerError::FromArrowArray { source } => source.status_code(),
+            Error::InvalidInputType { source, .. }
+            | Error::IntoVector { source, .. }
+            | Error::FromScalarValue { source }
+            | Error::ConvertArrowSchema { source }
+            | Error::FromArrowArray { source } => source.status_code(),
 
-            InnerError::ExecuteRepeatedly { .. }
-            | InnerError::GeneralDataFusion { .. }
-            | InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
+            Error::ExecuteRepeatedly { .. }
+            | Error::GeneralDataFusion { .. }
+            | Error::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
 
-            InnerError::UnsupportedInputDataType { .. } | InnerError::TypeCast { .. } => {
-                StatusCode::InvalidArguments
-            }
+            Error::UnsupportedInputDataType { .. }
+            | Error::TypeCast { .. }
+            | Error::InvalidFuncArgs { .. } => StatusCode::InvalidArguments,
 
-            InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
-            InnerError::ExecutePhysicalPlan { source } => source.status_code(),
+            Error::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
+            Error::ExecutePhysicalPlan { source } => source.status_code(),
         }
     }
 }
@@ -176,12 +206,6 @@ impl ErrorExt for InnerError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<InnerError> for Error {
|
|
||||||
fn from(e: InnerError) -> Error {
|
|
||||||
Error::new(e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<Error> for DataFusionError {
|
impl From<Error> for DataFusionError {
|
||||||
fn from(e: Error) -> DataFusionError {
|
fn from(e: Error) -> DataFusionError {
|
||||||
DataFusionError::External(Box::new(e))
|
DataFusionError::External(Box::new(e))
|
||||||
@@ -190,7 +214,7 @@ impl From<Error> for DataFusionError {
|
|||||||
|
|
||||||
impl From<BoxedError> for Error {
|
impl From<BoxedError> for Error {
|
||||||
fn from(source: BoxedError) -> Self {
|
fn from(source: BoxedError) -> Self {
|
||||||
InnerError::ExecutePhysicalPlan { source }.into()
|
Error::ExecutePhysicalPlan { source }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
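The thread through these error.rs hunks: the crate previously wrapped a private `InnerError` in an opaque `Error` via `define_opaque_error!`, and every construction site needed a trailing `.into()`. After the change, the snafu enum itself is the public `Error`. A minimal sketch of the resulting pattern, with a hypothetical single-variant enum (not the crate's full definition):

```rust
use datafusion_common::DataFusionError;
use snafu::Snafu;

// The enum is the public error type now; no opaque wrapper, no `.into()`
// needed at construction sites.
#[derive(Debug, Snafu)]
pub enum Error {
    #[snafu(display("Fail to execute function, source: {}", source))]
    ExecuteFunction { source: DataFusionError },
}

// Crossing back into DataFusion still boxes the error, as in the hunk above.
impl From<Error> for DataFusionError {
    fn from(e: Error) -> DataFusionError {
        DataFusionError::External(Box::new(e))
    }
}
```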
@@ -206,60 +230,51 @@ mod tests {
     }
 
     fn assert_error(err: &Error, code: StatusCode) {
-        let inner_err = err.as_any().downcast_ref::<InnerError>().unwrap();
+        let inner_err = err.as_any().downcast_ref::<Error>().unwrap();
         assert_eq!(code, inner_err.status_code());
         assert!(inner_err.backtrace_opt().is_some());
     }
 
     #[test]
     fn test_datafusion_as_source() {
-        let err: Error = throw_df_error()
+        let err = throw_df_error()
             .context(ExecuteFunctionSnafu)
             .err()
-            .unwrap()
-            .into();
+            .unwrap();
         assert_error(&err, StatusCode::EngineExecuteQuery);
 
         let err: Error = throw_df_error()
             .context(GeneralDataFusionSnafu)
             .err()
-            .unwrap()
-            .into();
+            .unwrap();
         assert_error(&err, StatusCode::Unexpected);
 
-        let err: Error = throw_df_error()
+        let err = throw_df_error()
             .context(DataFusionExecutionPlanSnafu)
             .err()
-            .unwrap()
-            .into();
+            .unwrap();
         assert_error(&err, StatusCode::Unexpected);
     }
 
     #[test]
     fn test_execute_repeatedly_error() {
-        let error: Error = None::<i32>
-            .context(ExecuteRepeatedlySnafu)
-            .err()
-            .unwrap()
-            .into();
-        assert_eq!(error.inner.status_code(), StatusCode::Unexpected);
+        let error = None::<i32>.context(ExecuteRepeatedlySnafu).err().unwrap();
+        assert_eq!(error.status_code(), StatusCode::Unexpected);
         assert!(error.backtrace_opt().is_some());
     }
 
     #[test]
     fn test_convert_df_recordbatch_stream_error() {
         let result: std::result::Result<i32, common_recordbatch::error::Error> =
-            Err(common_recordbatch::error::InnerError::PollStream {
-                source: ArrowError::Overflow,
+            Err(common_recordbatch::error::Error::PollStream {
+                source: ArrowError::DivideByZero,
                 backtrace: Backtrace::generate(),
-            }
-            .into());
-        let error: Error = result
+            });
+        let error = result
             .context(ConvertDfRecordBatchStreamSnafu)
             .err()
-            .unwrap()
-            .into();
-        assert_eq!(error.inner.status_code(), StatusCode::Internal);
+            .unwrap();
+        assert_eq!(error.status_code(), StatusCode::Internal);
         assert!(error.backtrace_opt().is_some());
     }
 
@@ -272,13 +287,12 @@ mod tests {
 
     #[test]
     fn test_into_vector_error() {
-        let err: Error = raise_datatype_error()
+        let err = raise_datatype_error()
             .context(IntoVectorSnafu {
                 data_type: ArrowDatatype::Int32,
             })
             .err()
-            .unwrap()
-            .into();
+            .unwrap();
         assert!(err.backtrace_opt().is_some());
         let datatype_err = raise_datatype_error().err().unwrap();
         assert_eq!(datatype_err.status_code(), err.status_code());
@@ -161,12 +161,7 @@ mod tests {
 
             assert_eq!(4, vec.len());
             for i in 0..4 {
-                assert_eq!(
-                    i == 0 || i == 3,
-                    vec.get_data(i).unwrap(),
-                    "failed at {}",
-                    i
-                )
+                assert_eq!(i == 0 || i == 3, vec.get_data(i).unwrap(), "Failed at {i}")
             }
         }
         _ => unreachable!(),
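The rewritten assertion relies on inlined format arguments (stable since Rust 1.58): an identifier inside `{}` captures the variable of that name from the enclosing scope, which is what lets the five-line `assert_eq!` collapse to one. A tiny standalone check:

```rust
fn main() {
    let i = 3;
    // Positional and captured forms produce identical output.
    assert_eq!(format!("Failed at {}", i), format!("Failed at {i}"));
}
```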
@@ -22,7 +22,7 @@ use std::sync::Arc;
 use datatypes::prelude::ConcreteDataType;
 
 pub use self::accumulator::{Accumulator, AggregateFunctionCreator, AggregateFunctionCreatorRef};
-pub use self::expr::Expr;
+pub use self::expr::{DfExpr, Expr};
 pub use self::udaf::AggregateFunction;
 pub use self::udf::ScalarUdf;
 use crate::function::{ReturnTypeFunction, ScalarFunctionImplementation};
@@ -148,9 +148,7 @@ mod tests {
 
         let args = vec![
             DfColumnarValue::Scalar(ScalarValue::Boolean(Some(true))),
-            DfColumnarValue::Array(Arc::new(BooleanArray::from_slice(vec![
-                true, false, false, true,
-            ]))),
+            DfColumnarValue::Array(Arc::new(BooleanArray::from(vec![true, false, false, true]))),
         ];
 
         // call the function
@@ -17,12 +17,10 @@
 use std::fmt::Debug;
 use std::sync::Arc;
 
-use common_time::timestamp::TimeUnit;
 use datafusion_common::Result as DfResult;
 use datafusion_expr::Accumulator as DfAccumulator;
 use datatypes::arrow::array::ArrayRef;
 use datatypes::prelude::*;
-use datatypes::value::ListValue;
 use datatypes::vectors::{Helper as VectorHelper, VectorRef};
 use snafu::ResultExt;
 
@@ -133,351 +131,48 @@ impl DfAccumulator for DfAccumulatorAdaptor {
         let state_types = self.creator.state_types()?;
         if state_values.len() != state_types.len() {
             return error::BadAccumulatorImplSnafu {
-                err_msg: format!("Accumulator {:?} returned state values size do not match its state types size.", self),
+                err_msg: format!("Accumulator {self:?} returned state values size do not match its state types size."),
             }
-            .fail()
-            .map_err(Error::from)?;
+            .fail()?;
         }
         Ok(state_values
             .into_iter()
             .zip(state_types.iter())
-            .map(|(v, t)| try_into_scalar_value(v, t))
-            .collect::<Result<Vec<_>>>()
-            .map_err(Error::from)?)
+            .map(|(v, t)| v.try_to_scalar_value(t).context(error::ToScalarValueSnafu))
+            .collect::<Result<Vec<_>>>()?)
     }
 
     fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
-        let vectors = VectorHelper::try_into_vectors(values)
-            .context(FromScalarValueSnafu)
-            .map_err(Error::from)?;
-        self.accumulator
-            .update_batch(&vectors)
-            .map_err(|e| e.into())
+        let vectors = VectorHelper::try_into_vectors(values).context(FromScalarValueSnafu)?;
+        self.accumulator.update_batch(&vectors)?;
+        Ok(())
     }
 
     fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
         let mut vectors = Vec::with_capacity(states.len());
         for array in states.iter() {
             vectors.push(
-                VectorHelper::try_into_vector(array)
-                    .context(IntoVectorSnafu {
-                        data_type: array.data_type().clone(),
-                    })
-                    .map_err(Error::from)?,
+                VectorHelper::try_into_vector(array).context(IntoVectorSnafu {
+                    data_type: array.data_type().clone(),
+                })?,
             );
         }
-        self.accumulator.merge_batch(&vectors).map_err(|e| e.into())
+        self.accumulator.merge_batch(&vectors)?;
+        Ok(())
     }
 
     fn evaluate(&self) -> DfResult<ScalarValue> {
         let value = self.accumulator.evaluate()?;
         let output_type = self.creator.output_type()?;
-        Ok(try_into_scalar_value(value, &output_type)?)
-    }
-}
-
-fn try_into_scalar_value(value: Value, datatype: &ConcreteDataType) -> Result<ScalarValue> {
-    if !matches!(value, Value::Null) && datatype != &value.data_type() {
-        return error::BadAccumulatorImplSnafu {
-            err_msg: format!(
-                "expect value to return datatype {:?}, actual: {:?}",
-                datatype,
-                value.data_type()
-            ),
-        }
-        .fail()?;
-    }
-
-    Ok(match value {
-        Value::Boolean(v) => ScalarValue::Boolean(Some(v)),
-        Value::UInt8(v) => ScalarValue::UInt8(Some(v)),
-        Value::UInt16(v) => ScalarValue::UInt16(Some(v)),
-        Value::UInt32(v) => ScalarValue::UInt32(Some(v)),
-        Value::UInt64(v) => ScalarValue::UInt64(Some(v)),
-        Value::Int8(v) => ScalarValue::Int8(Some(v)),
-        Value::Int16(v) => ScalarValue::Int16(Some(v)),
-        Value::Int32(v) => ScalarValue::Int32(Some(v)),
-        Value::Int64(v) => ScalarValue::Int64(Some(v)),
-        Value::Float32(v) => ScalarValue::Float32(Some(v.0)),
-        Value::Float64(v) => ScalarValue::Float64(Some(v.0)),
-        Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())),
-        Value::Binary(v) => ScalarValue::LargeBinary(Some(v.to_vec())),
-        Value::Date(v) => ScalarValue::Date32(Some(v.val())),
-        Value::DateTime(v) => ScalarValue::Date64(Some(v.val())),
-        Value::Null => try_convert_null_value(datatype)?,
-        Value::List(list) => try_convert_list_value(list)?,
-        Value::Timestamp(t) => timestamp_to_scalar_value(t.unit(), Some(t.value())),
-    })
-}
-
-fn timestamp_to_scalar_value(unit: TimeUnit, val: Option<i64>) -> ScalarValue {
-    match unit {
-        TimeUnit::Second => ScalarValue::TimestampSecond(val, None),
-        TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(val, None),
-        TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(val, None),
-        TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(val, None),
-    }
-}
-
-fn try_convert_null_value(datatype: &ConcreteDataType) -> Result<ScalarValue> {
-    Ok(match datatype {
-        ConcreteDataType::Boolean(_) => ScalarValue::Boolean(None),
-        ConcreteDataType::Int8(_) => ScalarValue::Int8(None),
-        ConcreteDataType::Int16(_) => ScalarValue::Int16(None),
-        ConcreteDataType::Int32(_) => ScalarValue::Int32(None),
-        ConcreteDataType::Int64(_) => ScalarValue::Int64(None),
-        ConcreteDataType::UInt8(_) => ScalarValue::UInt8(None),
-        ConcreteDataType::UInt16(_) => ScalarValue::UInt16(None),
-        ConcreteDataType::UInt32(_) => ScalarValue::UInt32(None),
-        ConcreteDataType::UInt64(_) => ScalarValue::UInt64(None),
-        ConcreteDataType::Float32(_) => ScalarValue::Float32(None),
-        ConcreteDataType::Float64(_) => ScalarValue::Float64(None),
-        ConcreteDataType::Binary(_) => ScalarValue::LargeBinary(None),
-        ConcreteDataType::String(_) => ScalarValue::Utf8(None),
-        ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit, None),
-        _ => {
-            return error::BadAccumulatorImplSnafu {
-                err_msg: format!(
-                    "undefined transition from null value to datatype {:?}",
-                    datatype
-                ),
-            }
-            .fail()?
-        }
-    })
-}
-
-fn try_convert_list_value(list: ListValue) -> Result<ScalarValue> {
-    let vs = if let Some(items) = list.items() {
-        Some(Box::new(
-            items
-                .iter()
-                .map(|v| try_into_scalar_value(v.clone(), list.datatype()))
-                .collect::<Result<Vec<_>>>()?,
-        ))
-    } else {
-        None
-    };
-    Ok(ScalarValue::List(
-        vs,
-        Box::new(list.datatype().as_arrow_type()),
-    ))
-}
-
-#[cfg(test)]
-mod tests {
-    use common_base::bytes::{Bytes, StringBytes};
-    use datafusion_common::ScalarValue;
-    use datatypes::arrow::datatypes::DataType;
-    use datatypes::value::{ListValue, OrderedFloat};
-
-    use super::*;
-
-    #[test]
-    fn test_not_null_value_to_scalar_value() {
-        assert_eq!(
-            ScalarValue::Boolean(Some(true)),
-            try_into_scalar_value(Value::Boolean(true), &ConcreteDataType::boolean_datatype())
-                .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Boolean(Some(false)),
-            try_into_scalar_value(Value::Boolean(false), &ConcreteDataType::boolean_datatype())
-                .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt8(Some(u8::MIN + 1)),
-            try_into_scalar_value(
-                Value::UInt8(u8::MIN + 1),
-                &ConcreteDataType::uint8_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt16(Some(u16::MIN + 2)),
-            try_into_scalar_value(
-                Value::UInt16(u16::MIN + 2),
-                &ConcreteDataType::uint16_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt32(Some(u32::MIN + 3)),
-            try_into_scalar_value(
-                Value::UInt32(u32::MIN + 3),
-                &ConcreteDataType::uint32_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt64(Some(u64::MIN + 4)),
-            try_into_scalar_value(
-                Value::UInt64(u64::MIN + 4),
-                &ConcreteDataType::uint64_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int8(Some(i8::MIN + 4)),
-            try_into_scalar_value(Value::Int8(i8::MIN + 4), &ConcreteDataType::int8_datatype())
-                .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int16(Some(i16::MIN + 5)),
-            try_into_scalar_value(
-                Value::Int16(i16::MIN + 5),
-                &ConcreteDataType::int16_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int32(Some(i32::MIN + 6)),
-            try_into_scalar_value(
-                Value::Int32(i32::MIN + 6),
-                &ConcreteDataType::int32_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int64(Some(i64::MIN + 7)),
-            try_into_scalar_value(
-                Value::Int64(i64::MIN + 7),
-                &ConcreteDataType::int64_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Float32(Some(8.0f32)),
-            try_into_scalar_value(
-                Value::Float32(OrderedFloat(8.0f32)),
-                &ConcreteDataType::float32_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Float64(Some(9.0f64)),
-            try_into_scalar_value(
-                Value::Float64(OrderedFloat(9.0f64)),
-                &ConcreteDataType::float64_datatype()
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Utf8(Some("hello".to_string())),
-            try_into_scalar_value(
-                Value::String(StringBytes::from("hello")),
-                &ConcreteDataType::string_datatype(),
-            )
-            .unwrap()
-        );
-        assert_eq!(
-            ScalarValue::LargeBinary(Some("world".as_bytes().to_vec())),
-            try_into_scalar_value(
-                Value::Binary(Bytes::from("world".as_bytes())),
-                &ConcreteDataType::binary_datatype()
-            )
-            .unwrap()
-        );
-    }
-
-    #[test]
-    fn test_null_value_to_scalar_value() {
-        assert_eq!(
-            ScalarValue::Boolean(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::boolean_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt8(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::uint8_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt16(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::uint16_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt32(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::uint32_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::UInt64(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::uint64_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int8(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::int8_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int16(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::int16_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int32(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::int32_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Int64(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::int64_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Float32(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::float32_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Float64(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::float64_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::Utf8(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::string_datatype()).unwrap()
-        );
-        assert_eq!(
-            ScalarValue::LargeBinary(None),
-            try_into_scalar_value(Value::Null, &ConcreteDataType::binary_datatype()).unwrap()
-        );
-    }
-
-    #[test]
-    fn test_list_value_to_scalar_value() {
-        let items = Some(Box::new(vec![Value::Int32(-1), Value::Null]));
-        let list = Value::List(ListValue::new(items, ConcreteDataType::int32_datatype()));
-        let df_list = try_into_scalar_value(
-            list,
-            &ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
-        )
-        .unwrap();
-        assert!(matches!(df_list, ScalarValue::List(_, _)));
-        match df_list {
-            ScalarValue::List(vs, datatype) => {
-                assert_eq!(*datatype, DataType::Int32);
-
-                assert!(vs.is_some());
-                let vs = *vs.unwrap();
-                assert_eq!(
-                    vs,
-                    vec![ScalarValue::Int32(Some(-1)), ScalarValue::Int32(None)]
-                );
-            }
-            _ => unreachable!(),
-        }
-    }
-
-    #[test]
-    pub fn test_timestamp_to_scalar_value() {
-        assert_eq!(
-            ScalarValue::TimestampSecond(Some(1), None),
-            timestamp_to_scalar_value(TimeUnit::Second, Some(1))
-        );
-        assert_eq!(
-            ScalarValue::TimestampMillisecond(Some(1), None),
-            timestamp_to_scalar_value(TimeUnit::Millisecond, Some(1))
-        );
-        assert_eq!(
-            ScalarValue::TimestampMicrosecond(Some(1), None),
-            timestamp_to_scalar_value(TimeUnit::Microsecond, Some(1))
-        );
-        assert_eq!(
-            ScalarValue::TimestampNanosecond(Some(1), None),
-            timestamp_to_scalar_value(TimeUnit::Nanosecond, Some(1))
-        );
+        let scalar_value = value
+            .try_to_scalar_value(&output_type)
+            .context(error::ToScalarValueSnafu)
+            .map_err(Error::from)?;
+        Ok(scalar_value)
+    }
+
+    fn size(&self) -> usize {
+        // TODO(LFC): Implement new "size" method for Accumulator.
+        0
     }
 }
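This hunk deletes roughly three hundred lines because the `Value` → `ScalarValue` conversion (including the null and list cases and their tests) moved out of the adaptor; per the new call sites it now lives on `Value` as `try_to_scalar_value(&ConcreteDataType)` in the datatypes crate. A self-contained miniature of the dispatch that moved, with simplified stand-in types (`MiniValue` etc. are illustrative, not the real `Value`/`ConcreteDataType`):

```rust
// Stand-ins for datatypes::value::Value, DataFusion's ScalarValue, and
// ConcreteDataType, trimmed to two types plus null.
#[derive(Debug)]
enum MiniValue {
    Boolean(bool),
    Int64(i64),
    Null,
}

#[derive(Debug, PartialEq)]
enum MiniScalar {
    Boolean(Option<bool>),
    Int64(Option<i64>),
}

enum MiniType {
    Boolean,
    Int64,
}

fn to_scalar(value: MiniValue, datatype: &MiniType) -> MiniScalar {
    match (value, datatype) {
        (MiniValue::Boolean(v), _) => MiniScalar::Boolean(Some(v)),
        (MiniValue::Int64(v), _) => MiniScalar::Int64(Some(v)),
        // A null carries no type of its own: the target datatype decides
        // which typed "None" scalar to produce, which is why the removed
        // helper needed the datatype argument at all.
        (MiniValue::Null, MiniType::Boolean) => MiniScalar::Boolean(None),
        (MiniValue::Null, MiniType::Int64) => MiniScalar::Int64(None),
    }
}

fn main() {
    assert_eq!(MiniScalar::Int64(None), to_scalar(MiniValue::Null, &MiniType::Int64));
}
```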
@@ -12,11 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use datafusion::logical_plan::Expr as DfExpr;
+pub use datafusion_expr::expr::Expr as DfExpr;
 
 /// Central struct of query API.
 /// Represent logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
-#[derive(Clone, PartialEq, Hash, Debug)]
+#[derive(Clone, PartialEq, Eq, Hash, Debug)]
 pub struct Expr {
     df_expr: DfExpr,
 }
@@ -104,7 +104,7 @@ fn to_df_accumulator_func(
     accumulator: AccumulatorFunctionImpl,
     creator: AggregateFunctionCreatorRef,
 ) -> DfAccumulatorFunctionImplementation {
-    Arc::new(move || {
+    Arc::new(move |_| {
         let accumulator = accumulator()?;
         let creator = creator.clone();
         Ok(Box::new(DfAccumulatorAdaptor::new(accumulator, creator)))
@@ -16,12 +16,11 @@ use std::any::Any;
 use std::fmt::Debug;
 use std::sync::Arc;
 
-use async_trait::async_trait;
-use common_recordbatch::adapter::{AsyncRecordBatchStreamAdapter, DfRecordBatchStreamAdapter};
+use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
 use common_recordbatch::{DfSendableRecordBatchStream, SendableRecordBatchStream};
 use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
 use datafusion::error::Result as DfResult;
-pub use datafusion::execution::runtime_env::RuntimeEnv;
+pub use datafusion::execution::context::{SessionContext, TaskContext};
 use datafusion::physical_plan::expressions::PhysicalSortExpr;
 pub use datafusion::physical_plan::Partitioning;
 use datafusion::physical_plan::Statistics;
@@ -63,7 +62,7 @@ pub trait PhysicalPlan: Debug + Send + Sync {
     fn execute(
         &self,
         partition: usize,
-        runtime: Arc<RuntimeEnv>,
+        context: Arc<TaskContext>,
     ) -> Result<SendableRecordBatchStream>;
 }
 
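The `RuntimeEnv` → `TaskContext` swap here follows the upstream DataFusion change: execution state is handed to `execute` per task instead of as a global runtime handle, and (per the later hunks) `execute` is no longer `async`; it returns a stream immediately. The shape of the migrated trait, with stand-in types (this is the signature pattern, not the real trait definition):

```rust
use std::sync::Arc;

struct TaskContext;    // stand-in for datafusion's TaskContext
struct SendableStream; // stand-in for SendableRecordBatchStream

trait PhysicalPlan {
    // Synchronous: returns a lazily evaluated stream instead of awaiting one.
    fn execute(&self, partition: usize, context: Arc<TaskContext>) -> SendableStream;
}
```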
@@ -111,6 +110,7 @@ impl PhysicalPlan for PhysicalPlanAdapter {
             .collect();
         let plan = self
             .df_plan
+            .clone()
             .with_new_children(children)
             .context(error::GeneralDataFusionSnafu)?;
         Ok(Arc::new(PhysicalPlanAdapter::new(self.schema(), plan)))
@@ -119,20 +119,22 @@ impl PhysicalPlan for PhysicalPlanAdapter {
     fn execute(
         &self,
         partition: usize,
-        runtime: Arc<RuntimeEnv>,
+        context: Arc<TaskContext>,
     ) -> Result<SendableRecordBatchStream> {
         let df_plan = self.df_plan.clone();
-        let stream = Box::pin(async move { df_plan.execute(partition, runtime).await });
-        let stream = AsyncRecordBatchStreamAdapter::new(self.schema(), stream);
+        let stream = df_plan
+            .execute(partition, context)
+            .context(error::GeneralDataFusionSnafu)?;
+        let adapter = RecordBatchStreamAdapter::try_new(stream)
+            .context(error::ConvertDfRecordBatchStreamSnafu)?;
 
-        Ok(Box::pin(stream))
+        Ok(Box::pin(adapter))
     }
 }
 
 #[derive(Debug)]
 pub struct DfPhysicalPlanAdapter(pub PhysicalPlanRef);
 
-#[async_trait]
 impl DfPhysicalPlan for DfPhysicalPlanAdapter {
     fn as_any(&self) -> &dyn Any {
         self
@@ -159,15 +161,14 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
     }
 
     fn with_new_children(
-        &self,
+        self: Arc<Self>,
         children: Vec<Arc<dyn DfPhysicalPlan>>,
     ) -> DfResult<Arc<dyn DfPhysicalPlan>> {
         let df_schema = self.schema();
         let schema: SchemaRef = Arc::new(
             df_schema
                 .try_into()
-                .context(error::ConvertArrowSchemaSnafu)
-                .map_err(error::Error::from)?,
+                .context(error::ConvertArrowSchemaSnafu)?,
         );
         let children = children
             .into_iter()
@@ -177,12 +178,12 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
         Ok(Arc::new(DfPhysicalPlanAdapter(plan)))
     }
 
-    async fn execute(
+    fn execute(
         &self,
         partition: usize,
-        runtime: Arc<RuntimeEnv>,
+        context: Arc<TaskContext>,
     ) -> DfResult<DfSendableRecordBatchStream> {
-        let stream = self.0.execute(partition, runtime)?;
+        let stream = self.0.execute(partition, context)?;
         Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
     }
 
@@ -194,16 +195,16 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
 
 #[cfg(test)]
 mod test {
+    use async_trait::async_trait;
     use common_recordbatch::{RecordBatch, RecordBatches};
-    use datafusion::arrow_print;
-    use datafusion::datasource::TableProvider as DfTableProvider;
-    use datafusion::logical_plan::LogicalPlanBuilder;
+    use datafusion::datasource::{DefaultTableSource, TableProvider as DfTableProvider, TableType};
+    use datafusion::execution::context::{SessionContext, SessionState};
     use datafusion::physical_plan::collect;
     use datafusion::physical_plan::empty::EmptyExec;
-    use datafusion::prelude::ExecutionContext;
-    use datafusion_common::field_util::SchemaExt;
-    use datafusion_expr::Expr;
+    use datafusion_expr::logical_plan::builder::LogicalPlanBuilder;
+    use datafusion_expr::{Expr, TableSource};
     use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
+    use datatypes::arrow::util::pretty;
    use datatypes::schema::Schema;
    use datatypes::vectors::Int32Vector;
 
@@ -225,9 +226,14 @@ mod test {
             )]))
         }
 
+        fn table_type(&self) -> TableType {
+            TableType::Base
+        }
+
         async fn scan(
             &self,
-            _projection: &Option<Vec<usize>>,
+            _ctx: &SessionState,
+            _projection: Option<&Vec<usize>>,
             _filters: &[Expr],
             _limit: Option<usize>,
         ) -> DfResult<Arc<dyn DfPhysicalPlan>> {
@@ -240,6 +246,14 @@ mod test {
         }
     }
 
+    impl MyDfTableProvider {
+        fn table_source() -> Arc<dyn TableSource> {
+            Arc::new(DefaultTableSource {
+                table_provider: Arc::new(Self),
+            })
+        }
+    }
+
     #[derive(Debug)]
     struct MyExecutionPlan {
         schema: SchemaRef,
@@ -269,7 +283,7 @@ mod test {
         fn execute(
             &self,
             _partition: usize,
-            _runtime: Arc<RuntimeEnv>,
+            _context: Arc<TaskContext>,
         ) -> Result<SendableRecordBatchStream> {
             let schema = self.schema();
             let recordbatches = RecordBatches::try_new(
@@ -295,20 +309,26 @@ mod test {
     // Test our physical plan can be executed by DataFusion, through adapters.
     #[tokio::test]
     async fn test_execute_physical_plan() {
-        let ctx = ExecutionContext::new();
-        let logical_plan = LogicalPlanBuilder::scan("test", Arc::new(MyDfTableProvider), None)
-            .unwrap()
-            .build()
-            .unwrap();
+        let ctx = SessionContext::new();
+        let logical_plan =
+            LogicalPlanBuilder::scan("test", MyDfTableProvider::table_source(), None)
+                .unwrap()
+                .build()
+                .unwrap();
         let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
-        let df_recordbatches = collect(physical_plan, Arc::new(RuntimeEnv::default()))
+        let df_recordbatches = collect(physical_plan, Arc::new(TaskContext::from(&ctx)))
             .await
             .unwrap();
-        let pretty_print = arrow_print::write(&df_recordbatches);
-        let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
+        let pretty_print = pretty::pretty_format_batches(&df_recordbatches).unwrap();
         assert_eq!(
-            pretty_print,
-            vec!["+---+", "| a |", "+---+", "| 1 |", "| 2 |", "| 3 |", "+---+",]
+            pretty_print.to_string(),
+            r#"+---+
+| a |
++---+
+| 1 |
+| 2 |
+| 3 |
++---+"#
         );
     }
 
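The expected table in the rewritten test comes from arrow's pretty-printing utility, which returns a `Display` value (hence the `.to_string()` in the assertion). A standalone sketch assuming the `arrow` crate's `util::pretty` module; the crate version and the exact re-export path used by this repository may differ:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use arrow::util::pretty::pretty_format_batches;

fn main() -> arrow::error::Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema,
        vec![Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef],
    )?;
    // Prints the same "+---+" bordered table the assertion above compares against.
    println!("{}", pretty_format_batches(&[batch])?);
    Ok(())
}
```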
@@ -15,7 +15,7 @@
 //! Signature module contains foundational types that are used to represent signatures, types,
 //! and return types of functions.
 //! Copied and modified from datafusion.
-pub use datafusion::physical_plan::functions::Volatility;
+pub use datafusion_expr::Volatility;
 use datafusion_expr::{Signature as DfSignature, TypeSignature as DfTypeSignature};
 use datatypes::arrow::datatypes::DataType as ArrowDataType;
 use datatypes::data_type::DataType;
@@ -1,15 +1,13 @@
 [package]
 name = "common-recordbatch"
-version = "0.1.0"
-edition = "2021"
-license = "Apache-2.0"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
 
 [dependencies]
 common-error = { path = "../error" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
-    "simd",
-] }
-datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
+datafusion.workspace = true
+datafusion-common.workspace = true
 datatypes = { path = "../../datatypes" }
 futures = "0.3"
 paste = "1.0"
@@ -19,7 +19,6 @@ use std::task::{Context, Poll};
 
 use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
 use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
-use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
 use datafusion_common::DataFusionError;
 use datatypes::arrow::error::{ArrowError, Result as ArrowResult};
 use datatypes::schema::{Schema, SchemaRef};
@@ -28,7 +27,8 @@ use snafu::ResultExt;
 
 use crate::error::{self, Result};
 use crate::{
-    DfSendableRecordBatchStream, RecordBatch, RecordBatchStream, SendableRecordBatchStream, Stream,
+    DfRecordBatch, DfSendableRecordBatchStream, RecordBatch, RecordBatchStream,
+    SendableRecordBatchStream, Stream,
 };
 
 type FutureStream = Pin<
@@ -63,8 +63,8 @@ impl Stream for DfRecordBatchStreamAdapter {
         match Pin::new(&mut self.stream).poll_next(cx) {
             Poll::Pending => Poll::Pending,
             Poll::Ready(Some(recordbatch)) => match recordbatch {
-                Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.df_recordbatch))),
-                Err(e) => Poll::Ready(Some(Err(ArrowError::External("".to_owned(), Box::new(e))))),
+                Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.into_df_record_batch()))),
+                Err(e) => Poll::Ready(Some(Err(ArrowError::ExternalError(Box::new(e))))),
             },
             Poll::Ready(None) => Poll::Ready(None),
         }
@@ -102,10 +102,13 @@ impl Stream for RecordBatchStreamAdapter {
     fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
         match Pin::new(&mut self.stream).poll_next(cx) {
             Poll::Pending => Poll::Pending,
-            Poll::Ready(Some(df_recordbatch)) => Poll::Ready(Some(Ok(RecordBatch {
-                schema: self.schema(),
-                df_recordbatch: df_recordbatch.context(error::PollStreamSnafu)?,
-            }))),
+            Poll::Ready(Some(df_record_batch)) => {
+                let df_record_batch = df_record_batch.context(error::PollStreamSnafu)?;
+                Poll::Ready(Some(RecordBatch::try_from_df_record_batch(
+                    self.schema(),
+                    df_record_batch,
+                )))
+            }
             Poll::Ready(None) => Poll::Ready(None),
         }
     }
@@ -118,7 +121,8 @@ impl Stream for RecordBatchStreamAdapter {
 
 enum AsyncRecordBatchStreamAdapterState {
     Uninit(FutureStream),
-    Inited(std::result::Result<DfSendableRecordBatchStream, DataFusionError>),
+    Ready(DfSendableRecordBatchStream),
+    Failed,
 }
 
 pub struct AsyncRecordBatchStreamAdapter {
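Splitting `Inited(Result<...>)` into `Ready(stream)` and `Failed` makes the adapter a proper three-state machine: the stored error no longer has to be kept around (and can't be accidentally re-reported) once it has been returned. The control flow in miniature, with stand-in types (a `Vec` plays the stream; the real adapter drives a `FutureStream` and DataFusion streams):

```rust
enum State {
    Uninit,          // creation future not resolved yet
    Ready(Vec<i32>), // stand-in for the inner record batch stream
    Failed,          // terminal: the init error was already reported
}

fn poll(state: &mut State, init: &Result<Vec<i32>, String>) -> Option<Result<i32, String>> {
    loop {
        match state {
            State::Uninit => match init {
                Ok(s) => *state = State::Ready(s.clone()), // then keep looping
                Err(e) => {
                    *state = State::Failed;
                    return Some(Err(e.clone())); // error surfaces exactly once
                }
            },
            State::Ready(items) => return items.pop().map(Ok),
            State::Failed => return None, // stream stays closed afterwards
        }
    }
}

fn main() {
    let mut state = State::Uninit;
    let init: Result<Vec<i32>, String> = Err("init failed".to_string());
    assert!(matches!(poll(&mut state, &init), Some(Err(_))));
    assert!(poll(&mut state, &init).is_none());
}
```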
@@ -148,31 +152,26 @@ impl Stream for AsyncRecordBatchStreamAdapter {
|
|||||||
loop {
|
loop {
|
||||||
match &mut self.state {
|
match &mut self.state {
|
||||||
AsyncRecordBatchStreamAdapterState::Uninit(stream_future) => {
|
AsyncRecordBatchStreamAdapterState::Uninit(stream_future) => {
|
||||||
self.state = AsyncRecordBatchStreamAdapterState::Inited(ready!(Pin::new(
|
match ready!(Pin::new(stream_future).poll(cx)) {
|
||||||
stream_future
|
Ok(stream) => {
|
||||||
)
|
self.state = AsyncRecordBatchStreamAdapterState::Ready(stream);
|
||||||
.poll(cx)));
|
continue;
|
||||||
continue;
|
}
|
||||||
|
Err(e) => {
|
||||||
|
self.state = AsyncRecordBatchStreamAdapterState::Failed;
|
||||||
|
return Poll::Ready(Some(
|
||||||
|
Err(e).context(error::InitRecordbatchStreamSnafu),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
AsyncRecordBatchStreamAdapterState::Inited(stream) => match stream {
|
AsyncRecordBatchStreamAdapterState::Ready(stream) => {
|
||||||
Ok(stream) => {
|
return Poll::Ready(ready!(Pin::new(stream).poll_next(cx)).map(|x| {
|
||||||
return Poll::Ready(ready!(Pin::new(stream).poll_next(cx)).map(|df| {
|
let df_record_batch = x.context(error::PollStreamSnafu)?;
|
||||||
Ok(RecordBatch {
|
RecordBatch::try_from_df_record_batch(self.schema(), df_record_batch)
|
||||||
schema: self.schema(),
|
}))
|
||||||
df_recordbatch: df.context(error::PollStreamSnafu)?,
|
}
|
||||||
})
|
AsyncRecordBatchStreamAdapterState::Failed => return Poll::Ready(None),
|
||||||
}));
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
return Poll::Ready(Some(
|
|
||||||
error::CreateRecordBatchesSnafu {
|
|
||||||
reason: format!("Read error {:?} from stream", e),
|
|
||||||
}
|
|
||||||
.fail()
|
|
||||||
.map_err(|e| e.into()),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -183,3 +182,104 @@ impl Stream for AsyncRecordBatchStreamAdapter {
|
|||||||
(0, None)
|
(0, None)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use common_error::mock::MockError;
|
||||||
|
use common_error::prelude::{BoxedError, StatusCode};
|
||||||
|
use datatypes::prelude::ConcreteDataType;
|
||||||
|
use datatypes::schema::ColumnSchema;
|
||||||
|
use datatypes::vectors::Int32Vector;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
use crate::RecordBatches;
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_async_recordbatch_stream_adaptor() {
|
||||||
|
struct MaybeErrorRecordBatchStream {
|
||||||
|
items: Vec<Result<RecordBatch>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RecordBatchStream for MaybeErrorRecordBatchStream {
|
||||||
|
fn schema(&self) -> SchemaRef {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Stream for MaybeErrorRecordBatchStream {
|
||||||
|
type Item = Result<RecordBatch>;
|
||||||
|
|
||||||
|
fn poll_next(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
_: &mut Context<'_>,
|
||||||
|
) -> Poll<Option<Self::Item>> {
|
||||||
|
if let Some(batch) = self.items.pop() {
|
||||||
|
Poll::Ready(Some(Ok(batch?)))
|
||||||
|
} else {
|
||||||
|
Poll::Ready(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_future_stream(
|
||||||
|
maybe_recordbatches: Result<Vec<Result<RecordBatch>>>,
|
||||||
|
) -> FutureStream {
|
||||||
|
Box::pin(async move {
|
||||||
|
maybe_recordbatches
|
||||||
|
.map(|items| {
|
||||||
|
Box::pin(DfRecordBatchStreamAdapter::new(Box::pin(
|
||||||
|
MaybeErrorRecordBatchStream { items },
|
||||||
|
))) as _
|
||||||
|
})
|
||||||
|
.map_err(|e| DataFusionError::External(Box::new(e)))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
let schema = Arc::new(Schema::new(vec![ColumnSchema::new(
|
||||||
|
"a",
|
||||||
|
ConcreteDataType::int32_datatype(),
|
||||||
|
false,
|
||||||
|
)]));
|
||||||
|
let batch1 = RecordBatch::new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![Arc::new(Int32Vector::from_slice(&[1])) as _],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let batch2 = RecordBatch::new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![Arc::new(Int32Vector::from_slice(&[2])) as _],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let success_stream = new_future_stream(Ok(vec![Ok(batch1.clone()), Ok(batch2.clone())]));
|
||||||
|
let adapter = AsyncRecordBatchStreamAdapter::new(schema.clone(), success_stream);
|
||||||
|
let collected = RecordBatches::try_collect(Box::pin(adapter)).await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
collected,
|
||||||
|
RecordBatches::try_new(schema.clone(), vec![batch2.clone(), batch1.clone()]).unwrap()
|
||||||
|
);
|
||||||
|
|
||||||
|
let poll_err_stream = new_future_stream(Ok(vec![
|
||||||
|
Ok(batch1.clone()),
|
||||||
|
Err(error::Error::External {
|
||||||
|
source: BoxedError::new(MockError::new(StatusCode::Unknown)),
|
||||||
|
}),
|
||||||
|
]));
|
||||||
|
let adapter = AsyncRecordBatchStreamAdapter::new(schema.clone(), poll_err_stream);
|
||||||
|
let result = RecordBatches::try_collect(Box::pin(adapter)).await;
|
||||||
|
assert_eq!(
|
||||||
|
result.unwrap_err().to_string(),
|
||||||
|
"Failed to poll stream, source: External error: External error, source: Unknown"
|
||||||
|
);
|
||||||
|
|
||||||
|
let failed_to_init_stream = new_future_stream(Err(error::Error::External {
|
||||||
|
source: BoxedError::new(MockError::new(StatusCode::Internal)),
|
||||||
|
}));
|
||||||
|
let adapter = AsyncRecordBatchStreamAdapter::new(schema.clone(), failed_to_init_stream);
|
||||||
|
let result = RecordBatches::try_collect(Box::pin(adapter)).await;
|
||||||
|
assert_eq!(
|
||||||
|
result.unwrap_err().to_string(),
|
||||||
|
"Failed to init Recordbatch stream, source: External error: External error, source: Internal"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -17,13 +17,12 @@ use std::any::Any;
|
|||||||
|
|
||||||
use common_error::ext::BoxedError;
|
use common_error::ext::BoxedError;
|
||||||
use common_error::prelude::*;
|
use common_error::prelude::*;
|
||||||
common_error::define_opaque_error!(Error);
|
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
|
|
||||||
#[derive(Debug, Snafu)]
|
#[derive(Debug, Snafu)]
|
||||||
#[snafu(visibility(pub))]
|
#[snafu(visibility(pub))]
|
||||||
pub enum InnerError {
|
pub enum Error {
|
||||||
#[snafu(display("Fail to create datafusion record batch, source: {}", source))]
|
#[snafu(display("Fail to create datafusion record batch, source: {}", source))]
|
||||||
NewDfRecordBatch {
|
NewDfRecordBatch {
|
||||||
source: datatypes::arrow::error::ArrowError,
|
source: datatypes::arrow::error::ArrowError,
|
||||||
@@ -59,20 +58,34 @@ pub enum InnerError {
|
|||||||
source: datatypes::arrow::error::ArrowError,
|
source: datatypes::arrow::error::ArrowError,
|
||||||
backtrace: Backtrace,
|
backtrace: Backtrace,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Fail to format record batch, source: {}", source))]
|
||||||
|
Format {
|
||||||
|
source: datatypes::arrow::error::ArrowError,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
|
|
||||||
|
#[snafu(display("Failed to init Recordbatch stream, source: {}", source))]
|
||||||
|
InitRecordbatchStream {
|
||||||
|
source: datafusion_common::DataFusionError,
|
||||||
|
backtrace: Backtrace,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ErrorExt for InnerError {
|
impl ErrorExt for Error {
|
||||||
fn status_code(&self) -> StatusCode {
|
fn status_code(&self) -> StatusCode {
|
||||||
match self {
|
match self {
|
||||||
InnerError::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
|
Error::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
|
||||||
|
|
||||||
InnerError::DataTypes { .. }
|
Error::DataTypes { .. }
|
||||||
| InnerError::CreateRecordBatches { .. }
|
| Error::CreateRecordBatches { .. }
|
||||||
| InnerError::PollStream { .. } => StatusCode::Internal,
|
| Error::PollStream { .. }
|
||||||
|
| Error::Format { .. }
|
||||||
|
| Error::InitRecordbatchStream { .. } => StatusCode::Internal,
|
||||||
|
|
||||||
InnerError::External { source } => source.status_code(),
|
Error::External { source } => source.status_code(),
|
||||||
|
|
||||||
InnerError::SchemaConversion { source, .. } => source.status_code(),
|
Error::SchemaConversion { source, .. } => source.status_code(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,9 +97,3 @@ impl ErrorExt for InnerError {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<InnerError> for Error {
|
|
||||||
fn from(e: InnerError) -> Error {
|
|
||||||
Error::new(e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -20,16 +20,17 @@ pub mod util;
|
|||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use datafusion::arrow_print;
|
|
||||||
use datafusion::physical_plan::memory::MemoryStream;
|
use datafusion::physical_plan::memory::MemoryStream;
|
||||||
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||||
|
pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch;
|
||||||
|
use datatypes::arrow::util::pretty;
|
||||||
use datatypes::prelude::VectorRef;
|
use datatypes::prelude::VectorRef;
|
||||||
use datatypes::schema::{Schema, SchemaRef};
|
use datatypes::schema::{Schema, SchemaRef};
|
||||||
use error::Result;
|
use error::Result;
|
||||||
use futures::task::{Context, Poll};
|
use futures::task::{Context, Poll};
|
||||||
use futures::{Stream, TryStreamExt};
|
use futures::{Stream, TryStreamExt};
|
||||||
pub use recordbatch::RecordBatch;
|
pub use recordbatch::RecordBatch;
|
||||||
use snafu::ensure;
|
use snafu::{ensure, ResultExt};
|
||||||
|
|
||||||
pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
|
pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
|
||||||
fn schema(&self) -> SchemaRef;
|
fn schema(&self) -> SchemaRef;
|
||||||
@@ -65,7 +66,7 @@ impl Stream for EmptyRecordBatchStream {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct RecordBatches {
|
pub struct RecordBatches {
|
||||||
schema: SchemaRef,
|
schema: SchemaRef,
|
||||||
batches: Vec<RecordBatch>,
|
batches: Vec<RecordBatch>,
|
||||||
@@ -98,17 +99,18 @@ impl RecordBatches {
|
|||||||
self.batches.iter()
|
self.batches.iter()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pretty_print(&self) -> String {
|
pub fn pretty_print(&self) -> Result<String> {
|
||||||
arrow_print::write(
|
let df_batches = &self
|
||||||
&self
|
.iter()
|
||||||
.iter()
|
.map(|x| x.df_record_batch().clone())
|
||||||
.map(|x| x.df_recordbatch.clone())
|
.collect::<Vec<_>>();
|
||||||
.collect::<Vec<_>>(),
|
let result = pretty::pretty_format_batches(df_batches).context(error::FormatSnafu)?;
|
||||||
)
|
|
||||||
|
Ok(result.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
|
pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
|
||||||
for batch in batches.iter() {
|
for batch in &batches {
|
||||||
ensure!(
|
ensure!(
|
||||||
batch.schema == schema,
|
batch.schema == schema,
|
||||||
error::CreateRecordBatchesSnafu {
|
error::CreateRecordBatchesSnafu {
|
||||||
@@ -144,7 +146,7 @@ impl RecordBatches {
|
|||||||
let df_record_batches = self
|
let df_record_batches = self
|
||||||
.batches
|
.batches
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|batch| batch.df_recordbatch)
|
.map(|batch| batch.into_df_record_batch())
|
||||||
.collect();
|
.collect();
|
||||||
// unwrap safety: `MemoryStream::try_new` won't fail
|
// unwrap safety: `MemoryStream::try_new` won't fail
|
||||||
Box::pin(
|
Box::pin(
|
||||||
@@ -229,8 +231,7 @@ mod tests {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.unwrap_err().to_string(),
|
result.unwrap_err().to_string(),
|
||||||
format!(
|
format!(
|
||||||
"Failed to create RecordBatches, reason: expect RecordBatch schema equals {:?}, actual: {:?}",
|
"Failed to create RecordBatches, reason: expect RecordBatch schema equals {schema1:?}, actual: {schema2:?}",
|
||||||
schema1, schema2
|
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -242,7 +243,7 @@ mod tests {
|
|||||||
| 1 | hello |
|
| 1 | hello |
|
||||||
| 2 | world |
|
| 2 | world |
|
||||||
+---+-------+";
|
+---+-------+";
|
||||||
assert_eq!(batches.pretty_print(), expected);
|
assert_eq!(batches.pretty_print().unwrap(), expected);
|
||||||
|
|
||||||
assert_eq!(schema1, batches.schema());
|
assert_eq!(schema1, batches.schema());
|
||||||
assert_eq!(vec![batch1], batches.take());
|
assert_eq!(vec![batch1], batches.take());
|
||||||
|
|||||||
@@ -12,8 +12,6 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
|
|
||||||
use datatypes::arrow_array::arrow_array_get;
|
|
||||||
use datatypes::schema::SchemaRef;
|
use datatypes::schema::SchemaRef;
|
||||||
use datatypes::value::Value;
|
use datatypes::value::Value;
|
||||||
use datatypes::vectors::{Helper, VectorRef};
|
use datatypes::vectors::{Helper, VectorRef};
|
||||||
@@ -22,32 +20,88 @@ use serde::{Serialize, Serializer};
 use snafu::ResultExt;

 use crate::error::{self, Result};
+use crate::DfRecordBatch;

-// TODO(yingwen): We should hold vectors in the RecordBatch.
+/// A two-dimensional batch of column-oriented data with a defined schema.
 #[derive(Clone, Debug, PartialEq)]
 pub struct RecordBatch {
     pub schema: SchemaRef,
-    pub df_recordbatch: DfRecordBatch,
+    columns: Vec<VectorRef>,
+    df_record_batch: DfRecordBatch,
 }

 impl RecordBatch {
+    /// Create a new [`RecordBatch`] from `schema` and `columns`.
     pub fn new<I: IntoIterator<Item = VectorRef>>(
         schema: SchemaRef,
         columns: I,
     ) -> Result<RecordBatch> {
-        let arrow_arrays = columns.into_iter().map(|v| v.to_arrow_array()).collect();
+        let columns: Vec<_> = columns.into_iter().collect();
+        let arrow_arrays = columns.iter().map(|v| v.to_arrow_array()).collect();

-        let df_recordbatch = DfRecordBatch::try_new(schema.arrow_schema().clone(), arrow_arrays)
+        let df_record_batch = DfRecordBatch::try_new(schema.arrow_schema().clone(), arrow_arrays)
             .context(error::NewDfRecordBatchSnafu)?;

         Ok(RecordBatch {
             schema,
-            df_recordbatch,
+            columns,
+            df_record_batch,
         })
     }

+    /// Create a new [`RecordBatch`] from `schema` and `df_record_batch`.
+    ///
+    /// This method doesn't check the schema.
+    pub fn try_from_df_record_batch(
+        schema: SchemaRef,
+        df_record_batch: DfRecordBatch,
+    ) -> Result<RecordBatch> {
+        let columns = df_record_batch
+            .columns()
+            .iter()
+            .map(|c| Helper::try_into_vector(c.clone()).context(error::DataTypesSnafu))
+            .collect::<Result<Vec<_>>>()?;
+
+        Ok(RecordBatch {
+            schema,
+            columns,
+            df_record_batch,
+        })
+    }
+
+    #[inline]
+    pub fn df_record_batch(&self) -> &DfRecordBatch {
+        &self.df_record_batch
+    }
+
+    #[inline]
+    pub fn into_df_record_batch(self) -> DfRecordBatch {
+        self.df_record_batch
+    }
+
+    #[inline]
+    pub fn columns(&self) -> &[VectorRef] {
+        &self.columns
+    }
+
+    #[inline]
+    pub fn column(&self, idx: usize) -> &VectorRef {
+        &self.columns[idx]
+    }
+
+    pub fn column_by_name(&self, name: &str) -> Option<&VectorRef> {
+        let idx = self.schema.column_index_by_name(name)?;
+        Some(&self.columns[idx])
+    }
+
+    #[inline]
+    pub fn num_columns(&self) -> usize {
+        self.columns.len()
+    }
+
+    #[inline]
     pub fn num_rows(&self) -> usize {
-        self.df_recordbatch.num_rows()
+        self.df_record_batch.num_rows()
     }

     /// Create an iterator to traverse the data by row
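Note: `RecordBatch` now stores `columns: Vec<VectorRef>` next to the wrapped DataFusion batch, so reads no longer round-trip through arrow arrays. A short sketch of the accessors added above; the crate path `common_recordbatch` is assumed:

use std::sync::Arc;

use common_recordbatch::RecordBatch;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::{UInt32Vector, VectorRef};

fn inspect() {
    let schema = Arc::new(
        Schema::try_new(vec![ColumnSchema::new(
            "n",
            ConcreteDataType::uint32_datatype(),
            false,
        )])
        .unwrap(),
    );
    let columns = vec![Arc::new(UInt32Vector::from_slice(&[7, 8, 9])) as VectorRef];
    let batch = RecordBatch::new(schema, columns).unwrap();

    assert_eq!(1, batch.num_columns());
    assert_eq!(3, batch.num_rows());
    // Columns are vectors, addressable by index or by name.
    assert_eq!(batch.column(0), batch.column_by_name("n").unwrap());
    // The wrapped DataFusion batch stays reachable for arrow interop.
    let _df = batch.df_record_batch();
}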
@@ -61,14 +115,15 @@ impl Serialize for RecordBatch {
     where
         S: Serializer,
     {
+        // TODO(yingwen): arrow and arrow2's schemas have different fields, so
+        // it might be better to use our `RawSchema` as serialized field.
         let mut s = serializer.serialize_struct("record", 2)?;
-        s.serialize_field("schema", &self.schema.arrow_schema())?;
+        s.serialize_field("schema", &**self.schema.arrow_schema())?;

-        let df_columns = self.df_recordbatch.columns();
-
-        let vec = df_columns
+        let vec = self
+            .columns
             .iter()
-            .map(|c| Helper::try_into_vector(c.clone())?.serialize_to_json())
+            .map(|c| c.serialize_to_json())
             .collect::<std::result::Result<Vec<_>, _>>()
             .map_err(S::Error::custom)?;

@@ -88,8 +143,8 @@ impl<'a> RecordBatchRowIterator<'a> {
     fn new(record_batch: &'a RecordBatch) -> RecordBatchRowIterator {
         RecordBatchRowIterator {
             record_batch,
-            rows: record_batch.df_recordbatch.num_rows(),
-            columns: record_batch.df_recordbatch.num_columns(),
+            rows: record_batch.df_record_batch.num_rows(),
+            columns: record_batch.df_record_batch.num_columns(),
             row_cursor: 0,
         }
     }
@@ -104,15 +159,9 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
         } else {
             let mut row = Vec::with_capacity(self.columns);

-            // TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
             for col in 0..self.columns {
-                let column_array = self.record_batch.df_recordbatch.column(col);
-                match arrow_array_get(column_array.as_ref(), self.row_cursor)
-                    .context(error::DataTypesSnafu)
-                {
-                    Ok(field) => row.push(field),
-                    Err(e) => return Some(Err(e.into())),
-                }
+                let column = self.record_batch.column(col);
+                row.push(column.get(self.row_cursor));
             }

             self.row_cursor += 1;
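Note: row iteration now reads straight from the held vectors; `Vector::get` returns an owned `Value` and cannot fail, which is what lets the `arrow_array_get` error plumbing above disappear. A minimal sketch of that access pattern, assuming `datatypes::prelude` re-exports the `Vector` trait and `VectorRef` as it does elsewhere in this diff:

use std::sync::Arc;

use datatypes::prelude::*;
use datatypes::vectors::UInt32Vector;

fn read_rows() {
    let column: VectorRef = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
    for row in 0..column.len() {
        // `get` is infallible; an out-of-range index is the caller's bug, not an `Err`.
        let value = column.get(row);
        println!("row {row}: {value:?}");
    }
}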
@@ -125,63 +174,60 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
 mod tests {
     use std::sync::Arc;

-    use datafusion_common::field_util::SchemaExt;
-    use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
-    use datatypes::arrow::array::UInt32Array;
     use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
-    use datatypes::prelude::*;
+    use datatypes::data_type::ConcreteDataType;
     use datatypes::schema::{ColumnSchema, Schema};
-    use datatypes::vectors::{StringVector, UInt32Vector, Vector};
+    use datatypes::vectors::{StringVector, UInt32Vector};

     use super::*;

     #[test]
-    fn test_new_record_batch() {
+    fn test_record_batch() {
         let arrow_schema = Arc::new(ArrowSchema::new(vec![
             Field::new("c1", DataType::UInt32, false),
             Field::new("c2", DataType::UInt32, false),
         ]));
         let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());

-        let v = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
-        let columns: Vec<VectorRef> = vec![v.clone(), v.clone()];
+        let c1 = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
+        let c2 = Arc::new(UInt32Vector::from_slice(&[4, 5, 6]));
+        let columns: Vec<VectorRef> = vec![c1, c2];

-        let batch = RecordBatch::new(schema.clone(), columns).unwrap();
-        let expect = v.to_arrow_array();
-        for column in batch.df_recordbatch.columns() {
-            let array = column.as_any().downcast_ref::<UInt32Array>().unwrap();
-            assert_eq!(
-                expect.as_any().downcast_ref::<UInt32Array>().unwrap(),
-                array
-            );
+        let batch = RecordBatch::new(schema.clone(), columns.clone()).unwrap();
+        assert_eq!(3, batch.num_rows());
+        assert_eq!(&columns, batch.columns());
+        for (i, expect) in columns.iter().enumerate().take(batch.num_columns()) {
+            let column = batch.column(i);
+            assert_eq!(expect, column);
         }
         assert_eq!(schema, batch.schema);

+        assert_eq!(columns[0], *batch.column_by_name("c1").unwrap());
+        assert_eq!(columns[1], *batch.column_by_name("c2").unwrap());
+        assert!(batch.column_by_name("c3").is_none());
+
+        let converted =
+            RecordBatch::try_from_df_record_batch(schema, batch.df_record_batch().clone()).unwrap();
+        assert_eq!(batch, converted);
+        assert_eq!(*batch.df_record_batch(), converted.into_df_record_batch());
     }

     #[test]
     pub fn test_serialize_recordbatch() {
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
+        let column_schemas = vec![ColumnSchema::new(
             "number",
-            DataType::UInt32,
+            ConcreteDataType::uint32_datatype(),
             false,
-        )]));
-        let schema = Arc::new(Schema::try_from(arrow_schema.clone()).unwrap());
+        )];
+        let schema = Arc::new(Schema::try_new(column_schemas).unwrap());

         let numbers: Vec<u32> = (0..10).collect();
-        let df_batch = DfRecordBatch::try_new(
-            arrow_schema,
-            vec![Arc::new(UInt32Array::from_slice(&numbers))],
-        )
-        .unwrap();
-
-        let batch = RecordBatch {
-            schema,
-            df_recordbatch: df_batch,
-        };
+        let columns = vec![Arc::new(UInt32Vector::from_slice(&numbers)) as VectorRef];
+        let batch = RecordBatch::new(schema, columns).unwrap();

         let output = serde_json::to_string(&batch).unwrap();
         assert_eq!(
-            r#"{"schema":{"fields":[{"name":"number","data_type":"UInt32","is_nullable":false,"metadata":{}}],"metadata":{}},"columns":[[0,1,2,3,4,5,6,7,8,9]]}"#,
+            r#"{"schema":{"fields":[{"name":"number","data_type":"UInt32","nullable":false,"dict_id":0,"dict_is_ordered":false,"metadata":{}}],"metadata":{"greptime:version":"0"}},"columns":[[0,1,2,3,4,5,6,7,8,9]]}"#,
             output
         );
     }
@@ -15,23 +15,29 @@
 use futures::TryStreamExt;

 use crate::error::Result;
-use crate::{RecordBatch, SendableRecordBatchStream};
+use crate::{RecordBatch, RecordBatches, SendableRecordBatchStream};

+/// Collect all the items from the stream into a vector of [`RecordBatch`].
 pub async fn collect(stream: SendableRecordBatchStream) -> Result<Vec<RecordBatch>> {
     stream.try_collect::<Vec<_>>().await
 }

+/// Collect all the items from the stream into [RecordBatches].
+pub async fn collect_batches(stream: SendableRecordBatchStream) -> Result<RecordBatches> {
+    let schema = stream.schema();
+    let batches = stream.try_collect::<Vec<_>>().await?;
+    RecordBatches::try_new(schema, batches)
+}
+
 #[cfg(test)]
 mod tests {
     use std::mem;
     use std::pin::Pin;
     use std::sync::Arc;

-    use datafusion_common::field_util::SchemaExt;
-    use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
-    use datatypes::arrow::array::UInt32Array;
-    use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
-    use datatypes::schema::{Schema, SchemaRef};
+    use datatypes::prelude::*;
+    use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
+    use datatypes::vectors::UInt32Vector;
     use futures::task::{Context, Poll};
     use futures::Stream;

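Note: the new `collect_batches` helper pairs the stream's schema with the collected batches, so the schema survives even when the stream yields no batch at all, which plain `collect` cannot guarantee. A hedged usage sketch; the module path `common_recordbatch::util` is assumed:

use common_recordbatch::error::Result;
use common_recordbatch::util::collect_batches;
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};

async fn drain(stream: SendableRecordBatchStream) -> Result<RecordBatches> {
    // Unlike `collect`, the result keeps the schema alongside the batches.
    collect_batches(stream).await
}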
@@ -65,12 +71,13 @@ mod tests {

     #[tokio::test]
     async fn test_collect() {
-        let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
+        let column_schemas = vec![ColumnSchema::new(
             "number",
-            DataType::UInt32,
+            ConcreteDataType::uint32_datatype(),
             false,
-        )]));
-        let schema = Arc::new(Schema::try_from(arrow_schema.clone()).unwrap());
+        )];
+        let schema = Arc::new(Schema::try_new(column_schemas).unwrap());

         let stream = MockRecordBatchStream {
             schema: schema.clone(),
@@ -81,24 +88,23 @@ mod tests {
         assert_eq!(0, batches.len());

         let numbers: Vec<u32> = (0..10).collect();
-        let df_batch = DfRecordBatch::try_new(
-            arrow_schema.clone(),
-            vec![Arc::new(UInt32Array::from_slice(&numbers))],
-        )
-        .unwrap();
-
-        let batch = RecordBatch {
-            schema: schema.clone(),
-            df_recordbatch: df_batch,
-        };
+        let columns = [Arc::new(UInt32Vector::from_vec(numbers)) as _];
+        let batch = RecordBatch::new(schema.clone(), columns).unwrap();

         let stream = MockRecordBatchStream {
-            schema: Arc::new(Schema::try_from(arrow_schema).unwrap()),
+            schema: schema.clone(),
             batch: Some(batch.clone()),
         };
         let batches = collect(Box::pin(stream)).await.unwrap();
         assert_eq!(1, batches.len());

         assert_eq!(batch, batches[0]);
+
+        let stream = MockRecordBatchStream {
+            schema: schema.clone(),
+            batch: Some(batch.clone()),
+        };
+        let batches = collect_batches(Box::pin(stream)).await.unwrap();
+        let expect_batches = RecordBatches::try_new(schema.clone(), vec![batch]).unwrap();
+        assert_eq!(expect_batches, batches);
     }
 }
Some files were not shown because too many files have changed in this diff.