Mirror of https://github.com/GreptimeTeam/greptimedb.git, synced 2025-12-23 06:30:05 +00:00

Compare commits: v0.1.0-alp ... replace-ar (45 commits)
Commit SHA1s:
a8630cdb38, 0f3dcc1b38, 7c696dae08, 142dee41d6, ce6d1cb7d1, dbb3034ecb, 652d59a643,
fa971c6513, 36c929e1a7, a712382fba, 4b644aa482, 4defde055c, 95b2d8654f, 42fdc7251a,
d0892bf0b7, fff530cb50, b936d8b18a, 1bde1ba399, 3687bc7346, 587bdc9800, 58c26def6b,
6f3baf96b0, a898f846d1, a562199455, fb0b4eb826, 2ba99259e1, 551cde23b1, 653906d4fa,
829ff491c4, b32438e78c, 0ccb8b4302, b48ae21b71, 3c0adb00f3, 8c66b7d000, 99371fd31b,
fe505fecfd, cc1ec26416, 504059a699, 7151deb4ed, d0686f9c19, 221f3e9d2e, 61c4a3691a,
d7626fd6af, e3201a4705, 571a84d91b
Cargo.lock (generated, 1562 changed lines): file diff suppressed because it is too large.
@@ -20,7 +20,6 @@ members = [
     "src/common/time",
     "src/datanode",
     "src/datatypes",
-    "src/datatypes2",
     "src/frontend",
    "src/log-store",
     "src/meta-client",
@@ -5,10 +5,10 @@ edition = "2021"
 license = "Apache-2.0"
 
 [dependencies]
-arrow = "10"
+arrow = "26.0.0"
 clap = { version = "4.0", features = ["derive"] }
 client = { path = "../src/client" }
 indicatif = "0.17.1"
 itertools = "0.10.5"
-parquet = { version = "*" }
+parquet = "26.0.0"
 tokio = { version = "1.21", features = ["full"] }
@@ -20,7 +20,6 @@
 
 use std::collections::HashMap;
 use std::path::{Path, PathBuf};
-use std::sync::Arc;
 use std::time::Instant;
 
 use arrow::array::{ArrayRef, PrimitiveArray, StringArray, TimestampNanosecondArray};
@@ -32,9 +31,7 @@ use client::api::v1::column::Values;
 use client::api::v1::{Column, ColumnDataType, ColumnDef, CreateExpr, InsertExpr};
 use client::{Client, Database, Select};
 use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
-use parquet::arrow::{ArrowReader, ParquetFileArrowReader};
-use parquet::file::reader::FileReader;
-use parquet::file::serialized_reader::SerializedFileReader;
+use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
 use tokio::task::JoinSet;
 
 const DATABASE_NAME: &str = "greptime";
@@ -86,10 +83,14 @@ async fn write_data(
     pb_style: ProgressStyle,
 ) -> u128 {
     let file = std::fs::File::open(&path).unwrap();
-    let file_reader = Arc::new(SerializedFileReader::new(file).unwrap());
-    let row_num = file_reader.metadata().file_metadata().num_rows();
-    let record_batch_reader = ParquetFileArrowReader::new(file_reader)
-        .get_record_reader(batch_size)
+    let record_batch_reader_builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+    let row_num = record_batch_reader_builder
+        .metadata()
+        .file_metadata()
+        .num_rows();
+    let record_batch_reader = record_batch_reader_builder
+        .with_batch_size(batch_size)
+        .build()
         .unwrap();
     let progress_bar = mpb.add(ProgressBar::new(row_num as _));
     progress_bar.set_style(pb_style);
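For context, a minimal standalone sketch of the `parquet` 26 reader API adopted above, outside this benchmark (the file name and batch size are illustrative):

```rust
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical input file; any parquet file works here.
    let file = std::fs::File::open("data.parquet")?;

    // The builder exposes file metadata before the reader is built, which
    // replaces the old SerializedFileReader/ParquetFileArrowReader pair.
    let builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
    let row_num = builder.metadata().file_metadata().num_rows();
    println!("rows: {row_num}");

    // `build()` yields an iterator of `Result<RecordBatch>`.
    let reader = builder.with_batch_size(4096).build()?;
    for batch in reader {
        let batch = batch?;
        println!("batch with {} rows", batch.num_rows());
    }
    Ok(())
}
```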
@@ -210,9 +211,10 @@ fn build_values(column: &ArrayRef) -> Values {
         | DataType::FixedSizeList(_, _)
         | DataType::LargeList(_)
         | DataType::Struct(_)
-        | DataType::Union(_, _)
+        | DataType::Union(_, _, _)
         | DataType::Dictionary(_, _)
-        | DataType::Decimal(_, _)
+        | DataType::Decimal128(_, _)
+        | DataType::Decimal256(_, _)
         | DataType::Map(_, _) => todo!(),
     }
 }
@@ -32,7 +32,10 @@ message Column {
 
     repeated int32 date_values = 14;
     repeated int64 datetime_values = 15;
-    repeated int64 ts_millis_values = 16;
+    repeated int64 ts_second_values = 16;
+    repeated int64 ts_millisecond_values = 17;
+    repeated int64 ts_microsecond_values = 18;
+    repeated int64 ts_nanosecond_values = 19;
   }
   // The array of non-null values in this column.
   //
@@ -75,5 +78,8 @@ enum ColumnDataType {
   STRING = 12;
   DATE = 13;
   DATETIME = 14;
-  TIMESTAMP = 15;
+  TIMESTAMP_SECOND = 15;
+  TIMESTAMP_MILLISECOND = 16;
+  TIMESTAMP_MICROSECOND = 17;
+  TIMESTAMP_NANOSECOND = 18;
 }
@@ -15,6 +15,7 @@
 use common_base::BitVec;
 use common_time::timestamp::TimeUnit;
 use datatypes::prelude::ConcreteDataType;
+use datatypes::types::TimestampType;
 use datatypes::value::Value;
 use datatypes::vectors::VectorRef;
 use snafu::prelude::*;
@@ -56,7 +57,16 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
             ColumnDataType::String => ConcreteDataType::string_datatype(),
             ColumnDataType::Date => ConcreteDataType::date_datatype(),
             ColumnDataType::Datetime => ConcreteDataType::datetime_datatype(),
-            ColumnDataType::Timestamp => ConcreteDataType::timestamp_millis_datatype(),
+            ColumnDataType::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
+            ColumnDataType::TimestampMillisecond => {
+                ConcreteDataType::timestamp_millisecond_datatype()
+            }
+            ColumnDataType::TimestampMicrosecond => {
+                ConcreteDataType::timestamp_microsecond_datatype()
+            }
+            ColumnDataType::TimestampNanosecond => {
+                ConcreteDataType::timestamp_nanosecond_datatype()
+            }
         }
     }
 }
@@ -81,7 +91,12 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
             ConcreteDataType::String(_) => ColumnDataType::String,
             ConcreteDataType::Date(_) => ColumnDataType::Date,
             ConcreteDataType::DateTime(_) => ColumnDataType::Datetime,
-            ConcreteDataType::Timestamp(_) => ColumnDataType::Timestamp,
+            ConcreteDataType::Timestamp(unit) => match unit {
+                TimestampType::Second(_) => ColumnDataType::TimestampSecond,
+                TimestampType::Millisecond(_) => ColumnDataType::TimestampMillisecond,
+                TimestampType::Microsecond(_) => ColumnDataType::TimestampMicrosecond,
+                TimestampType::Nanosecond(_) => ColumnDataType::TimestampNanosecond,
+            },
             ConcreteDataType::Null(_) | ConcreteDataType::List(_) => {
                 return error::IntoColumnDataTypeSnafu { from: datatype }.fail()
             }
@@ -153,8 +168,20 @@ impl Values {
                 datetime_values: Vec::with_capacity(capacity),
                 ..Default::default()
             },
-            ColumnDataType::Timestamp => Values {
-                ts_millis_values: Vec::with_capacity(capacity),
+            ColumnDataType::TimestampSecond => Values {
+                ts_second_values: Vec::with_capacity(capacity),
                 ..Default::default()
             },
+            ColumnDataType::TimestampMillisecond => Values {
+                ts_millisecond_values: Vec::with_capacity(capacity),
+                ..Default::default()
+            },
+            ColumnDataType::TimestampMicrosecond => Values {
+                ts_microsecond_values: Vec::with_capacity(capacity),
+                ..Default::default()
+            },
+            ColumnDataType::TimestampNanosecond => Values {
+                ts_nanosecond_values: Vec::with_capacity(capacity),
+                ..Default::default()
+            },
         }
@@ -187,9 +214,12 @@ impl Column {
             Value::Binary(val) => values.binary_values.push(val.to_vec()),
             Value::Date(val) => values.date_values.push(val.val()),
             Value::DateTime(val) => values.datetime_values.push(val.val()),
-            Value::Timestamp(val) => values
-                .ts_millis_values
-                .push(val.convert_to(TimeUnit::Millisecond)),
+            Value::Timestamp(val) => match val.unit() {
+                TimeUnit::Second => values.ts_second_values.push(val.value()),
+                TimeUnit::Millisecond => values.ts_millisecond_values.push(val.value()),
+                TimeUnit::Microsecond => values.ts_microsecond_values.push(val.value()),
+                TimeUnit::Nanosecond => values.ts_nanosecond_values.push(val.value()),
+            },
             Value::List(_) => unreachable!(),
         });
         self.null_mask = null_mask.into_vec();
@@ -200,7 +230,10 @@
 mod tests {
     use std::sync::Arc;
 
-    use datatypes::vectors::BooleanVector;
+    use datatypes::vectors::{
+        BooleanVector, TimestampMicrosecondVector, TimestampMillisecondVector,
+        TimestampNanosecondVector, TimestampSecondVector,
+    };
 
     use super::*;
@@ -258,8 +291,8 @@ mod tests {
         let values = values.datetime_values;
         assert_eq!(2, values.capacity());
 
-        let values = Values::with_capacity(ColumnDataType::Timestamp, 2);
-        let values = values.ts_millis_values;
+        let values = Values::with_capacity(ColumnDataType::TimestampMillisecond, 2);
+        let values = values.ts_millisecond_values;
         assert_eq!(2, values.capacity());
     }
@@ -326,8 +359,8 @@ mod tests {
             ColumnDataTypeWrapper(ColumnDataType::Datetime).into()
         );
         assert_eq!(
-            ConcreteDataType::timestamp_millis_datatype(),
-            ColumnDataTypeWrapper(ColumnDataType::Timestamp).into()
+            ConcreteDataType::timestamp_millisecond_datatype(),
+            ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond).into()
         );
     }
@@ -394,8 +427,8 @@ mod tests {
             ConcreteDataType::datetime_datatype().try_into().unwrap()
         );
         assert_eq!(
-            ColumnDataTypeWrapper(ColumnDataType::Timestamp),
-            ConcreteDataType::timestamp_millis_datatype()
+            ColumnDataTypeWrapper(ColumnDataType::TimestampMillisecond),
+            ConcreteDataType::timestamp_millisecond_datatype()
                 .try_into()
                 .unwrap()
         );
@@ -412,7 +445,48 @@ mod tests {
         assert!(result.is_err());
         assert_eq!(
             result.unwrap_err().to_string(),
-            "Failed to create column datatype from List(ListType { inner: Boolean(BooleanType) })"
+            "Failed to create column datatype from List(ListType { item_type: Boolean(BooleanType) })"
         );
     }
 
+    #[test]
+    fn test_column_put_timestamp_values() {
+        let mut column = Column {
+            column_name: "test".to_string(),
+            semantic_type: 0,
+            values: Some(Values {
+                ..Default::default()
+            }),
+            null_mask: vec![],
+            datatype: 0,
+        };
+
+        let vector = Arc::new(TimestampNanosecondVector::from_vec(vec![1, 2, 3]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![1, 2, 3],
+            column.values.as_ref().unwrap().ts_nanosecond_values
+        );
+
+        let vector = Arc::new(TimestampMillisecondVector::from_vec(vec![4, 5, 6]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![4, 5, 6],
+            column.values.as_ref().unwrap().ts_millisecond_values
+        );
+
+        let vector = Arc::new(TimestampMicrosecondVector::from_vec(vec![7, 8, 9]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![7, 8, 9],
+            column.values.as_ref().unwrap().ts_microsecond_values
+        );
+
+        let vector = Arc::new(TimestampSecondVector::from_vec(vec![10, 11, 12]));
+        column.push_vals(3, vector);
+        assert_eq!(
+            vec![10, 11, 12],
+            column.values.as_ref().unwrap().ts_second_values
+        );
+    }
@@ -19,9 +19,7 @@ common-recordbatch = { path = "../common/recordbatch" }
 common-runtime = { path = "../common/runtime" }
 common-telemetry = { path = "../common/telemetry" }
 common-time = { path = "../common/time" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
-    "simd",
-] }
+datafusion = "14.0.0"
 datatypes = { path = "../datatypes" }
 futures = "0.3"
 futures-util = "0.3"
@@ -17,7 +17,7 @@ use std::any::Any;
 use common_error::ext::{BoxedError, ErrorExt};
 use common_error::prelude::{Snafu, StatusCode};
 use datafusion::error::DataFusionError;
-use datatypes::arrow;
+use datatypes::prelude::ConcreteDataType;
 use datatypes::schema::RawSchema;
 use snafu::{Backtrace, ErrorCompat};
@@ -51,14 +51,12 @@ pub enum Error {
     SystemCatalog { msg: String, backtrace: Backtrace },
 
     #[snafu(display(
-        "System catalog table type mismatch, expected: binary, found: {:?} source: {}",
+        "System catalog table type mismatch, expected: binary, found: {:?}",
         data_type,
-        source
     ))]
     SystemCatalogTypeMismatch {
-        data_type: arrow::datatypes::DataType,
-        #[snafu(backtrace)]
-        source: datatypes::error::Error,
+        data_type: ConcreteDataType,
+        backtrace: Backtrace,
     },
 
     #[snafu(display("Invalid system catalog entry type: {:?}", entry_type))]
@@ -222,10 +220,11 @@ impl ErrorExt for Error {
             | Error::ValueDeserialize { .. }
             | Error::Io { .. } => StatusCode::StorageUnavailable,
 
-            Error::RegisterTable { .. } => StatusCode::Internal,
+            Error::RegisterTable { .. } | Error::SystemCatalogTypeMismatch { .. } => {
+                StatusCode::Internal
+            }
 
             Error::ReadSystemCatalog { source, .. } => source.status_code(),
-            Error::SystemCatalogTypeMismatch { source, .. } => source.status_code(),
             Error::InvalidCatalogValue { source, .. } => source.status_code(),
 
             Error::TableExists { .. } => StatusCode::TableAlreadyExists,
@@ -265,7 +264,6 @@ impl From<Error> for DataFusionError {
 #[cfg(test)]
 mod tests {
     use common_error::mock::MockError;
-    use datatypes::arrow::datatypes::DataType;
     use snafu::GenerateImplicitData;
 
     use super::*;
@@ -314,11 +312,8 @@ mod tests {
         assert_eq!(
             StatusCode::Internal,
             Error::SystemCatalogTypeMismatch {
-                data_type: DataType::Boolean,
-                source: datatypes::error::Error::UnsupportedArrowType {
-                    arrow_type: DataType::Boolean,
-                    backtrace: Backtrace::generate()
-                }
+                data_type: ConcreteDataType::binary_datatype(),
+                backtrace: Backtrace::generate(),
             }
             .status_code()
         );
@@ -138,7 +138,7 @@ impl TableGlobalKey {
 
 /// Table global info contains necessary info for a datanode to create table regions, including
 /// table id, table meta(schema...), region id allocation across datanodes.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct TableGlobalValue {
     /// Id of datanode that created the global table info kv. only for debugging.
     pub node_id: u64,
@@ -145,27 +145,34 @@ impl LocalCatalogManager {
     /// Convert `RecordBatch` to a vector of `Entry`.
     fn record_batch_to_entry(rb: RecordBatch) -> Result<Vec<Entry>> {
         ensure!(
-            rb.df_recordbatch.columns().len() >= 6,
+            rb.num_columns() >= 6,
             SystemCatalogSnafu {
-                msg: format!("Length mismatch: {}", rb.df_recordbatch.columns().len())
+                msg: format!("Length mismatch: {}", rb.num_columns())
             }
         );
 
-        let entry_type = UInt8Vector::try_from_arrow_array(&rb.df_recordbatch.columns()[0])
-            .with_context(|_| SystemCatalogTypeMismatchSnafu {
-                data_type: rb.df_recordbatch.columns()[ENTRY_TYPE_INDEX]
-                    .data_type()
-                    .clone(),
+        let entry_type = rb
+            .column(ENTRY_TYPE_INDEX)
+            .as_any()
+            .downcast_ref::<UInt8Vector>()
+            .with_context(|| SystemCatalogTypeMismatchSnafu {
+                data_type: rb.column(ENTRY_TYPE_INDEX).data_type(),
             })?;
 
-        let key = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[1])
-            .with_context(|_| SystemCatalogTypeMismatchSnafu {
-                data_type: rb.df_recordbatch.columns()[KEY_INDEX].data_type().clone(),
+        let key = rb
+            .column(KEY_INDEX)
+            .as_any()
+            .downcast_ref::<BinaryVector>()
+            .with_context(|| SystemCatalogTypeMismatchSnafu {
+                data_type: rb.column(KEY_INDEX).data_type(),
             })?;
 
-        let value = BinaryVector::try_from_arrow_array(&rb.df_recordbatch.columns()[3])
-            .with_context(|_| SystemCatalogTypeMismatchSnafu {
-                data_type: rb.df_recordbatch.columns()[VALUE_INDEX].data_type().clone(),
+        let value = rb
+            .column(VALUE_INDEX)
+            .as_any()
+            .downcast_ref::<BinaryVector>()
+            .with_context(|| SystemCatalogTypeMismatchSnafu {
+                data_type: rb.column(VALUE_INDEX).data_type(),
             })?;
 
         let mut res = Vec::with_capacity(rb.num_rows());
@@ -21,14 +21,13 @@ use common_catalog::consts::{
     SYSTEM_CATALOG_TABLE_ID, SYSTEM_CATALOG_TABLE_NAME,
 };
 use common_query::logical_plan::Expr;
-use common_query::physical_plan::{PhysicalPlanRef, RuntimeEnv};
+use common_query::physical_plan::{PhysicalPlanRef, SessionContext};
 use common_recordbatch::SendableRecordBatchStream;
 use common_telemetry::debug;
-use common_time::timestamp::Timestamp;
 use common_time::util;
 use datatypes::prelude::{ConcreteDataType, ScalarVector};
 use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder, SchemaRef};
-use datatypes::vectors::{BinaryVector, TimestampVector, UInt8Vector};
+use datatypes::vectors::{BinaryVector, TimestampMillisecondVector, UInt8Vector};
 use serde::{Deserialize, Serialize};
 use snafu::{ensure, OptionExt, ResultExt};
 use table::engine::{EngineContext, TableEngineRef};
@@ -127,13 +126,14 @@ impl SystemCatalogTable {
     /// Create a stream of all entries inside system catalog table
     pub async fn records(&self) -> Result<SendableRecordBatchStream> {
         let full_projection = None;
+        let ctx = SessionContext::new();
         let scan = self
             .table
             .scan(&full_projection, &[], None)
             .await
             .context(error::SystemCatalogTableScanSnafu)?;
         let stream = scan
-            .execute(0, Arc::new(RuntimeEnv::default()))
+            .execute(0, ctx.task_ctx())
             .context(error::SystemCatalogTableScanExecSnafu)?;
         Ok(stream)
     }
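For reference, a minimal sketch of the DataFusion 14 pattern adopted above: a physical plan partition is now executed against a `TaskContext` obtained from a `SessionContext`, rather than a bare `RuntimeEnv`. This uses DataFusion's own types directly; the wrapper types in this repository differ slightly, and the plan argument is illustrative:

```rust
use std::sync::Arc;

use datafusion::error::Result;
use datafusion::physical_plan::{common, ExecutionPlan};
use datafusion::prelude::SessionContext;

// Execute partition 0 of an arbitrary physical plan and collect its batches.
async fn run_partition(plan: Arc<dyn ExecutionPlan>) -> Result<()> {
    let ctx = SessionContext::new();
    // `task_ctx()` bundles the runtime env, session config, and registries.
    let stream = plan.execute(0, ctx.task_ctx())?;
    let batches = common::collect(stream).await?;
    println!("collected {} batches", batches.len());
    Ok(())
}
```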
@@ -161,7 +161,7 @@ fn build_system_catalog_schema() -> Schema {
         ),
         ColumnSchema::new(
             "timestamp".to_string(),
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             false,
         )
         .with_time_index(true),
@@ -172,12 +172,12 @@ fn build_system_catalog_schema() -> Schema {
         ),
         ColumnSchema::new(
             "gmt_created".to_string(),
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             false,
         ),
         ColumnSchema::new(
             "gmt_modified".to_string(),
-            ConcreteDataType::timestamp_millis_datatype(),
+            ConcreteDataType::timestamp_millisecond_datatype(),
             false,
         ),
     ];
@@ -222,7 +222,7 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
     // Timestamp in key part is intentionally left to 0
     columns_values.insert(
         "timestamp".to_string(),
-        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(0)])) as _,
+        Arc::new(TimestampMillisecondVector::from_slice(&[0])) as _,
     );
 
     columns_values.insert(
@@ -230,18 +230,15 @@ pub fn build_insert_request(entry_type: EntryType, key: &[u8], value: &[u8]) ->
         Arc::new(BinaryVector::from_slice(&[value])) as _,
     );
 
+    let now = util::current_time_millis();
     columns_values.insert(
         "gmt_created".to_string(),
-        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
-            util::current_time_millis(),
-        )])) as _,
+        Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
     );
 
     columns_values.insert(
         "gmt_modified".to_string(),
-        Arc::new(TimestampVector::from_slice(&[Timestamp::from_millis(
-            util::current_time_millis(),
-        )])) as _,
+        Arc::new(TimestampMillisecondVector::from_slice(&[now])) as _,
    );
 
     InsertRequest {
@@ -26,9 +26,9 @@ use common_query::logical_plan::Expr;
 use common_query::physical_plan::PhysicalPlanRef;
 use common_recordbatch::error::Result as RecordBatchResult;
 use common_recordbatch::{RecordBatch, RecordBatchStream};
-use datatypes::prelude::{ConcreteDataType, VectorBuilder};
+use datatypes::prelude::{ConcreteDataType, DataType};
 use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
-use datatypes::value::Value;
+use datatypes::value::ValueRef;
 use datatypes::vectors::VectorRef;
 use futures::Stream;
 use snafu::ResultExt;
@@ -149,26 +149,33 @@ fn tables_to_record_batch(
     engine: &str,
 ) -> Vec<VectorRef> {
     let mut catalog_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
     let mut schema_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
     let mut table_name_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
     let mut engine_vec =
-        VectorBuilder::with_capacity(ConcreteDataType::string_datatype(), table_names.len());
+        ConcreteDataType::string_datatype().create_mutable_vector(table_names.len());
 
     for table_name in table_names {
-        catalog_vec.push(&Value::String(catalog_name.into()));
-        schema_vec.push(&Value::String(schema_name.into()));
-        table_name_vec.push(&Value::String(table_name.into()));
-        engine_vec.push(&Value::String(engine.into()));
+        // Safety: All these vectors are string type.
+        catalog_vec
+            .push_value_ref(ValueRef::String(catalog_name))
+            .unwrap();
+        schema_vec
+            .push_value_ref(ValueRef::String(schema_name))
+            .unwrap();
+        table_name_vec
+            .push_value_ref(ValueRef::String(&table_name))
+            .unwrap();
+        engine_vec.push_value_ref(ValueRef::String(engine)).unwrap();
     }
 
     vec![
-        catalog_vec.finish(),
-        schema_vec.finish(),
-        table_name_vec.finish(),
-        engine_vec.finish(),
+        catalog_vec.to_vector(),
+        schema_vec.to_vector(),
+        table_name_vec.to_vector(),
+        engine_vec.to_vector(),
     ]
 }
@@ -340,9 +347,7 @@ fn build_schema_for_tables() -> Schema {
 #[cfg(test)]
 mod tests {
     use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
-    use common_query::physical_plan::RuntimeEnv;
-    use datatypes::arrow::array::Utf8Array;
-    use datatypes::arrow::datatypes::DataType;
+    use common_query::physical_plan::SessionContext;
     use futures_util::StreamExt;
     use table::table::numbers::NumbersTable;
@@ -366,56 +371,47 @@ mod tests {
 
         let tables = Tables::new(catalog_list, "test_engine".to_string());
         let tables_stream = tables.scan(&None, &[], None).await.unwrap();
-        let mut tables_stream = tables_stream
-            .execute(0, Arc::new(RuntimeEnv::default()))
-            .unwrap();
+        let session_ctx = SessionContext::new();
+        let mut tables_stream = tables_stream.execute(0, session_ctx.task_ctx()).unwrap();
 
         if let Some(t) = tables_stream.next().await {
-            let batch = t.unwrap().df_recordbatch;
+            let batch = t.unwrap();
             assert_eq!(1, batch.num_rows());
             assert_eq!(4, batch.num_columns());
-            assert_eq!(&DataType::Utf8, batch.column(0).data_type());
-            assert_eq!(&DataType::Utf8, batch.column(1).data_type());
-            assert_eq!(&DataType::Utf8, batch.column(2).data_type());
-            assert_eq!(&DataType::Utf8, batch.column(3).data_type());
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(0).data_type()
+            );
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(1).data_type()
+            );
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(2).data_type()
+            );
+            assert_eq!(
+                ConcreteDataType::string_datatype(),
+                batch.column(3).data_type()
+            );
             assert_eq!(
                 "greptime",
-                batch
-                    .column(0)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(0).get_ref(0).as_string().unwrap().unwrap()
             );
 
             assert_eq!(
                 "public",
-                batch
-                    .column(1)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(1).get_ref(0).as_string().unwrap().unwrap()
             );
 
             assert_eq!(
                 "test_table",
-                batch
-                    .column(2)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(2).get_ref(0).as_string().unwrap().unwrap()
             );
 
             assert_eq!(
                 "test_engine",
-                batch
-                    .column(3)
-                    .as_any()
-                    .downcast_ref::<Utf8Array<i32>>()
-                    .unwrap()
-                    .value(0)
+                batch.column(3).get_ref(0).as_string().unwrap().unwrap()
            );
         } else {
             panic!("Record batch should not be empty!")
@@ -15,9 +15,7 @@ common-grpc-expr = { path = "../common/grpc-expr" }
 common-query = { path = "../common/query" }
 common-recordbatch = { path = "../common/recordbatch" }
 common-time = { path = "../common/time" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
-    "simd",
-] }
+datafusion = "14.0.0"
 datatypes = { path = "../datatypes" }
 enum_dispatch = "0.3"
 parking_lot = "0.12"
@@ -41,7 +41,7 @@ async fn run() {
         column_defs: vec![
             ColumnDef {
                 name: "timestamp".to_string(),
-                datatype: ColumnDataType::Timestamp as i32,
+                datatype: ColumnDataType::TimestampMillisecond as i32,
                 is_nullable: false,
                 default_constraint: None,
             },
@@ -318,12 +318,11 @@ mod tests {
 
     fn create_test_column(vector: VectorRef) -> Column {
         let wrapper: ColumnDataTypeWrapper = vector.data_type().try_into().unwrap();
-        let array = vector.to_arrow_array();
         Column {
             column_name: "test".to_string(),
             semantic_type: 1,
-            values: Some(values(&[array.clone()]).unwrap()),
-            null_mask: null_mask(&vec![array], vector.len()),
+            values: Some(values(&[vector.clone()]).unwrap()),
+            null_mask: null_mask(&[vector.clone()], vector.len()),
             datatype: wrapper.datatype() as i32,
         }
     }
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::sync::Arc;
-
 use anymap::AnyMap;
 use clap::Parser;
 use frontend::frontend::{Frontend, FrontendOptions};
@@ -138,14 +136,14 @@ impl TryFrom<StartCommand> for FrontendOptions {
         if let Some(addr) = cmd.mysql_addr {
             opts.mysql_options = Some(MysqlOptions {
                 addr,
-                tls: Arc::new(tls_option.clone()),
+                tls: tls_option.clone(),
                 ..Default::default()
             });
         }
         if let Some(addr) = cmd.postgres_addr {
             opts.postgres_options = Some(PostgresOptions {
                 addr,
-                tls: Arc::new(tls_option),
+                tls: tls_option,
                 ..Default::default()
             });
         }
@@ -12,8 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::sync::Arc;
-
 use anymap::AnyMap;
 use clap::Parser;
 use common_telemetry::info;
@@ -262,12 +260,12 @@ impl TryFrom<StartCommand> for FrontendOptions {
         let tls_option = TlsOption::new(cmd.tls_mode, cmd.tls_cert_path, cmd.tls_key_path);
 
         if let Some(mut mysql_options) = opts.mysql_options {
-            mysql_options.tls = Arc::new(tls_option.clone());
+            mysql_options.tls = tls_option.clone();
             opts.mysql_options = Some(mysql_options);
         }
 
         if let Some(mut postgres_options) = opts.postgres_options {
-            postgres_options.tls = Arc::new(tls_option);
+            postgres_options.tls = tls_option;
             opts.postgres_options = Some(postgres_options);
         }
@@ -11,7 +11,7 @@ common-error = { path = "../error" }
 common-function-macro = { path = "../function-macro" }
 common-query = { path = "../query" }
 common-time = { path = "../time" }
-datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
+datafusion-common = "14.0.0"
 datatypes = { path = "../../datatypes" }
 libc = "0.2"
 num = "0.4"
@@ -1,69 +0,0 @@
-// Copyright 2022 Greptime Team
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::any::Any;
-
-use common_error::prelude::*;
-pub use common_query::error::{Error, Result};
-use datatypes::error::Error as DataTypeError;
-
-#[derive(Debug, Snafu)]
-#[snafu(visibility(pub))]
-pub enum InnerError {
-    #[snafu(display("Fail to get scalar vector, {}", source))]
-    GetScalarVector {
-        source: DataTypeError,
-        backtrace: Backtrace,
-    },
-}
-
-impl ErrorExt for InnerError {
-    fn backtrace_opt(&self) -> Option<&Backtrace> {
-        ErrorCompat::backtrace(self)
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self
-    }
-}
-
-impl From<InnerError> for Error {
-    fn from(err: InnerError) -> Self {
-        Self::new(err)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use snafu::GenerateImplicitData;
-
-    use super::*;
-
-    fn raise_datatype_error() -> std::result::Result<(), DataTypeError> {
-        Err(DataTypeError::Conversion {
-            from: "test".to_string(),
-            backtrace: Backtrace::generate(),
-        })
-    }
-
-    #[test]
-    fn test_get_scalar_vector_error() {
-        let err: Error = raise_datatype_error()
-            .context(GetScalarVectorSnafu)
-            .err()
-            .unwrap()
-            .into();
-        assert!(err.backtrace_opt().is_some());
-    }
-}
@@ -12,5 +12,4 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-pub mod error;
 pub mod scalars;
@@ -23,6 +23,5 @@ pub(crate) mod test;
 mod timestamp;
 pub mod udf;
 
-pub use aggregate::MedianAccumulatorCreator;
 pub use function::{Function, FunctionRef};
 pub use function_registry::{FunctionRegistry, FUNCTION_REGISTRY};
@@ -16,7 +16,6 @@ mod argmax;
 mod argmin;
 mod diff;
 mod mean;
-mod median;
 mod percentile;
 mod polyval;
 mod scipy_stats_norm_cdf;
@@ -29,7 +28,6 @@ pub use argmin::ArgminAccumulatorCreator;
 use common_query::logical_plan::AggregateFunctionCreatorRef;
 pub use diff::DiffAccumulatorCreator;
 pub use mean::MeanAccumulatorCreator;
-pub use median::MedianAccumulatorCreator;
 pub use percentile::PercentileAccumulatorCreator;
 pub use polyval::PolyvalAccumulatorCreator;
 pub use scipy_stats_norm_cdf::ScipyStatsNormCdfAccumulatorCreator;
@@ -88,7 +86,6 @@ impl AggregateFunctions {
         };
     }
 
-    register_aggr_func!("median", 1, MedianAccumulatorCreator);
     register_aggr_func!("diff", 1, DiffAccumulatorCreator);
     register_aggr_func!("mean", 1, MeanAccumulatorCreator);
     register_aggr_func!("polyval", 2, PolyvalAccumulatorCreator);
@@ -20,24 +20,22 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::vectors::ConstantVector;
+use datatypes::types::{LogicalPrimitiveType, WrapperType};
+use datatypes::vectors::{ConstantVector, Helper};
 use datatypes::with_match_primitive_type_id;
 use snafu::ensure;
 
 // https://numpy.org/doc/stable/reference/generated/numpy.argmax.html
 // return the index of the max value
 #[derive(Debug, Default)]
-pub struct Argmax<T>
-where
-    T: Primitive + PartialOrd,
-{
+pub struct Argmax<T> {
     max: Option<T>,
     n: u64,
 }
 
 impl<T> Argmax<T>
 where
-    T: Primitive + PartialOrd,
+    T: PartialOrd + Copy,
 {
     fn update(&mut self, value: T, index: u64) {
         if let Some(Ordering::Less) = self.max.partial_cmp(&Some(value)) {
@@ -49,8 +47,7 @@ where
 
 impl<T> Accumulator for Argmax<T>
 where
-    T: Primitive + PartialOrd,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType + PartialOrd,
 {
     fn state(&self) -> Result<Vec<Value>> {
         match self.max {
@@ -66,10 +63,10 @@ where
 
         let column = &values[0];
         let column: &<T as Scalar>::VectorType = if column.is_const() {
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         for (i, v) in column.iter_data().enumerate() {
             if let Some(value) = v {
@@ -93,8 +90,8 @@ where
 
         let max = &states[0];
         let index = &states[1];
-        let max: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(max) };
-        let index: &<u64 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
+        let max: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(max) };
+        let index: &<u64 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
         index
             .iter_data()
             .flatten()
@@ -122,7 +119,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(Argmax::<$S>::default()))
+                Ok(Box::new(Argmax::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(
@@ -154,7 +151,7 @@ impl AggregateFunctionCreator for ArgmaxAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
     use super::*;
     #[test]
@@ -166,21 +163,19 @@ mod test {
 
         // test update one not-null value
         let mut argmax = Argmax::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
         assert!(argmax.update_batch(&v).is_ok());
         assert_eq!(Value::from(0_u64), argmax.evaluate().unwrap());
 
         // test update one null value
         let mut argmax = Argmax::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
         assert!(argmax.update_batch(&v).is_ok());
         assert_eq!(Value::Null, argmax.evaluate().unwrap());
 
         // test update no null-value batch
         let mut argmax = Argmax::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-1i32),
             Some(1),
             Some(3),
@@ -190,7 +185,7 @@ mod test {
 
         // test update null-value batch
         let mut argmax = Argmax::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-2i32),
             None,
             Some(4),
@@ -201,7 +196,7 @@ mod test {
         // test update with constant vector
         let mut argmax = Argmax::<i32>::default();
         let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+            Arc::new(Int32Vector::from_vec(vec![4])),
             10,
         ))];
         assert!(argmax.update_batch(&v).is_ok());
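The update rule above leans on `Option`'s `PartialOrd`, where `None` sorts below any `Some`. A standalone sketch of the same argmax logic over plain slices, independent of the vector machinery (the function name is illustrative):

```rust
use std::cmp::Ordering;

// Returns the index of the first maximum among the non-null values, if any.
fn argmax<T: PartialOrd + Copy>(values: &[Option<T>]) -> Option<u64> {
    let mut max: Option<T> = None;
    let mut n = 0u64;
    for (i, v) in values.iter().enumerate() {
        if let Some(value) = v {
            // `None.partial_cmp(&Some(_))` is `Some(Ordering::Less)`,
            // so the first non-null value always wins.
            if let Some(Ordering::Less) = max.partial_cmp(&Some(*value)) {
                max = Some(*value);
                n = i as u64;
            }
        }
    }
    max.map(|_| n)
}

fn main() {
    assert_eq!(argmax(&[Some(-1i32), Some(1), Some(3)]), Some(2));
    assert_eq!(argmax::<i32>(&[None, None]), None);
}
```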
@@ -20,23 +20,20 @@ use common_query::error::{BadAccumulatorImplSnafu, CreateAccumulatorSnafu, Resul
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::vectors::ConstantVector;
+use datatypes::vectors::{ConstantVector, Helper};
 use datatypes::with_match_primitive_type_id;
 use snafu::ensure;
 
 // // https://numpy.org/doc/stable/reference/generated/numpy.argmin.html
 #[derive(Debug, Default)]
-pub struct Argmin<T>
-where
-    T: Primitive + PartialOrd,
-{
+pub struct Argmin<T> {
     min: Option<T>,
     n: u32,
 }
 
 impl<T> Argmin<T>
 where
-    T: Primitive + PartialOrd,
+    T: Copy + PartialOrd,
 {
     fn update(&mut self, value: T, index: u32) {
         match self.min {
@@ -56,8 +53,7 @@ where
 
 impl<T> Accumulator for Argmin<T>
 where
-    T: Primitive + PartialOrd,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType + PartialOrd,
 {
     fn state(&self) -> Result<Vec<Value>> {
         match self.min {
@@ -75,10 +71,10 @@ where
 
         let column = &values[0];
         let column: &<T as Scalar>::VectorType = if column.is_const() {
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         for (i, v) in column.iter_data().enumerate() {
             if let Some(value) = v {
@@ -102,8 +98,8 @@ where
 
         let min = &states[0];
         let index = &states[1];
-        let min: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(min) };
-        let index: &<u32 as Scalar>::VectorType = unsafe { VectorHelper::static_cast(index) };
+        let min: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(min) };
+        let index: &<u32 as Scalar>::VectorType = unsafe { Helper::static_cast(index) };
         index
             .iter_data()
             .flatten()
@@ -131,7 +127,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(Argmin::<$S>::default()))
+                Ok(Box::new(Argmin::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for ArgminAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
     use super::*;
     #[test]
@@ -175,21 +171,19 @@ mod test {
 
         // test update one not-null value
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
         assert!(argmin.update_batch(&v).is_ok());
         assert_eq!(Value::from(0_u32), argmin.evaluate().unwrap());
 
         // test update one null value
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
         assert!(argmin.update_batch(&v).is_ok());
         assert_eq!(Value::Null, argmin.evaluate().unwrap());
 
         // test update no null-value batch
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-1i32),
             Some(1),
             Some(3),
@@ -199,7 +193,7 @@ mod test {
 
         // test update null-value batch
         let mut argmin = Argmin::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-2i32),
             None,
             Some(4),
@@ -210,7 +204,7 @@ mod test {
         // test update with constant vector
         let mut argmin = Argmin::<i32>::default();
         let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+            Arc::new(Int32Vector::from_vec(vec![4])),
             10,
         ))];
         assert!(argmin.update_batch(&v).is_ok());
@@ -22,40 +22,32 @@ use common_query::error::{
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::types::PrimitiveType;
 use datatypes::value::ListValue;
-use datatypes::vectors::{ConstantVector, ListVector};
+use datatypes::vectors::{ConstantVector, Helper, ListVector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt, ResultExt};
 
 // https://numpy.org/doc/stable/reference/generated/numpy.diff.html
+// I is the input type, O is the output type.
 #[derive(Debug, Default)]
-pub struct Diff<T, SubT>
-where
-    T: Primitive + AsPrimitive<SubT>,
-    SubT: Primitive + std::ops::Sub<Output = SubT>,
-{
-    values: Vec<T>,
-    _phantom: PhantomData<SubT>,
+pub struct Diff<I, O> {
+    values: Vec<I>,
+    _phantom: PhantomData<O>,
 }
 
-impl<T, SubT> Diff<T, SubT>
-where
-    T: Primitive + AsPrimitive<SubT>,
-    SubT: Primitive + std::ops::Sub<Output = SubT>,
-{
-    fn push(&mut self, value: T) {
+impl<I, O> Diff<I, O> {
+    fn push(&mut self, value: I) {
         self.values.push(value);
     }
 }
 
-impl<T, SubT> Accumulator for Diff<T, SubT>
+impl<I, O> Accumulator for Diff<I, O>
 where
-    T: Primitive + AsPrimitive<SubT>,
-    for<'a> T: Scalar<RefType<'a> = T>,
-    SubT: Primitive + std::ops::Sub<Output = SubT>,
-    for<'a> SubT: Scalar<RefType<'a> = SubT>,
+    I: WrapperType,
+    O: WrapperType,
+    I::Native: AsPrimitive<O::Native>,
+    O::Native: std::ops::Sub<Output = O::Native>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         let nums = self
@@ -65,7 +57,7 @@ where
             .collect::<Vec<Value>>();
         Ok(vec![Value::List(ListValue::new(
             Some(Box::new(nums)),
-            T::default().into().data_type(),
+            I::LogicalType::build_data_type(),
         ))])
     }
@@ -78,12 +70,12 @@ where
 
         let column = &values[0];
         let mut len = 1;
-        let column: &<T as Scalar>::VectorType = if column.is_const() {
+        let column: &<I as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         (0..len).for_each(|_| {
             for v in column.iter_data().flatten() {
@@ -109,8 +101,9 @@ where
             ),
         })?;
         for state in states.values_iter() {
-            let state = state.context(FromScalarValueSnafu)?;
-            self.update_batch(&[state])?
+            if let Some(state) = state.context(FromScalarValueSnafu)? {
+                self.update_batch(&[state])?;
+            }
         }
         Ok(())
     }
@@ -122,11 +115,14 @@ where
         let diff = self
             .values
             .windows(2)
-            .map(|x| (x[1].as_() - x[0].as_()).into())
+            .map(|x| {
+                let native = x[1].into_native().as_() - x[0].into_native().as_();
+                O::from_native(native).into()
+            })
             .collect::<Vec<Value>>();
         let diff = Value::List(ListValue::new(
             Some(Box::new(diff)),
-            SubT::default().into().data_type(),
+            O::LogicalType::build_data_type(),
         ));
         Ok(diff)
     }
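The pairwise difference itself is just `slice::windows(2)` with a widening cast. A standalone sketch of the same computation on plain integers, widening i32 inputs to i64 outputs to mirror the I/O type split above:

```rust
// Pairwise differences in the style of numpy.diff:
// output length is input length - 1.
fn diff(values: &[i32]) -> Vec<i64> {
    values
        .windows(2)
        .map(|w| i64::from(w[1]) - i64::from(w[0]))
        .collect()
}

fn main() {
    assert_eq!(diff(&[-1, 1, 2]), vec![2, 1]);
    assert_eq!(diff(&[42]), Vec::<i64>::new()); // fewer than two values -> empty
}
```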
@@ -143,7 +139,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(Diff::<$S,<$S as Primitive>::LargestType>::default()))
+                Ok(Box::new(Diff::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
             },
             {
                 let err_msg = format!(
@@ -163,7 +159,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
         with_match_primitive_type_id!(
             input_types[0].logical_type_id(),
             |$S| {
-                Ok(ConcreteDataType::list_datatype(PrimitiveType::<<$S as Primitive>::LargestType>::default().into()))
+                Ok(ConcreteDataType::list_datatype($S::default().into()))
             },
             {
                 unreachable!()
@@ -177,7 +173,7 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
         with_match_primitive_type_id!(
             input_types[0].logical_type_id(),
             |$S| {
-                Ok(vec![ConcreteDataType::list_datatype(PrimitiveType::<$S>::default().into())])
+                Ok(vec![ConcreteDataType::list_datatype($S::default().into())])
             },
             {
                 unreachable!()
@@ -188,9 +184,10 @@ impl AggregateFunctionCreator for DiffAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
     use super::*;
 
     #[test]
     fn test_update_batch() {
         // test update empty batch, expect not updating anything
@@ -201,21 +198,19 @@ mod test {
 
         // test update one not-null value
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
         assert!(diff.update_batch(&v).is_ok());
         assert_eq!(Value::Null, diff.evaluate().unwrap());
 
         // test update one null value
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
         assert!(diff.update_batch(&v).is_ok());
         assert_eq!(Value::Null, diff.evaluate().unwrap());
 
         // test update no null-value batch
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-1i32),
             Some(1),
             Some(2),
@@ -232,7 +227,7 @@ mod test {
 
         // test update null-value batch
         let mut diff = Diff::<i32, i64>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-2i32),
             None,
             Some(3),
@@ -251,7 +246,7 @@ mod test {
         // test update with constant vector
         let mut diff = Diff::<i32, i64>::default();
         let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+            Arc::new(Int32Vector::from_vec(vec![4])),
             4,
         ))];
         let values = vec![Value::from(0_i64), Value::from(0_i64), Value::from(0_i64)];
@@ -22,16 +22,14 @@ use common_query::error::{
 use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
 use common_query::prelude::*;
 use datatypes::prelude::*;
-use datatypes::vectors::{ConstantVector, Float64Vector, UInt64Vector};
+use datatypes::types::WrapperType;
+use datatypes::vectors::{ConstantVector, Float64Vector, Helper, UInt64Vector};
 use datatypes::with_match_primitive_type_id;
 use num_traits::AsPrimitive;
 use snafu::{ensure, OptionExt};
 
 #[derive(Debug, Default)]
-pub struct Mean<T>
-where
-    T: Primitive + AsPrimitive<f64>,
-{
+pub struct Mean<T> {
     sum: f64,
     n: u64,
     _phantom: PhantomData<T>,
@@ -39,11 +37,12 @@ where
 
 impl<T> Mean<T>
 where
-    T: Primitive + AsPrimitive<f64>,
+    T: WrapperType,
+    T::Native: AsPrimitive<f64>,
 {
     #[inline(always)]
     fn push(&mut self, value: T) {
-        self.sum += value.as_();
+        self.sum += value.into_native().as_();
         self.n += 1;
     }
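A standalone sketch of the same running-mean accumulation with `num_traits::AsPrimitive`, outside the vector machinery (the struct and method names here are illustrative, not this crate's API):

```rust
use num_traits::AsPrimitive;

// Running mean over any primitive that casts to f64.
struct Mean {
    sum: f64,
    n: u64,
}

impl Mean {
    fn push<T: Copy + AsPrimitive<f64>>(&mut self, value: T) {
        self.sum += value.as_();
        self.n += 1;
    }

    fn evaluate(&self) -> Option<f64> {
        (self.n > 0).then(|| self.sum / self.n as f64)
    }
}

fn main() {
    let mut mean = Mean { sum: 0.0, n: 0 };
    for v in [-1i32, 1, 2] {
        mean.push(v);
    }
    assert_eq!(mean.evaluate(), Some(2.0 / 3.0));
}
```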
@@ -56,8 +55,8 @@ where
 
 impl<T> Accumulator for Mean<T>
 where
-    T: Primitive + AsPrimitive<f64>,
-    for<'a> T: Scalar<RefType<'a> = T>,
+    T: WrapperType,
+    T::Native: AsPrimitive<f64>,
 {
     fn state(&self) -> Result<Vec<Value>> {
         Ok(vec![self.sum.into(), self.n.into()])
@@ -73,10 +72,10 @@ where
         let mut len = 1;
         let column: &<T as Scalar>::VectorType = if column.is_const() {
             len = column.len();
-            let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
-            unsafe { VectorHelper::static_cast(column.inner()) }
+            let column: &ConstantVector = unsafe { Helper::static_cast(column) };
+            unsafe { Helper::static_cast(column.inner()) }
         } else {
-            unsafe { VectorHelper::static_cast(column) }
+            unsafe { Helper::static_cast(column) }
         };
         (0..len).for_each(|_| {
             for v in column.iter_data().flatten() {
@@ -150,7 +149,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
         with_match_primitive_type_id!(
             input_type.logical_type_id(),
             |$S| {
-                Ok(Box::new(Mean::<$S>::default()))
+                Ok(Box::new(Mean::<<$S as LogicalPrimitiveType>::Native>::default()))
             },
             {
                 let err_msg = format!(
@@ -182,7 +181,7 @@ impl AggregateFunctionCreator for MeanAccumulatorCreator {
 
 #[cfg(test)]
 mod test {
-    use datatypes::vectors::PrimitiveVector;
+    use datatypes::vectors::Int32Vector;
 
     use super::*;
     #[test]
@@ -194,21 +193,19 @@ mod test {
 
         // test update one not-null value
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Some(42)]))];
         assert!(mean.update_batch(&v).is_ok());
         assert_eq!(Value::from(42.0_f64), mean.evaluate().unwrap());
 
         // test update one null value
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
-            Option::<i32>::None,
-        ]))];
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![Option::<i32>::None]))];
         assert!(mean.update_batch(&v).is_ok());
         assert_eq!(Value::Null, mean.evaluate().unwrap());
 
         // test update no null-value batch
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-1i32),
             Some(1),
             Some(2),
@@ -218,7 +215,7 @@ mod test {
 
         // test update null-value batch
         let mut mean = Mean::<i32>::default();
-        let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
+        let v: Vec<VectorRef> = vec![Arc::new(Int32Vector::from(vec![
             Some(-2i32),
             None,
             Some(3),
@@ -230,7 +227,7 @@ mod test {
         // test update with constant vector
         let mut mean = Mean::<i32>::default();
         let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
-            Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
+            Arc::new(Int32Vector::from_vec(vec![4])),
             10,
         ))];
         assert!(mean.update_batch(&v).is_ok());
@@ -1,289 +0,0 @@
|
||||
// Copyright 2022 Greptime Team
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_function_macro::{as_aggr_func_creator, AggrFuncTypeStore};
|
||||
use common_query::error::{
|
||||
CreateAccumulatorSnafu, DowncastVectorSnafu, FromScalarValueSnafu, Result,
|
||||
};
|
||||
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
|
||||
use common_query::prelude::*;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::OrdPrimitive;
|
||||
use datatypes::value::ListValue;
|
||||
use datatypes::vectors::{ConstantVector, ListVector};
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
use num::NumCast;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
|
||||
// This median calculation algorithm's details can be found at
// https://leetcode.cn/problems/find-median-from-data-stream/
//
// Basically, it uses two heaps: a maximum heap and a minimum heap. The maximum heap stores numbers
// that are not greater than the median, and the minimum heap stores the greater ones. In a stream
// of numbers, when a number arrives, we adjust the heaps' tops so that either one top is the
// median or the two tops can be averaged to get the median.
//
// The time complexity to update the median is O(logn), and O(1) to get it; the space
// complexity is O(n). (Ignoring the costs of heap expansion.)
//
// Algorithm-wise, [quick select](https://en.wikipedia.org/wiki/Quickselect) might be
// better. But to use quick select here, we need a mutable self in the final calculation (`evaluate`)
// to swap stored numbers in the states vector. Though we could make our `evaluate` receive
// `&mut self`, DataFusion calls our accumulator with `&self` (see `DfAccumulatorAdaptor`). That
// means we would have to introduce some kind of interior mutability, and the overhead is not negligible.
//
// TODO(LFC): Use quick select to get the median when we can modify DataFusion's code, and benchmark it against the two-heap algorithm.
#[derive(Debug, Default)]
pub struct Median<T>
where
T: Primitive,
{
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
not_greater: BinaryHeap<OrdPrimitive<T>>,
}

impl<T> Median<T>
where
T: Primitive,
{
fn push(&mut self, value: T) {
let value = OrdPrimitive::<T>(value);

if self.not_greater.is_empty() {
self.not_greater.push(value);
return;
}
// The `unwrap`s below are safe because there are `push`s before them.
if value <= *self.not_greater.peek().unwrap() {
self.not_greater.push(value);
if self.not_greater.len() > self.greater.len() + 1 {
self.greater.push(Reverse(self.not_greater.pop().unwrap()));
}
} else {
self.greater.push(Reverse(value));
if self.greater.len() > self.not_greater.len() {
self.not_greater.push(self.greater.pop().unwrap().0);
}
}
}
}
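
A minimal, self-contained sketch of the two-heap scheme described in the comments above, assuming plain i64 inputs and an f64 result (the `StreamingMedian` name is hypothetical, not part of the crate):

use std::cmp::Reverse;
use std::collections::BinaryHeap;

#[derive(Default)]
struct StreamingMedian {
    // max-heap holding values not greater than the median
    not_greater: BinaryHeap<i64>,
    // min-heap holding the greater values
    greater: BinaryHeap<Reverse<i64>>,
}

impl StreamingMedian {
    fn push(&mut self, v: i64) {
        if self.not_greater.is_empty() || v <= *self.not_greater.peek().unwrap() {
            self.not_greater.push(v);
            // rebalance: `not_greater` may hold at most one extra element
            if self.not_greater.len() > self.greater.len() + 1 {
                self.greater.push(Reverse(self.not_greater.pop().unwrap()));
            }
        } else {
            self.greater.push(Reverse(v));
            if self.greater.len() > self.not_greater.len() {
                self.not_greater.push(self.greater.pop().unwrap().0);
            }
        }
    }

    fn median(&self) -> Option<f64> {
        let low = *self.not_greater.peek()?;
        if self.not_greater.len() > self.greater.len() {
            Some(low as f64) // odd count: the max-heap top is the median
        } else {
            let high = self.greater.peek().unwrap().0;
            Some((low as f64 + high as f64) / 2.0) // even count: average the two tops
        }
    }
}

fn main() {
    let mut m = StreamingMedian::default();
    for v in [-2, 3, 4] {
        m.push(v);
    }
    // same data and result as the null-value case in `test_update_batch` below
    assert_eq!(m.median(), Some(3.0));
}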

// UDAFs are built using the trait `Accumulator`, which offers DataFusion the necessary functions
// to use them.
impl<T> Accumulator for Median<T>
where
T: Primitive,
for<'a> T: Scalar<RefType<'a> = T>,
{
// This function serializes our state to `ScalarValue`, which DataFusion uses to pass this
// state between execution stages. Note that this can be arbitrary data.
//
// The `ScalarValue`s returned here will be passed in as argument `states: &[VectorRef]` to the
// `merge_batch` function.
fn state(&self) -> Result<Vec<Value>> {
let nums = self
.greater
.iter()
.map(|x| &x.0)
.chain(self.not_greater.iter())
.map(|&n| n.into())
.collect::<Vec<Value>>();
Ok(vec![Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
))])
}

// DataFusion calls this function to update the accumulator's state for a batch of input rows.
// This function is expected to update the accumulator's state.
fn update_batch(&mut self, values: &[VectorRef]) -> Result<()> {
if values.is_empty() {
return Ok(());
}

ensure!(values.len() == 1, InvalidInputStateSnafu);

// This is a unary accumulator, so only one column is provided.
let column = &values[0];
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
self.push(v);
}
});
Ok(())
}

// DataFusion executes accumulators in partitions. In some execution stage, DataFusion will
// merge states from other accumulators (returned by the `state()` method).
fn merge_batch(&mut self, states: &[VectorRef]) -> Result<()> {
if states.is_empty() {
return Ok(());
}

// The states here are returned by the `state` method. Since we only returned a vector
// with one value in that method, `states[0]` is fine.
let states = &states[0];
let states = states
.as_any()
.downcast_ref::<ListVector>()
.with_context(|| DowncastVectorSnafu {
err_msg: format!(
"expect ListVector, got vector type {}",
states.vector_type_name()
),
})?;
for state in states.values_iter() {
let state = state.context(FromScalarValueSnafu)?;
// merging states simply accumulates stored numbers from other accumulators, so just call update
self.update_batch(&[state])?
}
Ok(())
}

// DataFusion expects this function to return the final value of this aggregator.
fn evaluate(&self) -> Result<Value> {
if self.not_greater.is_empty() {
assert!(
self.greater.is_empty(),
"not expected in two-heap median algorithm, there must be a bug when implementing it"
);
return Ok(Value::Null);
}

// unwrap is safe because we checked not_greater heap's len above
let not_greater = *self.not_greater.peek().unwrap();
let median = if self.not_greater.len() > self.greater.len() {
not_greater.into()
} else {
// unwrap is safe because greater heap len >= not_greater heap len, which is > 0
let greater = self.greater.peek().unwrap();

// the following three NumCast's `unwrap`s are safe because T is primitive
let not_greater_v: f64 = NumCast::from(not_greater.as_primitive()).unwrap();
let greater_v: f64 = NumCast::from(greater.0.as_primitive()).unwrap();
let median: T = NumCast::from((not_greater_v + greater_v) / 2.0).unwrap();
median.into()
};
Ok(median)
}
}

#[as_aggr_func_creator]
#[derive(Debug, Default, AggrFuncTypeStore)]
pub struct MedianAccumulatorCreator {}

impl AggregateFunctionCreator for MedianAccumulatorCreator {
fn creator(&self) -> AccumulatorCreatorFunction {
let creator: AccumulatorCreatorFunction = Arc::new(move |types: &[ConcreteDataType]| {
let input_type = &types[0];
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Median::<$S>::default()))
},
{
let err_msg = format!(
"\"MEDIAN\" aggregate function not support data type {:?}",
input_type.logical_type_id(),
);
CreateAccumulatorSnafu { err_msg }.fail()?
}
)
});
creator
}

fn output_type(&self) -> Result<ConcreteDataType> {
let input_types = self.input_types()?;
ensure!(input_types.len() == 1, InvalidInputStateSnafu);
// unwrap is safe because we have checked input_types len must equals 1
Ok(input_types.into_iter().next().unwrap())
}

fn state_types(&self) -> Result<Vec<ConcreteDataType>> {
Ok(vec![ConcreteDataType::list_datatype(self.output_type()?)])
}
}

#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;

use super::*;
#[test]
fn test_update_batch() {
// test update empty batch, expect not updating anything
let mut median = Median::<i32>::default();
assert!(median.update_batch(&[]).is_ok());
assert!(median.not_greater.is_empty());
assert!(median.greater.is_empty());
assert_eq!(Value::Null, median.evaluate().unwrap());

// test update one not-null value
let mut median = Median::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)]))];
assert!(median.update_batch(&v).is_ok());
assert_eq!(Value::Int32(42), median.evaluate().unwrap());

// test update one null value
let mut median = Median::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
Option::<i32>::None,
]))];
assert!(median.update_batch(&v).is_ok());
assert_eq!(Value::Null, median.evaluate().unwrap());

// test update no null-value batch
let mut median = Median::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
]))];
assert!(median.update_batch(&v).is_ok());
assert_eq!(Value::Int32(1), median.evaluate().unwrap());

// test update null-value batch
let mut median = Median::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
]))];
assert!(median.update_batch(&v).is_ok());
assert_eq!(Value::Int32(3), median.evaluate().unwrap());

// test update with constant vector
let mut median = Median::<i32>::default();
let v: Vec<VectorRef> = vec![Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
10,
))];
assert!(median.update_batch(&v).is_ok());
assert_eq!(Value::Int32(4), median.evaluate().unwrap());
}
}
@@ -26,7 +26,7 @@ use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::OrdPrimitive;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num::NumCast;
use snafu::{ensure, OptionExt, ResultExt};
@@ -44,15 +44,15 @@ use snafu::{ensure, OptionExt, ResultExt};
// This optional method parameter specifies the method to use when the desired quantile lies between two data points i < j.
// If g is the fractional part of the index surrounded by i and alpha and beta are correction constants modifying i and j.
// i+g = (q-alpha)/(n-alpha-beta+1)
// Below, ‘q’ is the quantile value, ‘n’ is the sample size and alpha and beta are constants. The following formula gives an interpolation “i + g” of where the quantile would be in the sorted sample.
// With ‘i’ being the floor and ‘g’ the fractional part of the result.
// Below, 'q' is the quantile value, 'n' is the sample size and alpha and beta are constants. The following formula gives an interpolation "i + g" of where the quantile would be in the sorted sample.
// With 'i' being the floor and 'g' the fractional part of the result.
// the default method is linear where
// alpha = 1
// beta = 1
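
With the linear method (alpha = beta = 1), the interpolation above reduces to pos = p/100 * (n - 1) on a zero-based sorted sample. A sketch over a plain f64 slice, assuming that simplification (`percentile_linear` is a hypothetical helper, not the accumulator itself):

fn percentile_linear(mut sample: Vec<f64>, p: f64) -> Option<f64> {
    if sample.is_empty() {
        return None;
    }
    sample.sort_by(|a, b| a.partial_cmp(b).unwrap());
    // i + g: `i` is the floor, `g` the fractional part
    let pos = p / 100.0 * (sample.len() - 1) as f64;
    let i = pos.floor() as usize;
    let g = pos - pos.floor();
    if i + 1 < sample.len() {
        Some(sample[i] + g * (sample[i + 1] - sample[i]))
    } else {
        Some(sample[i]) // right border: p == 100
    }
}

fn main() {
    // same case as `numpy.percentile([10, 7, 4], 40)` in the tests below: 6.4
    let r = percentile_linear(vec![10.0, 7.0, 4.0], 40.0).unwrap();
    assert!((r - 6.4).abs() < 1e-9);
}
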
#[derive(Debug, Default)]
pub struct Percentile<T>
where
T: Primitive,
T: WrapperType,
{
greater: BinaryHeap<Reverse<OrdPrimitive<T>>>,
not_greater: BinaryHeap<OrdPrimitive<T>>,
@@ -62,7 +62,7 @@ where

impl<T> Percentile<T>
where
T: Primitive,
T: WrapperType,
{
fn push(&mut self, value: T) {
let value = OrdPrimitive::<T>(value);
@@ -93,8 +93,7 @@ where

impl<T> Accumulator for Percentile<T>
where
T: Primitive,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -107,7 +106,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.p.into(),
])
@@ -129,14 +128,14 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};

let x = &values[1];
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be float64",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
@@ -209,10 +208,11 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
@@ -259,7 +259,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Percentile::<$S>::default()))
Ok(Box::new(Percentile::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -292,7 +292,7 @@ impl AggregateFunctionCreator for PercentileAccumulatorCreator {

#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::{Float64Vector, Int32Vector};

use super::*;
#[test]
@@ -307,8 +307,8 @@ mod test {
// test update one not-null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Some(42)])),
Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
Arc::new(Int32Vector::from(vec![Some(42)])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
assert!(percentile.update_batch(&v).is_ok());
assert_eq!(Value::from(42.0_f64), percentile.evaluate().unwrap());
@@ -316,8 +316,8 @@ mod test {
// test update one null value
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
Arc::new(PrimitiveVector::<f64>::from(vec![Some(100.0_f64)])),
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64)])),
];
assert!(percentile.update_batch(&v).is_ok());
assert_eq!(Value::Null, percentile.evaluate().unwrap());
@@ -325,12 +325,8 @@ mod test {
// test update no null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
@@ -342,13 +338,8 @@ mod test {
// test update null-value batch
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
@@ -362,13 +353,10 @@ mod test {
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(PrimitiveVector::<f64>::from(vec![
Some(100.0_f64),
Some(100.0_f64),
])),
Arc::new(Float64Vector::from(vec![Some(100.0_f64), Some(100.0_f64)])),
];
assert!(percentile.update_batch(&v).is_ok());
assert_eq!(Value::from(4_f64), percentile.evaluate().unwrap());
@@ -376,12 +364,8 @@ mod test {
// test left border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(0.0_f64),
Some(0.0_f64),
Some(0.0_f64),
@@ -393,12 +377,8 @@ mod test {
// test medium
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(50.0_f64),
Some(50.0_f64),
Some(50.0_f64),
@@ -410,12 +390,8 @@ mod test {
// test right border
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(100.0_f64),
Some(100.0_f64),
Some(100.0_f64),
@@ -431,12 +407,8 @@ mod test {
// >> 6.400000000000
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(10i32),
Some(7),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(40.0_f64),
Some(40.0_f64),
Some(40.0_f64),
@@ -451,12 +423,8 @@ mod test {
// >> 9.7000000000000011
let mut percentile = Percentile::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(10i32),
Some(7),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(10i32), Some(7), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(95.0_f64),
Some(95.0_f64),
Some(95.0_f64),

@@ -23,9 +23,9 @@ use common_query::error::{
use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::types::PrimitiveType;
use datatypes::types::{LogicalPrimitiveType, WrapperType};
use datatypes::value::ListValue;
use datatypes::vectors::{ConstantVector, Int64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Helper, Int64Vector, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
@@ -34,8 +34,10 @@ use snafu::{ensure, OptionExt, ResultExt};
#[derive(Debug, Default)]
pub struct Polyval<T, PolyT>
where
T: Primitive + AsPrimitive<PolyT>,
PolyT: Primitive + std::ops::Mul<Output = PolyT>,
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
values: Vec<T>,
// DataFusion casts constant in into i64 type.
@@ -45,8 +47,10 @@ where

impl<T, PolyT> Polyval<T, PolyT>
where
T: Primitive + AsPrimitive<PolyT>,
PolyT: Primitive + std::ops::Mul<Output = PolyT>,
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType,
PolyT::Native: std::ops::Mul<Output = PolyT::Native>,
{
fn push(&mut self, value: T) {
self.values.push(value);
@@ -55,11 +59,11 @@ where

impl<T, PolyT> Accumulator for Polyval<T, PolyT>
where
T: Primitive + AsPrimitive<PolyT>,
PolyT: Primitive + std::ops::Mul<Output = PolyT> + std::iter::Sum<PolyT>,
for<'a> T: Scalar<RefType<'a> = T>,
for<'a> PolyT: Scalar<RefType<'a> = PolyT>,
i64: AsPrimitive<PolyT>,
T: WrapperType,
T::Native: AsPrimitive<PolyT::Native>,
PolyT: WrapperType + std::iter::Sum<<PolyT as WrapperType>::Native>,
PolyT::Native: std::ops::Mul<Output = PolyT::Native> + std::iter::Sum<PolyT::Native>,
i64: AsPrimitive<<PolyT as WrapperType>::Native>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -70,7 +74,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.x.into(),
])
@@ -91,10 +95,10 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};
(0..len).for_each(|_| {
for v in column.iter_data().flatten() {
@@ -103,7 +107,7 @@ where
});

let x = &values[1];
let x = VectorHelper::check_get_scalar::<i64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<i64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"POLYVAL\" function's second argument to be a positive integer",
})?;
// `get(0)` is safe because we have checked `values[1].len() == values[0].len() != 0`
@@ -172,12 +176,14 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}

Ok(())
}

@@ -196,7 +202,7 @@ where
.values
.iter()
.enumerate()
.map(|(i, &value)| value.as_() * (x.pow((len - 1 - i) as u32)).as_())
.map(|(i, &value)| value.into_native().as_() * x.pow((len - 1 - i) as u32).as_())
.sum();
Ok(polyval.into())
}
@@ -213,7 +219,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(Polyval::<$S,<$S as Primitive>::LargestType>::default()))
Ok(Box::new(Polyval::<<$S as LogicalPrimitiveType>::Wrapper, <<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -234,7 +240,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {
with_match_primitive_type_id!(
input_type,
|$S| {
Ok(PrimitiveType::<<$S as Primitive>::LargestType>::default().into())
Ok(<<$S as LogicalPrimitiveType>::LargestType as LogicalPrimitiveType>::build_data_type())
},
{
unreachable!()
@@ -254,7 +260,7 @@ impl AggregateFunctionCreator for PolyvalAccumulatorCreator {

#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::Int32Vector;

use super::*;
#[test]
@@ -268,8 +274,8 @@ mod test {
// test update one not-null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Some(3)])),
Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
Arc::new(Int32Vector::from(vec![Some(3)])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
assert!(polyval.update_batch(&v).is_ok());
assert_eq!(Value::Int64(3), polyval.evaluate().unwrap());
@@ -277,8 +283,8 @@ mod test {
// test update one null value
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![Option::<i32>::None])),
Arc::new(PrimitiveVector::<i64>::from(vec![Some(2_i64)])),
Arc::new(Int32Vector::from(vec![Option::<i32>::None])),
Arc::new(Int64Vector::from(vec![Some(2_i64)])),
];
assert!(polyval.update_batch(&v).is_ok());
assert_eq!(Value::Null, polyval.evaluate().unwrap());
@@ -286,12 +292,8 @@ mod test {
// test update no null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(3),
Some(0),
Some(1),
])),
Arc::new(PrimitiveVector::<i64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
@@ -303,13 +305,8 @@ mod test {
// test update null-value batch
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(3),
Some(0),
None,
Some(1),
])),
Arc::new(PrimitiveVector::<i64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(3), Some(0), None, Some(1)])),
Arc::new(Int64Vector::from(vec![
Some(2_i64),
Some(2_i64),
Some(2_i64),
@@ -323,10 +320,10 @@ mod test {
let mut polyval = Polyval::<i32, i64>::default();
let v: Vec<VectorRef> = vec![
Arc::new(ConstantVector::new(
Arc::new(PrimitiveVector::<i32>::from_vec(vec![4])),
Arc::new(Int32Vector::from_vec(vec![4])),
2,
)),
Arc::new(PrimitiveVector::<i64>::from(vec![Some(5_i64), Some(5_i64)])),
Arc::new(Int64Vector::from(vec![Some(5_i64), Some(5_i64)])),
];
assert!(polyval.update_batch(&v).is_ok());
assert_eq!(Value::Int64(24), polyval.evaluate().unwrap());

@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html

#[derive(Debug, Default)]
pub struct ScipyStatsNormCdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
pub struct ScipyStatsNormCdf<T> {
values: Vec<T>,
x: Option<f64>,
}

impl<T> ScipyStatsNormCdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
impl<T> ScipyStatsNormCdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
@@ -52,8 +46,8 @@ where

impl<T> Accumulator for ScipyStatsNormCdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType + std::iter::Sum<T>,
T::Native: AsPrimitive<f64>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -64,7 +58,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.x.into(),
])
@@ -86,14 +80,14 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};

let x = &values[1];
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMCDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
@@ -160,19 +154,19 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}

fn evaluate(&self) -> Result<Value> {
let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
let mean = values.clone().mean();
let std_dev = values.std_dev();
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();
if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
@@ -198,7 +192,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormCdf::<$S>::default()))
Ok(Box::new(ScipyStatsNormCdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -230,7 +224,7 @@ impl AggregateFunctionCreator for ScipyStatsNormCdfAccumulatorCreator {

#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::{Float64Vector, Int32Vector};

use super::*;
#[test]
@@ -244,12 +238,8 @@ mod test {
// test update no null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
@@ -264,13 +254,8 @@ mod test {
// test update null-value batch
let mut scipy_stats_norm_cdf = ScipyStatsNormCdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),

@@ -23,7 +23,7 @@ use common_query::logical_plan::{Accumulator, AggregateFunctionCreator};
use common_query::prelude::*;
use datatypes::prelude::*;
use datatypes::value::{ListValue, OrderedFloat};
use datatypes::vectors::{ConstantVector, Float64Vector, ListVector};
use datatypes::vectors::{ConstantVector, Float64Vector, Helper, ListVector};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use snafu::{ensure, OptionExt, ResultExt};
@@ -33,18 +33,12 @@ use statrs::statistics::Statistics;
// https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html

#[derive(Debug, Default)]
pub struct ScipyStatsNormPdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
pub struct ScipyStatsNormPdf<T> {
values: Vec<T>,
x: Option<f64>,
}

impl<T> ScipyStatsNormPdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
{
impl<T> ScipyStatsNormPdf<T> {
fn push(&mut self, value: T) {
self.values.push(value);
}
@@ -52,8 +46,8 @@ where

impl<T> Accumulator for ScipyStatsNormPdf<T>
where
T: Primitive + AsPrimitive<f64> + std::iter::Sum<T>,
for<'a> T: Scalar<RefType<'a> = T>,
T: WrapperType,
T::Native: AsPrimitive<f64> + std::iter::Sum<T>,
{
fn state(&self) -> Result<Vec<Value>> {
let nums = self
@@ -64,7 +58,7 @@ where
Ok(vec![
Value::List(ListValue::new(
Some(Box::new(nums)),
T::default().into().data_type(),
T::LogicalType::build_data_type(),
)),
self.x.into(),
])
@@ -86,14 +80,14 @@ where
let mut len = 1;
let column: &<T as Scalar>::VectorType = if column.is_const() {
len = column.len();
let column: &ConstantVector = unsafe { VectorHelper::static_cast(column) };
unsafe { VectorHelper::static_cast(column.inner()) }
let column: &ConstantVector = unsafe { Helper::static_cast(column) };
unsafe { Helper::static_cast(column.inner()) }
} else {
unsafe { VectorHelper::static_cast(column) }
unsafe { Helper::static_cast(column) }
};

let x = &values[1];
let x = VectorHelper::check_get_scalar::<f64>(x).context(error::InvalidInputsSnafu {
let x = Helper::check_get_scalar::<f64>(x).context(error::InvalidInputTypeSnafu {
err_msg: "expecting \"SCIPYSTATSNORMPDF\" function's second argument to be a positive integer",
})?;
let first = x.get(0);
@@ -160,19 +154,20 @@ where
),
})?;
for value in values.values_iter() {
let value = value.context(FromScalarValueSnafu)?;
let column: &<T as Scalar>::VectorType = unsafe { VectorHelper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
if let Some(value) = value.context(FromScalarValueSnafu)? {
let column: &<T as Scalar>::VectorType = unsafe { Helper::static_cast(&value) };
for v in column.iter_data().flatten() {
self.push(v);
}
}
}
Ok(())
}

fn evaluate(&self) -> Result<Value> {
let values = self.values.iter().map(|&v| v.as_()).collect::<Vec<_>>();
let mean = values.clone().mean();
let std_dev = values.std_dev();
let mean = self.values.iter().map(|v| v.into_native().as_()).mean();
let std_dev = self.values.iter().map(|v| v.into_native().as_()).std_dev();

if mean.is_nan() || std_dev.is_nan() {
Ok(Value::Null)
} else {
@@ -198,7 +193,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {
with_match_primitive_type_id!(
input_type.logical_type_id(),
|$S| {
Ok(Box::new(ScipyStatsNormPdf::<$S>::default()))
Ok(Box::new(ScipyStatsNormPdf::<<$S as LogicalPrimitiveType>::Wrapper>::default()))
},
{
let err_msg = format!(
@@ -230,7 +225,7 @@ impl AggregateFunctionCreator for ScipyStatsNormPdfAccumulatorCreator {

#[cfg(test)]
mod test {
use datatypes::vectors::PrimitiveVector;
use datatypes::vectors::{Float64Vector, Int32Vector};

use super::*;
#[test]
@@ -244,12 +239,8 @@ mod test {
// test update no null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-1i32),
Some(1),
Some(2),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-1i32), Some(1), Some(2)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
Some(2.0_f64),
Some(2.0_f64),
@@ -264,13 +255,8 @@ mod test {
// test update null-value batch
let mut scipy_stats_norm_pdf = ScipyStatsNormPdf::<i32>::default();
let v: Vec<VectorRef> = vec![
Arc::new(PrimitiveVector::<i32>::from(vec![
Some(-2i32),
None,
Some(3),
Some(4),
])),
Arc::new(PrimitiveVector::<f64>::from(vec![
Arc::new(Int32Vector::from(vec![Some(-2i32), None, Some(3), Some(4)])),
Arc::new(Float64Vector::from(vec![
Some(2.0_f64),
None,
Some(2.0_f64),

@@ -14,10 +14,10 @@

use std::iter;

use common_query::error::Result;
use datatypes::prelude::*;
use datatypes::vectors::ConstantVector;
use datatypes::vectors::{ConstantVector, Helper};

use crate::error::Result;
use crate::scalars::expression::ctx::EvalContext;

pub fn scalar_binary_op<L: Scalar, R: Scalar, O: Scalar, F>(
@@ -36,10 +36,9 @@ where

let result = match (l.is_const(), r.is_const()) {
(false, true) => {
let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
let right: &<R as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(right.inner()) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
let right: &ConstantVector = unsafe { Helper::static_cast(r) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
let b = right.get_data(0);

let it = left.iter_data().map(|a| f(a, b, ctx));
@@ -47,8 +46,8 @@ where
}

(false, false) => {
let left: &<L as Scalar>::VectorType = unsafe { VectorHelper::static_cast(l) };
let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(l) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };

let it = left
.iter_data()
@@ -58,25 +57,22 @@ where
}

(true, false) => {
let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
let left: &<L as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(left.inner()) };
let left: &ConstantVector = unsafe { Helper::static_cast(l) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
let a = left.get_data(0);

let right: &<R as Scalar>::VectorType = unsafe { VectorHelper::static_cast(r) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(r) };
let it = right.iter_data().map(|b| f(a, b, ctx));
<O as Scalar>::VectorType::from_owned_iterator(it)
}

(true, true) => {
let left: &ConstantVector = unsafe { VectorHelper::static_cast(l) };
let left: &<L as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(left.inner()) };
let left: &ConstantVector = unsafe { Helper::static_cast(l) };
let left: &<L as Scalar>::VectorType = unsafe { Helper::static_cast(left.inner()) };
let a = left.get_data(0);

let right: &ConstantVector = unsafe { VectorHelper::static_cast(r) };
let right: &<R as Scalar>::VectorType =
unsafe { VectorHelper::static_cast(right.inner()) };
let right: &ConstantVector = unsafe { Helper::static_cast(r) };
let right: &<R as Scalar>::VectorType = unsafe { Helper::static_cast(right.inner()) };
let b = right.get_data(0);

let it = iter::repeat(a)

@@ -13,8 +13,7 @@
// limitations under the License.

use chrono_tz::Tz;

use crate::error::Error;
use common_query::error::Error;

pub struct EvalContext {
_tz: Tz,

@@ -12,10 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use common_query::error::{self, Result};
use datatypes::prelude::*;
use datatypes::vectors::Helper;
use snafu::ResultExt;

use crate::error::{GetScalarVectorSnafu, Result};
use crate::scalars::expression::ctx::EvalContext;

/// TODO: remove the allow_unused when it's used.
@@ -28,7 +29,7 @@ pub fn scalar_unary_op<L: Scalar, O: Scalar, F>(
where
F: Fn(Option<L::RefType<'_>>, &mut EvalContext) -> Option<O>,
{
let left = VectorHelper::check_get_scalar::<L>(l).context(GetScalarVectorSnafu)?;
let left = Helper::check_get_scalar::<L>(l).context(error::GetScalarVectorSnafu)?;
let it = left.iter_data().map(|a| f(a, ctx));
let result = <O as Scalar>::VectorType::from_owned_iterator(it);


@@ -16,12 +16,11 @@ use std::fmt;
use std::sync::Arc;

use chrono_tz::Tz;
use common_query::error::Result;
use common_query::prelude::Signature;
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::VectorRef;

use crate::error::Result;

#[derive(Clone)]
pub struct FunctionContext {
pub tz: Tz,

@@ -15,15 +15,16 @@
use std::fmt;
use std::sync::Arc;

use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::types::LogicalPrimitiveType;
use datatypes::vectors::VectorRef;
use datatypes::with_match_primitive_type_id;
use num::traits::Pow;
use num_traits::AsPrimitive;

use crate::error::Result;
use crate::scalars::expression::{scalar_binary_op, EvalContext};
use crate::scalars::function::{Function, FunctionContext};

@@ -46,7 +47,7 @@ impl Function for PowFunction {
fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
let col = scalar_binary_op::<$S, $T, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
let col = scalar_binary_op::<<$S as LogicalPrimitiveType>::Native, <$T as LogicalPrimitiveType>::Native, f64, _>(&columns[0], &columns[1], scalar_pow, &mut EvalContext::default())?;
Ok(Arc::new(col))
},{
unreachable!()

@@ -14,10 +14,10 @@

use std::fmt;

use arrow::array::Array;
use common_query::error::{FromArrowArraySnafu, Result, TypeCastSnafu};
use common_query::error::{self, Result};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow;
use datatypes::arrow::compute::kernels::{arithmetic, cast};
use datatypes::arrow::datatypes::DataType;
use datatypes::prelude::*;
use datatypes::vectors::{Helper, VectorRef};
use snafu::ResultExt;
@@ -51,28 +51,21 @@ impl Function for RateFunction {
let val = &columns[0].to_arrow_array();
let val_0 = val.slice(0, val.len() - 1);
let val_1 = val.slice(1, val.len() - 1);
let dv = arrow::compute::arithmetics::sub(&*val_1, &*val_0);
let dv = arithmetic::subtract_dyn(&val_1, &val_0).context(error::ArrowComputeSnafu)?;
let ts = &columns[1].to_arrow_array();
let ts_0 = ts.slice(0, ts.len() - 1);
let ts_1 = ts.slice(1, ts.len() - 1);
let dt = arrow::compute::arithmetics::sub(&*ts_1, &*ts_0);
fn all_to_f64(array: &dyn Array) -> Result<Box<dyn Array>> {
Ok(arrow::compute::cast::cast(
array,
&arrow::datatypes::DataType::Float64,
arrow::compute::cast::CastOptions {
wrapped: true,
partial: true,
},
)
.context(TypeCastSnafu {
typ: arrow::datatypes::DataType::Float64,
})?)
}
let dv = all_to_f64(&*dv)?;
let dt = all_to_f64(&*dt)?;
let rate = arrow::compute::arithmetics::div(&*dv, &*dt);
let v = Helper::try_into_vector(&rate).context(FromArrowArraySnafu)?;
let dt = arithmetic::subtract_dyn(&ts_1, &ts_0).context(error::ArrowComputeSnafu)?;

let dv = cast::cast(&dv, &DataType::Float64).context(error::TypeCastSnafu {
typ: DataType::Float64,
})?;
let dt = cast::cast(&dt, &DataType::Float64).context(error::TypeCastSnafu {
typ: DataType::Float64,
})?;
let rate = arithmetic::divide_dyn(&dv, &dt).context(error::ArrowComputeSnafu)?;
let v = Helper::try_into_vector(&rate).context(error::FromArrowArraySnafu)?;

Ok(v)
}
}
@@ -81,9 +74,8 @@ impl Function for RateFunction {
mod tests {
use std::sync::Arc;

use arrow::array::Float64Array;
use common_query::prelude::TypeSignature;
use datatypes::vectors::{Float32Vector, Int64Vector};
use datatypes::vectors::{Float32Vector, Float64Vector, Int64Vector};

use super::*;
#[test]
@@ -108,9 +100,7 @@ mod tests {
Arc::new(Int64Vector::from_vec(ts)),
];
let vector = rate.eval(FunctionContext::default(), &args).unwrap();
let arr = vector.to_arrow_array();
let expect = Arc::new(Float64Array::from_vec(vec![2.0, 3.0]));
let res = arrow::compute::comparison::eq(&*arr, &*expect);
res.iter().for_each(|x| assert!(matches!(x, Some(true))));
let expect: VectorRef = Arc::new(Float64Vector::from_vec(vec![2.0, 3.0]));
assert_eq!(expect, vector);
}
}
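
The rate is just Δvalue / Δtimestamp over consecutive samples, which `RateFunction::eval` above computes with Arrow's subtract, cast, and divide kernels. A sketch over plain f64 slices; the inputs in `main` are hypothetical, chosen to yield the [2.0, 3.0] the test asserts:

/// rate[i] = (values[i + 1] - values[i]) / (ts[i + 1] - ts[i])
fn rate(values: &[f64], ts: &[f64]) -> Vec<f64> {
    values
        .windows(2)
        .zip(ts.windows(2))
        .map(|(v, t)| (v[1] - v[0]) / (t[1] - t[0]))
        .collect()
}

fn main() {
    // hypothetical samples: +4 over 2 time units, then +6 over 2 time units
    assert_eq!(rate(&[1.0, 5.0, 11.0], &[0.0, 2.0, 4.0]), vec![2.0, 3.0]);
}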

@@ -13,7 +13,6 @@
// limitations under the License.

mod clip;
#[allow(unused)]
mod interp;

use std::sync::Arc;
@@ -15,14 +15,15 @@
use std::fmt;
use std::sync::Arc;

use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::{Scalar, VectorRef};
use datatypes::with_match_primitive_type_id;
use num_traits::AsPrimitive;
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::ArrowPrimitiveType;
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::*;
use datatypes::vectors::PrimitiveVector;
use paste::paste;

use crate::error::Result;
use crate::scalars::expression::{scalar_binary_op, EvalContext};
use crate::scalars::function::{Function, FunctionContext};

@@ -34,25 +35,32 @@ macro_rules! define_eval {
($O: ident) => {
paste! {
fn [<eval_ $O>](columns: &[VectorRef]) -> Result<VectorRef> {
with_match_primitive_type_id!(columns[0].data_type().logical_type_id(), |$S| {
with_match_primitive_type_id!(columns[1].data_type().logical_type_id(), |$T| {
with_match_primitive_type_id!(columns[2].data_type().logical_type_id(), |$R| {
// clip(a, min, max) is equal to min(max(a, min), max)
let col: VectorRef = Arc::new(scalar_binary_op::<$S, $T, $O, _>(&columns[0], &columns[1], scalar_max, &mut EvalContext::default())?);
let col = scalar_binary_op::<$O, $R, $O, _>(&col, &columns[2], scalar_min, &mut EvalContext::default())?;
Ok(Arc::new(col))
}, {
unreachable!()
})
}, {
unreachable!()
})
}, {
unreachable!()
})
fn cast_vector(input: &VectorRef) -> VectorRef {
Arc::new(PrimitiveVector::<<$O as WrapperType>::LogicalType>::try_from_arrow_array(
compute::cast(&input.to_arrow_array(), &<<<$O as WrapperType>::LogicalType as LogicalPrimitiveType>::ArrowPrimitive as ArrowPrimitiveType>::DATA_TYPE).unwrap()
).unwrap()) as _
}
let operator_1 = cast_vector(&columns[0]);
let operator_2 = cast_vector(&columns[1]);
let operator_3 = cast_vector(&columns[2]);

// clip(a, min, max) is equal to min(max(a, min), max)
let col: VectorRef = Arc::new(scalar_binary_op::<$O, $O, $O, _>(
&operator_1,
&operator_2,
scalar_max,
&mut EvalContext::default(),
)?);
let col = scalar_binary_op::<$O, $O, $O, _>(
&col,
&operator_3,
scalar_min,
&mut EvalContext::default(),
)?;
Ok(Arc::new(col))
}
}
}
};
}

define_eval!(i64);
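
Element-wise, clip(a, min, max) equals min(max(a, min), max), as the comment in the macro notes. A sketch on plain i64 values (the free function `clip` below is hypothetical, not the `ClipFunction` type):

fn clip(a: i64, lo: i64, hi: i64) -> i64 {
    // max with the lower bound first, then min with the upper bound
    a.max(lo).min(hi)
}

fn main() {
    // clip 0..10 into [3, 6], as the signed-integer test below does
    let out: Vec<i64> = (0..10).map(|v| clip(v, 3, 6)).collect();
    assert_eq!(out, vec![3, 3, 3, 3, 4, 5, 6, 6, 6, 6]);
}
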
@@ -108,27 +116,23 @@ pub fn max<T: PartialOrd>(input: T, max: T) -> T {
}

#[inline]
fn scalar_min<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
fn scalar_min<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
S: AsPrimitive<O>,
T: AsPrimitive<O>,
O: Scalar + Copy + PartialOrd,
{
match (left, right) {
(Some(left), Some(right)) => Some(min(left.as_(), right.as_())),
(Some(left), Some(right)) => Some(min(left, right)),
_ => None,
}
}

#[inline]
fn scalar_max<S, T, O>(left: Option<S>, right: Option<T>, _ctx: &mut EvalContext) -> Option<O>
fn scalar_max<O>(left: Option<O>, right: Option<O>, _ctx: &mut EvalContext) -> Option<O>
where
S: AsPrimitive<O>,
T: AsPrimitive<O>,
O: Scalar + Copy + PartialOrd,
{
match (left, right) {
(Some(left), Some(right)) => Some(max(left.as_(), right.as_())),
(Some(left), Some(right)) => Some(max(left, right)),
_ => None,
}
}
@@ -143,11 +147,15 @@ impl fmt::Display for ClipFunction {
mod tests {
use common_query::prelude::TypeSignature;
use datatypes::value::Value;
use datatypes::vectors::{ConstantVector, Float32Vector, Int32Vector, UInt32Vector};
use datatypes::vectors::{
ConstantVector, Float32Vector, Int16Vector, Int32Vector, Int8Vector, UInt16Vector,
UInt32Vector, UInt8Vector,
};

use super::*;

#[test]
fn test_clip_function() {
fn test_clip_signature() {
let clip = ClipFunction::default();

assert_eq!("clip", clip.name());
@@ -190,16 +198,21 @@ mod tests {
volatility: Volatility::Immutable
} if valid_types == ConcreteDataType::numerics()
));
}

#[test]
fn test_clip_fn_signed() {
let clip = ClipFunction::default();

// eval with signed integers
let args: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![3])),
Arc::new(Int8Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![6])),
Arc::new(Int16Vector::from_vec(vec![6])),
10,
)),
];
@@ -217,16 +230,21 @@ mod tests {
assert!(matches!(vector.get(i), Value::Int64(v) if v == 6));
}
}
}

#[test]
fn test_clip_fn_unsigned() {
let clip = ClipFunction::default();

// eval with unsigned integers
let args: Vec<VectorRef> = vec![
Arc::new(UInt32Vector::from_values(0..10)),
Arc::new(UInt8Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(UInt32Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(
Arc::new(UInt32Vector::from_vec(vec![6])),
Arc::new(UInt16Vector::from_vec(vec![6])),
10,
)),
];
@@ -244,12 +262,17 @@ mod tests {
assert!(matches!(vector.get(i), Value::UInt64(v) if v == 6));
}
}
}

#[test]
fn test_clip_fn_float() {
let clip = ClipFunction::default();

// eval with floats
let args: Vec<VectorRef> = vec![
Arc::new(Int32Vector::from_values(0..10)),
Arc::new(Int8Vector::from_values(0..10)),
Arc::new(ConstantVector::new(
Arc::new(Int32Vector::from_vec(vec![3])),
Arc::new(UInt32Vector::from_vec(vec![3])),
10,
)),
Arc::new(ConstantVector::new(

@@ -14,41 +14,18 @@

use std::sync::Arc;

use datatypes::arrow::array::PrimitiveArray;
use datatypes::arrow::compute::cast::primitive_to_primitive;
use datatypes::arrow::datatypes::DataType::Float64;
use common_query::error::{self, Result};
use datatypes::arrow::compute::cast;
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::DataType;
use datatypes::prelude::ScalarVector;
use datatypes::type_id::LogicalTypeId;
use datatypes::value::Value;
use datatypes::vectors::{Float64Vector, PrimitiveVector, Vector, VectorRef};
use datatypes::{arrow, with_match_primitive_type_id};
use snafu::{ensure, Snafu};

#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display(
"The length of the args is not enough, expect at least: {}, have: {}",
expect,
actual,
))]
ArgsLenNotEnough { expect: usize, actual: usize },

#[snafu(display("The sample {} is empty", name))]
SampleEmpty { name: String },

#[snafu(display(
"The length of the len1: {} don't match the length of the len2: {}",
len1,
len2,
))]
LenNotEquals { len1: usize, len2: usize },
}

pub type Result<T> = std::result::Result<T, Error>;
use datatypes::vectors::{Float64Vector, Vector, VectorRef};
use datatypes::with_match_primitive_type_id;
use snafu::{ensure, ResultExt};

/* search the biggest number that is smaller than x in xp */
fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize {
fn linear_search_ascending_vector(x: Value, xp: &Float64Vector) -> usize {
for i in 0..xp.len() {
if x < xp.get(i) {
return i - 1;
@@ -58,7 +35,7 @@ fn linear_search_ascending_vector(x: Value, xp: &PrimitiveVector<f64>) -> usize
}

/* search the biggest number that is smaller than x in xp */
fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usize {
fn binary_search_ascending_vector(key: Value, xp: &Float64Vector) -> usize {
let mut left = 0;
let mut right = xp.len();
/* If len <= 4 use linear search. */
@@ -77,27 +54,33 @@ fn binary_search_ascending_vector(key: Value, xp: &PrimitiveVector<f64>) -> usiz
left - 1
}
|
||||
|
||||
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<PrimitiveVector<f64>> {
|
||||
fn concrete_type_to_primitive_vector(arg: &VectorRef) -> Result<Float64Vector> {
|
||||
with_match_primitive_type_id!(arg.data_type().logical_type_id(), |$S| {
|
||||
let tmp = arg.to_arrow_array();
|
||||
let from = tmp.as_any().downcast_ref::<PrimitiveArray<$S>>().expect("cast failed");
|
||||
let array = primitive_to_primitive(from, &Float64);
|
||||
Ok(PrimitiveVector::new(array))
|
||||
let array = cast(&tmp, &ArrowDataType::Float64).context(error::TypeCastSnafu {
|
||||
typ: ArrowDataType::Float64,
|
||||
})?;
|
||||
// Safety: array has been cast to Float64Array.
|
||||
Ok(Float64Vector::try_from_arrow_array(array).unwrap())
|
||||
},{
|
||||
unreachable!()
|
||||
})
|
||||
}
|
||||
|
||||
/// https://github.com/numpy/numpy/blob/b101756ac02e390d605b2febcded30a1da50cc2c/numpy/core/src/multiarray/compiled_base.c#L491
|
||||
#[allow(unused)]
|
||||
pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
let mut left = None;
|
||||
let mut right = None;
|
||||
|
||||
ensure!(
|
||||
args.len() >= 3,
|
||||
ArgsLenNotEnoughSnafu {
|
||||
expect: 3_usize,
|
||||
actual: args.len()
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not enough, expect at least: {}, have: {}",
|
||||
3,
|
||||
args.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
@@ -109,9 +92,12 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
if args.len() > 3 {
|
||||
ensure!(
|
||||
args.len() == 5,
|
||||
ArgsLenNotEnoughSnafu {
|
||||
expect: 5_usize,
|
||||
actual: args.len()
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the args is not enough, expect at least: {}, have: {}",
|
||||
5,
|
||||
args.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
@@ -123,14 +109,32 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
.get_data(0);
|
||||
}
|
||||
|
||||
ensure!(x.len() != 0, SampleEmptySnafu { name: "x" });
|
||||
ensure!(xp.len() != 0, SampleEmptySnafu { name: "xp" });
|
||||
ensure!(fp.len() != 0, SampleEmptySnafu { name: "fp" });
|
||||
ensure!(
|
||||
x.len() != 0,
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The sample x is empty",
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
xp.len() != 0,
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The sample xp is empty",
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
fp.len() != 0,
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: "The sample fp is empty",
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
xp.len() == fp.len(),
|
||||
LenNotEqualsSnafu {
|
||||
len1: xp.len(),
|
||||
len2: fp.len(),
|
||||
error::InvalidFuncArgsSnafu {
|
||||
err_msg: format!(
|
||||
"The length of the len1: {} don't match the length of the len2: {}",
|
||||
xp.len(),
|
||||
fp.len()
|
||||
),
|
||||
}
|
||||
);
|
||||
|
||||
@@ -147,7 +151,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
|
||||
let res;
|
||||
if xp.len() == 1 {
|
||||
res = x
|
||||
let datas = x
|
||||
.iter_data()
|
||||
.map(|x| {
|
||||
if Value::from(x) < xp.get(0) {
|
||||
@@ -158,7 +162,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
fp.get_data(0)
|
||||
}
|
||||
})
|
||||
.collect::<Float64Vector>();
|
||||
.collect::<Vec<_>>();
|
||||
res = Float64Vector::from(datas);
|
||||
} else {
|
||||
let mut j = 0;
|
||||
/* only pre-calculate slopes if there are relatively few of them. */
|
||||
@@ -185,7 +190,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
}
|
||||
slopes = Some(slopes_tmp);
|
||||
}
|
||||
res = x
|
||||
let datas = x
|
||||
.iter_data()
|
||||
.map(|x| match x {
|
||||
Some(xi) => {
|
||||
@@ -248,7 +253,8 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Float64Vector>();
|
||||
.collect::<Vec<_>>();
|
||||
res = Float64Vector::from(datas);
|
||||
}
|
||||
Ok(Arc::new(res) as _)
|
||||
}
|
||||
@@ -257,8 +263,7 @@ pub fn interp(args: &[VectorRef]) -> Result<VectorRef> {
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use datatypes::prelude::ScalarVectorBuilder;
|
||||
use datatypes::vectors::{Int32Vector, Int64Vector, PrimitiveVectorBuilder};
|
||||
use datatypes::vectors::{Int32Vector, Int64Vector};
|
||||
|
||||
use super::*;
|
||||
#[test]
|
||||
@@ -341,12 +346,8 @@ mod tests {
|
||||
assert!(matches!(vector.get(0), Value::Float64(v) if v==x[0] as f64));
|
||||
|
||||
// x=None output:Null
|
||||
let input = [None, Some(0.0), Some(0.3)];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let x = builder.finish();
|
||||
let input = vec![None, Some(0.0), Some(0.3)];
|
||||
let x = Float64Vector::from(input);
|
||||
let args: Vec<VectorRef> = vec![
|
||||
Arc::new(x),
|
||||
Arc::new(Int64Vector::from_vec(xp)),
|
||||
|
||||
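The interp rewrite above drops the module-local Snafu enum (ArgsLenNotEnough, SampleEmpty, LenNotEquals) in favor of the shared common_query::error::InvalidFuncArgs variant. A minimal sketch of the resulting validation style, assuming the error module introduced later in this change set:

    use common_query::error::{self, Result};
    use snafu::ensure;

    fn check_arg_len(actual: usize) -> Result<()> {
        // One generic variant with a formatted message replaces the
        // three bespoke variants the old enum carried.
        ensure!(
            actual >= 3,
            error::InvalidFuncArgsSnafu {
                err_msg: format!(
                    "The length of the args is not enough, expect at least: {}, have: {}",
                    3, actual
                ),
            }
        );
        Ok(())
    }
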
@@ -15,11 +15,11 @@
use std::fmt;
use std::sync::Arc;

use common_query::error::Result;
use common_query::prelude::{Signature, Volatility};
use datatypes::data_type::ConcreteDataType;
use datatypes::prelude::VectorRef;

use crate::error::Result;
use crate::scalars::expression::{scalar_binary_op, EvalContext};
use crate::scalars::function::{Function, FunctionContext};


@@ -17,16 +17,17 @@
use std::fmt;
use std::sync::Arc;

use common_query::error::{IntoVectorSnafu, UnsupportedInputDataTypeSnafu};
use common_query::error::{
ArrowComputeSnafu, IntoVectorSnafu, Result, TypeCastSnafu, UnsupportedInputDataTypeSnafu,
};
use common_query::prelude::{Signature, Volatility};
use datatypes::arrow::compute::arithmetics;
use datatypes::arrow::datatypes::DataType as ArrowDatatype;
use datatypes::arrow::scalar::PrimitiveScalar;
use datatypes::arrow::compute;
use datatypes::arrow::datatypes::{DataType as ArrowDatatype, Int64Type};
use datatypes::data_type::DataType;
use datatypes::prelude::ConcreteDataType;
use datatypes::vectors::{TimestampVector, VectorRef};
use datatypes::vectors::{TimestampMillisecondVector, VectorRef};
use snafu::ResultExt;

use crate::error::Result;
use crate::scalars::function::{Function, FunctionContext};

#[derive(Clone, Debug, Default)]
@@ -40,7 +41,7 @@ impl Function for FromUnixtimeFunction {
}

fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::timestamp_millis_datatype())
Ok(ConcreteDataType::timestamp_millisecond_datatype())
}

fn signature(&self) -> Signature {
@@ -56,14 +57,18 @@ impl Function for FromUnixtimeFunction {
ConcreteDataType::Int64(_) => {
let array = columns[0].to_arrow_array();
// Our timestamp vector's time unit is millisecond
let array = arithmetics::mul_scalar(
&*array,
&PrimitiveScalar::new(ArrowDatatype::Int64, Some(1000i64)),
);
let array = compute::multiply_scalar_dyn::<Int64Type>(&array, 1000i64)
.context(ArrowComputeSnafu)?;

let arrow_datatype = &self.return_type(&[]).unwrap().as_arrow_type();
Ok(Arc::new(
TimestampVector::try_from_arrow_array(array).context(IntoVectorSnafu {
data_type: ArrowDatatype::Int64,
TimestampMillisecondVector::try_from_arrow_array(
compute::cast(&array, arrow_datatype).context(TypeCastSnafu {
typ: ArrowDatatype::Int64,
})?,
)
.context(IntoVectorSnafu {
data_type: arrow_datatype.clone(),
})?,
))
}
@@ -71,8 +76,7 @@ impl Function for FromUnixtimeFunction {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
}
.fail()
.map_err(|e| e.into()),
.fail(),
}
}
}
@@ -96,7 +100,7 @@ mod tests {
let f = FromUnixtimeFunction::default();
assert_eq!("from_unixtime", f.name());
assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
f.return_type(&[]).unwrap()
);


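The from_unixtime changes swap arrow2's arithmetics::mul_scalar for arrow-rs compute kernels. A condensed sketch of the same second-to-millisecond path, mirroring the multiply_scalar_dyn and cast calls used above (treat the exact kernel names as belonging to the arrow version this branch pins):

    use datatypes::arrow::array::ArrayRef;
    use datatypes::arrow::compute;
    use datatypes::arrow::datatypes::{DataType, Int64Type, TimeUnit};

    fn seconds_to_millis_array(seconds: ArrayRef) -> ArrayRef {
        // Multiply each i64 by 1000, then cast the product to a
        // millisecond timestamp array, as the hunk above does.
        let millis = compute::multiply_scalar_dyn::<Int64Type>(&seconds, 1000i64).unwrap();
        compute::cast(&millis, &DataType::Timestamp(TimeUnit::Millisecond, None)).unwrap()
    }
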
@@ -19,7 +19,8 @@ use common_query::prelude::{
ColumnarValue, ReturnTypeFunction, ScalarFunctionImplementation, ScalarUdf, ScalarValue,
};
use datatypes::error::Error as DataTypeError;
use datatypes::prelude::{ConcreteDataType, VectorHelper};
use datatypes::prelude::*;
use datatypes::vectors::Helper;
use snafu::ResultExt;

use crate::scalars::function::{FunctionContext, FunctionRef};
@@ -47,7 +48,7 @@ pub fn create_udf(func: FunctionRef) -> ScalarUdf {
let args: Result<Vec<_>, DataTypeError> = args
.iter()
.map(|arg| match arg {
ColumnarValue::Scalar(v) => VectorHelper::try_from_scalar_value(v.clone(), rows),
ColumnarValue::Scalar(v) => Helper::try_from_scalar_value(v.clone(), rows),
ColumnarValue::Vector(v) => Ok(v.clone()),
})
.collect();

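The udf change above is mechanical (VectorHelper becomes Helper), but the closure it touches is the heart of UDF invocation: scalars are broadcast to the batch's row count so the function body only ever sees vectors. A sketch of that conversion in isolation, using only the items imported in this hunk:

    use common_query::prelude::ColumnarValue;
    use datatypes::error::Error as DataTypeError;
    use datatypes::vectors::{Helper, VectorRef};

    fn arg_to_vector(arg: &ColumnarValue, rows: usize) -> Result<VectorRef, DataTypeError> {
        match arg {
            // Broadcast a scalar into a `rows`-element vector.
            ColumnarValue::Scalar(v) => Helper::try_from_scalar_value(v.clone(), rows),
            // Vectors pass through untouched.
            ColumnarValue::Vector(v) => Ok(v.clone()),
        }
    }
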
@@ -22,11 +22,11 @@ use api::v1::{AddColumn, AddColumns, Column, ColumnDataType, ColumnDef, CreateEx
use common_base::BitVec;
use common_time::timestamp::Timestamp;
use common_time::{Date, DateTime};
use datatypes::data_type::ConcreteDataType;
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::{ValueRef, VectorRef};
use datatypes::schema::SchemaRef;
use datatypes::value::Value;
use datatypes::vectors::VectorBuilder;
use datatypes::vectors::MutableVector;
use snafu::{ensure, OptionExt, ResultExt};
use table::metadata::TableId;
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
@@ -99,7 +99,7 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {
let column_datatype = wrapper.datatype();

let rows = rows as usize;
let mut vector = VectorBuilder::with_capacity(wrapper.into(), rows);
let mut vector = ConcreteDataType::from(wrapper).create_mutable_vector(rows);

if let Some(values) = &column.values {
let values = collect_column_values(column_datatype, values);
@@ -110,21 +110,31 @@ pub fn column_to_vector(column: &Column, rows: u32) -> Result<VectorRef> {

for i in 0..rows {
if let Some(true) = nulls_iter.next() {
vector.push_null();
vector
.push_value_ref(ValueRef::Null)
.context(CreateVectorSnafu)?;
} else {
let value_ref = values_iter.next().context(InvalidColumnProtoSnafu {
err_msg: format!(
"value not found at position {} of column {}",
i, &column.column_name
),
})?;
vector.try_push_ref(value_ref).context(CreateVectorSnafu)?;
let value_ref = values_iter
.next()
.with_context(|| InvalidColumnProtoSnafu {
err_msg: format!(
"value not found at position {} of column {}",
i, &column.column_name
),
})?;
vector
.push_value_ref(value_ref)
.context(CreateVectorSnafu)?;
}
}
} else {
(0..rows).for_each(|_| vector.push_null());
(0..rows).try_for_each(|_| {
vector
.push_value_ref(ValueRef::Null)
.context(CreateVectorSnafu)
})?;
}
Ok(vector.finish())
Ok(vector.to_vector())
}

fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Vec<ValueRef> {
@@ -174,9 +184,24 @@ fn collect_column_values(column_datatype: ColumnDataType, values: &Values) -> Vec<ValueRef>
DateTime::new(*v)
))
}
ColumnDataType::Timestamp => {
collect_values!(values.ts_millis_values, |v| ValueRef::Timestamp(
Timestamp::from_millis(*v)
ColumnDataType::TimestampSecond => {
collect_values!(values.ts_second_values, |v| ValueRef::Timestamp(
Timestamp::new_second(*v)
))
}
ColumnDataType::TimestampMillisecond => {
collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
Timestamp::new_millisecond(*v)
))
}
ColumnDataType::TimestampMicrosecond => {
collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
Timestamp::new_microsecond(*v)
))
}
ColumnDataType::TimestampNanosecond => {
collect_values!(values.ts_millisecond_values, |v| ValueRef::Timestamp(
Timestamp::new_nanosecond(*v)
))
}
}
@@ -289,10 +314,7 @@ pub fn insertion_expr_to_request(
},
)?;
let data_type = &column_schema.data_type;
entry.insert(VectorBuilder::with_capacity(
data_type.clone(),
row_count as usize,
))
entry.insert(data_type.create_mutable_vector(row_count as usize))
}
};
add_values_to_builder(vector_builder, values, row_count as usize, null_mask)?;
@@ -300,7 +322,7 @@ pub fn insertion_expr_to_request(
}
let columns_values = columns_builders
.into_iter()
.map(|(column_name, mut vector_builder)| (column_name, vector_builder.finish()))
.map(|(column_name, mut vector_builder)| (column_name, vector_builder.to_vector()))
.collect();

Ok(InsertRequest {
@@ -312,7 +334,7 @@ pub fn insertion_expr_to_request(
}

fn add_values_to_builder(
builder: &mut VectorBuilder,
builder: &mut Box<dyn MutableVector>,
values: Values,
row_count: usize,
null_mask: Vec<u8>,
@@ -323,9 +345,11 @@ fn add_values_to_builder(
if null_mask.is_empty() {
ensure!(values.len() == row_count, IllegalInsertDataSnafu);

values.iter().for_each(|value| {
builder.push(value);
});
values.iter().try_for_each(|value| {
builder
.push_value_ref(value.as_value_ref())
.context(CreateVectorSnafu)
})?;
} else {
let null_mask = BitVec::from_vec(null_mask);
ensure!(
@@ -336,9 +360,13 @@ fn add_values_to_builder(
let mut idx_of_values = 0;
for idx in 0..row_count {
match is_null(&null_mask, idx) {
Some(true) => builder.push(&Value::Null),
Some(true) => builder
.push_value_ref(ValueRef::Null)
.context(CreateVectorSnafu)?,
_ => {
builder.push(&values[idx_of_values]);
builder
.push_value_ref(values[idx_of_values].as_value_ref())
.context(CreateVectorSnafu)?;
idx_of_values += 1
}
}
@@ -418,9 +446,9 @@ fn convert_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
.map(|v| Value::Date(v.into()))
.collect(),
ConcreteDataType::Timestamp(_) => values
.ts_millis_values
.ts_millisecond_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::from_millis(v)))
.map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
.collect(),
ConcreteDataType::Null(_) => unreachable!(),
ConcreteDataType::List(_) => unreachable!(),
@@ -543,7 +571,7 @@ mod tests {
);

assert_eq!(
ConcreteDataType::timestamp_millis_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::from(
ColumnDataTypeWrapper::try_new(
column_defs
@@ -624,8 +652,8 @@ mod tests {
assert_eq!(Value::Float64(0.1.into()), memory.get(1));

let ts = insert_req.columns_values.get("ts").unwrap();
assert_eq!(Value::Timestamp(Timestamp::from_millis(100)), ts.get(0));
assert_eq!(Value::Timestamp(Timestamp::from_millis(101)), ts.get(1));
assert_eq!(Value::Timestamp(Timestamp::new_millisecond(100)), ts.get(0));
assert_eq!(Value::Timestamp(Timestamp::new_millisecond(101)), ts.get(1));
}

#[test]
@@ -675,8 +703,12 @@ mod tests {
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), true)
.with_time_index(true),
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_millisecond_datatype(),
true,
)
.with_time_index(true),
];

Arc::new(
@@ -741,7 +773,7 @@ mod tests {
};

let ts_vals = column::Values {
ts_millis_values: vec![100, 101],
ts_millisecond_values: vec![100, 101],
..Default::default()
};
let ts_column = Column {
@@ -749,7 +781,7 @@ mod tests {
semantic_type: TIMESTAMP_SEMANTIC_TYPE,
values: Some(ts_vals),
null_mask: vec![0],
datatype: ColumnDataType::Timestamp as i32,
datatype: ColumnDataType::TimestampMillisecond as i32,
};

(

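A recurring pattern in the insert changes above: the untyped VectorBuilder is gone, and writers go through a Box<dyn MutableVector> created from the column's ConcreteDataType, with fallible push_value_ref calls replacing the infallible push. A minimal sketch under those assumptions:

    use datatypes::data_type::{ConcreteDataType, DataType};
    use datatypes::prelude::ValueRef;

    // Build [1, NULL, 3] through the new trait surface; every push can
    // now fail, so errors surface instead of panicking inside push().
    let mut builder = ConcreteDataType::int64_datatype().create_mutable_vector(3);
    builder.push_value_ref(ValueRef::Int64(1)).unwrap();
    builder.push_value_ref(ValueRef::Null).unwrap();
    builder.push_value_ref(ValueRef::Int64(3)).unwrap();
    let vector = builder.to_vector();
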
@@ -13,9 +13,7 @@ common-query = { path = "../query" }
|
||||
common-recordbatch = { path = "../recordbatch" }
|
||||
common-runtime = { path = "../runtime" }
|
||||
dashmap = "5.4"
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion = "14.0.0"
|
||||
datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
|
||||
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use api::helper::ColumnDataTypeWrapper;
use api::result::{build_err_result, ObjectResultBuilder};
use api::v1::codec::SelectResult;
@@ -24,9 +22,14 @@ use common_error::prelude::ErrorExt;
use common_error::status_code::StatusCode;
use common_query::Output;
use common_recordbatch::{RecordBatches, SendableRecordBatchStream};
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
use datatypes::arrow_array::{BinaryArray, StringArray};
use datatypes::schema::SchemaRef;
use datatypes::types::{TimestampType, WrapperType};
use datatypes::vectors::{
BinaryVector, BooleanVector, DateTimeVector, DateVector, Float32Vector, Float64Vector,
Int16Vector, Int32Vector, Int64Vector, Int8Vector, StringVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
UInt32Vector, UInt64Vector, UInt8Vector, VectorRef,
};
use snafu::{OptionExt, ResultExt};

use crate::error::{self, ConversionSnafu, Result};
@@ -46,6 +49,7 @@ pub async fn to_object_result(output: std::result::Result<Output, impl ErrorExt>
Err(e) => build_err_result(&e),
}
}

async fn collect(stream: SendableRecordBatchStream) -> Result<ObjectResult> {
let recordbatches = RecordBatches::try_collect(stream)
.await
@@ -78,10 +82,7 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
let schema = record_batches.schema();
let record_batches = record_batches.take();

let row_count: usize = record_batches
.iter()
.map(|r| r.df_recordbatch.num_rows())
.sum();
let row_count: usize = record_batches.iter().map(|r| r.num_rows()).sum();

let schemas = schema.column_schemas();
let mut columns = Vec::with_capacity(schemas.len());
@@ -89,9 +90,9 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
for (idx, column_schema) in schemas.iter().enumerate() {
let column_name = column_schema.name.clone();

let arrays: Vec<Arc<dyn Array>> = record_batches
let arrays: Vec<_> = record_batches
.iter()
.map(|r| r.df_recordbatch.columns()[idx].clone())
.map(|r| r.column(idx).clone())
.collect();

let column = Column {
@@ -112,7 +113,7 @@ fn try_convert(record_batches: RecordBatches) -> Result<SelectResult> {
})
}

pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
pub fn null_mask(arrays: &[VectorRef], row_count: usize) -> Vec<u8> {
let null_count: usize = arrays.iter().map(|a| a.null_count()).sum();

if null_count == 0 {
@@ -122,10 +123,12 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {
let mut null_mask = BitVec::with_capacity(row_count);
for array in arrays {
let validity = array.validity();
if let Some(v) = validity {
v.iter().for_each(|x| null_mask.push(!x));
} else {
if validity.is_all_valid() {
null_mask.extend_from_bitslice(&BitVec::repeat(false, array.len()));
} else {
for i in 0..array.len() {
null_mask.push(!validity.is_set(i));
}
}
}
null_mask.into_vec()
@@ -133,7 +136,9 @@ pub fn null_mask(arrays: &Vec<Arc<dyn Array>>, row_count: usize) -> Vec<u8> {

macro_rules! convert_arrow_array_to_grpc_vals {
($data_type: expr, $arrays: ident, $(($Type: pat, $CastType: ty, $field: ident, $MapFunction: expr)), +) => {{
use datatypes::arrow::datatypes::{DataType, TimeUnit};
use datatypes::data_type::{ConcreteDataType};
use datatypes::prelude::ScalarVector;

match $data_type {
$(
$Type => {
@@ -143,52 +148,114 @@ macro_rules! convert_arrow_array_to_grpc_vals {
from: format!("{:?}", $data_type),
})?;
vals.$field.extend(array
.iter()
.iter_data()
.filter_map(|i| i.map($MapFunction))
.collect::<Vec<_>>());
}
return Ok(vals);
},
)+
_ => unimplemented!(),
ConcreteDataType::Null(_) | ConcreteDataType::List(_) => unreachable!("Should not send {:?} in gRPC", $data_type),
}
}};
}

pub fn values(arrays: &[Arc<dyn Array>]) -> Result<Values> {
pub fn values(arrays: &[VectorRef]) -> Result<Values> {
if arrays.is_empty() {
return Ok(Values::default());
}
let data_type = arrays[0].data_type();

convert_arrow_array_to_grpc_vals!(
data_type, arrays,

(DataType::Boolean, BooleanArray, bool_values, |x| {x}),

(DataType::Int8, PrimitiveArray<i8>, i8_values, |x| {*x as i32}),
(DataType::Int16, PrimitiveArray<i16>, i16_values, |x| {*x as i32}),
(DataType::Int32, PrimitiveArray<i32>, i32_values, |x| {*x}),
(DataType::Int64, PrimitiveArray<i64>, i64_values, |x| {*x}),

(DataType::UInt8, PrimitiveArray<u8>, u8_values, |x| {*x as u32}),
(DataType::UInt16, PrimitiveArray<u16>, u16_values, |x| {*x as u32}),
(DataType::UInt32, PrimitiveArray<u32>, u32_values, |x| {*x}),
(DataType::UInt64, PrimitiveArray<u64>, u64_values, |x| {*x}),

(DataType::Float32, PrimitiveArray<f32>, f32_values, |x| {*x}),
(DataType::Float64, PrimitiveArray<f64>, f64_values, |x| {*x}),

(DataType::Binary, BinaryArray, binary_values, |x| {x.into()}),
(DataType::LargeBinary, BinaryArray, binary_values, |x| {x.into()}),

(DataType::Utf8, StringArray, string_values, |x| {x.into()}),
(DataType::LargeUtf8, StringArray, string_values, |x| {x.into()}),

(DataType::Date32, PrimitiveArray<i32>, date_values, |x| {*x as i32}),
(DataType::Date64, PrimitiveArray<i64>, datetime_values,|x| {*x as i64}),

(DataType::Timestamp(TimeUnit::Millisecond, _), PrimitiveArray<i64>, ts_millis_values, |x| {*x})
data_type,
arrays,
(
ConcreteDataType::Boolean(_),
BooleanVector,
bool_values,
|x| { x }
),
(ConcreteDataType::Int8(_), Int8Vector, i8_values, |x| {
i32::from(x)
}),
(ConcreteDataType::Int16(_), Int16Vector, i16_values, |x| {
i32::from(x)
}),
(ConcreteDataType::Int32(_), Int32Vector, i32_values, |x| {
x
}),
(ConcreteDataType::Int64(_), Int64Vector, i64_values, |x| {
x
}),
(ConcreteDataType::UInt8(_), UInt8Vector, u8_values, |x| {
u32::from(x)
}),
(ConcreteDataType::UInt16(_), UInt16Vector, u16_values, |x| {
u32::from(x)
}),
(ConcreteDataType::UInt32(_), UInt32Vector, u32_values, |x| {
x
}),
(ConcreteDataType::UInt64(_), UInt64Vector, u64_values, |x| {
x
}),
(
ConcreteDataType::Float32(_),
Float32Vector,
f32_values,
|x| { x }
),
(
ConcreteDataType::Float64(_),
Float64Vector,
f64_values,
|x| { x }
),
(
ConcreteDataType::Binary(_),
BinaryVector,
binary_values,
|x| { x.into() }
),
(
ConcreteDataType::String(_),
StringVector,
string_values,
|x| { x.into() }
),
(ConcreteDataType::Date(_), DateVector, date_values, |x| {
x.val()
}),
(
ConcreteDataType::DateTime(_),
DateTimeVector,
datetime_values,
|x| { x.val() }
),
(
ConcreteDataType::Timestamp(TimestampType::Second(_)),
TimestampSecondVector,
ts_second_values,
|x| { x.into_native() }
),
(
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)),
TimestampMillisecondVector,
ts_millisecond_values,
|x| { x.into_native() }
),
(
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)),
TimestampMicrosecondVector,
ts_microsecond_values,
|x| { x.into_native() }
),
(
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)),
TimestampNanosecondVector,
ts_nanosecond_values,
|x| { x.into_native() }
)
)
}

@@ -197,14 +264,10 @@ mod tests {
use std::sync::Arc;

use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::field_util::SchemaExt;
use datatypes::arrow::array::{Array, BooleanArray, PrimitiveArray};
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::arrow_array::StringArray;
use datatypes::schema::Schema;
use datatypes::vectors::{UInt32Vector, VectorRef};
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};

use crate::select::{null_mask, try_convert, values};
use super::*;

#[test]
fn test_convert_record_batches_to_select_result() {
@@ -230,9 +293,8 @@ mod tests {

#[test]
fn test_convert_arrow_arrays_i32() {
let array: PrimitiveArray<i32> =
PrimitiveArray::from(vec![Some(1), Some(2), None, Some(3)]);
let array: Arc<dyn Array> = Arc::new(array);
let array = Int32Vector::from(vec![Some(1), Some(2), None, Some(3)]);
let array: VectorRef = Arc::new(array);

let values = values(&[array]).unwrap();

@@ -241,14 +303,14 @@ mod tests {

#[test]
fn test_convert_arrow_arrays_string() {
let array = StringArray::from(vec![
let array = StringVector::from(vec![
Some("1".to_string()),
Some("2".to_string()),
None,
Some("3".to_string()),
None,
]);
let array: Arc<dyn Array> = Arc::new(array);
let array: VectorRef = Arc::new(array);

let values = values(&[array]).unwrap();

@@ -257,8 +319,8 @@ mod tests {

#[test]
fn test_convert_arrow_arrays_bool() {
let array = BooleanArray::from(vec![Some(true), Some(false), None, Some(false), None]);
let array: Arc<dyn Array> = Arc::new(array);
let array = BooleanVector::from(vec![Some(true), Some(false), None, Some(false), None]);
let array: VectorRef = Arc::new(array);

let values = values(&[array]).unwrap();

@@ -267,43 +329,42 @@ mod tests {

#[test]
fn test_convert_arrow_arrays_empty() {
let array = BooleanArray::from(vec![None, None, None, None, None]);
let array: Arc<dyn Array> = Arc::new(array);
let array = BooleanVector::from(vec![None, None, None, None, None]);
let array: VectorRef = Arc::new(array);

let values = values(&[array]).unwrap();

assert_eq!(Vec::<bool>::default(), values.bool_values);
assert!(values.bool_values.is_empty());
}

#[test]
fn test_null_mask() {
let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![None, Some(2), None]));
let a2: Arc<dyn Array> =
Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), None, Some(4)]));
let mask = null_mask(&vec![a1, a2], 3 + 4);
let a1: VectorRef = Arc::new(Int32Vector::from(vec![None, Some(2), None]));
let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), None, Some(4)]));
let mask = null_mask(&[a1, a2], 3 + 4);
assert_eq!(vec![0b0010_0101], mask);

let empty: Arc<dyn Array> = Arc::new(PrimitiveArray::<i32>::from(vec![None, None, None]));
let mask = null_mask(&vec![empty.clone(), empty.clone(), empty], 9);
let empty: VectorRef = Arc::new(Int32Vector::from(vec![None, None, None]));
let mask = null_mask(&[empty.clone(), empty.clone(), empty], 9);
assert_eq!(vec![0b1111_1111, 0b0000_0001], mask);

let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), Some(3)]));
let a2: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(4), Some(5), Some(6)]));
let mask = null_mask(&vec![a1, a2], 3 + 3);
let a1: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), Some(3)]));
let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(4), Some(5), Some(6)]));
let mask = null_mask(&[a1, a2], 3 + 3);
assert_eq!(Vec::<u8>::default(), mask);

let a1: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(1), Some(2), Some(3)]));
let a2: Arc<dyn Array> = Arc::new(PrimitiveArray::from(vec![Some(4), Some(5), None]));
let mask = null_mask(&vec![a1, a2], 3 + 3);
let a1: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), Some(3)]));
let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(4), Some(5), None]));
let mask = null_mask(&[a1, a2], 3 + 3);
assert_eq!(vec![0b0010_0000], mask);
}

fn mock_record_batch() -> RecordBatch {
let arrow_schema = Arc::new(ArrowSchema::new(vec![
Field::new("c1", DataType::UInt32, false),
Field::new("c2", DataType::UInt32, false),
]));
let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());
let column_schemas = vec![
ColumnSchema::new("c1", ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new("c2", ConcreteDataType::uint32_datatype(), true),
];
let schema = Arc::new(Schema::try_new(column_schemas).unwrap());

let v1 = Arc::new(UInt32Vector::from(vec![Some(1), Some(2), None]));
let v2 = Arc::new(UInt32Vector::from(vec![Some(1), None, None]));

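The null_mask tests above encode the bit layout worth remembering: nulls become 1-bits packed LSB-first across all input vectors in order. Reusing the first test's data as a worked example:

    // a1 = [None, 2, None] -> rows 0 and 2 are null -> bits 0 and 2.
    // a2 = [1, 2, None, 4] -> its row 2 is global row 5 -> bit 5.
    // Bits {0, 2, 5} set in one byte: 0b0010_0101.
    let a1: VectorRef = Arc::new(Int32Vector::from(vec![None, Some(2), None]));
    let a2: VectorRef = Arc::new(Int32Vector::from(vec![Some(1), Some(2), None, Some(4)]));
    assert_eq!(vec![0b0010_0101], null_mask(&[a1, a2], 3 + 4));
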
@@ -45,11 +45,11 @@ impl LinesWriter {
pub fn write_ts(&mut self, column_name: &str, value: (i64, Precision)) -> Result<()> {
let (idx, column) = self.mut_column(
column_name,
ColumnDataType::Timestamp,
ColumnDataType::TimestampMillisecond,
SemanticType::Timestamp,
);
ensure!(
column.datatype == ColumnDataType::Timestamp as i32,
column.datatype == ColumnDataType::TimestampMillisecond as i32,
TypeMismatchSnafu {
column_name,
expected: "timestamp",
@@ -58,7 +58,9 @@ impl LinesWriter {
);
// It is safe to use unwrap here, because values has been initialized in mut_column()
let values = column.values.as_mut().unwrap();
values.ts_millis_values.push(to_ms_ts(value.1, value.0));
values
.ts_millisecond_values
.push(to_ms_ts(value.1, value.0));
self.null_masks[idx].push(false);
Ok(())
}
@@ -224,23 +226,23 @@ impl LinesWriter {

pub fn to_ms_ts(p: Precision, ts: i64) -> i64 {
match p {
Precision::NANOSECOND => ts / 1_000_000,
Precision::MICROSECOND => ts / 1000,
Precision::MILLISECOND => ts,
Precision::SECOND => ts * 1000,
Precision::MINUTE => ts * 1000 * 60,
Precision::HOUR => ts * 1000 * 60 * 60,
Precision::Nanosecond => ts / 1_000_000,
Precision::Microsecond => ts / 1000,
Precision::Millisecond => ts,
Precision::Second => ts * 1000,
Precision::Minute => ts * 1000 * 60,
Precision::Hour => ts * 1000 * 60 * 60,
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Precision {
NANOSECOND,
MICROSECOND,
MILLISECOND,
SECOND,
MINUTE,
HOUR,
Nanosecond,
Microsecond,
Millisecond,
Second,
Minute,
Hour,
}

#[cfg(test)]
@@ -261,13 +263,13 @@ mod tests {
writer.write_f64("memory", 0.4).unwrap();
writer.write_string("name", "name1").unwrap();
writer
.write_ts("ts", (101011000, Precision::MILLISECOND))
.write_ts("ts", (101011000, Precision::Millisecond))
.unwrap();
writer.commit();

writer.write_tag("host", "host2").unwrap();
writer
.write_ts("ts", (102011001, Precision::MILLISECOND))
.write_ts("ts", (102011001, Precision::Millisecond))
.unwrap();
writer.write_bool("enable_reboot", true).unwrap();
writer.write_u64("year_of_service", 2).unwrap();
@@ -278,7 +280,7 @@ mod tests {
writer.write_f64("cpu", 0.4).unwrap();
writer.write_u64("cpu_core_num", 16).unwrap();
writer
.write_ts("ts", (103011002, Precision::MILLISECOND))
.write_ts("ts", (103011002, Precision::Millisecond))
.unwrap();
writer.commit();

@@ -321,11 +323,11 @@ mod tests {

let column = &columns[4];
assert_eq!("ts", column.column_name);
assert_eq!(ColumnDataType::Timestamp as i32, column.datatype);
assert_eq!(ColumnDataType::TimestampMillisecond as i32, column.datatype);
assert_eq!(SemanticType::Timestamp as i32, column.semantic_type);
assert_eq!(
vec![101011000, 102011001, 103011002],
column.values.as_ref().unwrap().ts_millis_values
column.values.as_ref().unwrap().ts_millisecond_values
);
verify_null_mask(&column.null_mask, vec![false, false, false]);

@@ -367,16 +369,16 @@ mod tests {

#[test]
fn test_to_ms() {
assert_eq!(100, to_ms_ts(Precision::NANOSECOND, 100110000));
assert_eq!(100110, to_ms_ts(Precision::MICROSECOND, 100110000));
assert_eq!(100110000, to_ms_ts(Precision::MILLISECOND, 100110000));
assert_eq!(100, to_ms_ts(Precision::Nanosecond, 100110000));
assert_eq!(100110, to_ms_ts(Precision::Microsecond, 100110000));
assert_eq!(100110000, to_ms_ts(Precision::Millisecond, 100110000));
assert_eq!(
100110000 * 1000 * 60,
to_ms_ts(Precision::MINUTE, 100110000)
to_ms_ts(Precision::Minute, 100110000)
);
assert_eq!(
100110000 * 1000 * 60 * 60,
to_ms_ts(Precision::HOUR, 100110000)
to_ms_ts(Precision::Hour, 100110000)
);
}
}

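Only the Precision spelling changes here; the to_ms_ts arithmetic is untouched. A few worked values, consistent with the match arms above:

    assert_eq!(1_000, to_ms_ts(Precision::Second, 1));        // 1 s -> 1000 ms
    assert_eq!(1, to_ms_ts(Precision::Millisecond, 1));       // identity
    assert_eq!(0, to_ms_ts(Precision::Nanosecond, 999_999));  // integer division truncates
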
@@ -9,11 +9,9 @@ async-trait = "0.1"
|
||||
common-error = { path = "../error" }
|
||||
common-recordbatch = { path = "../recordbatch" }
|
||||
common-time = { path = "../time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datafusion = "14.0.0"
|
||||
datafusion-common = "14.0.0"
|
||||
datafusion-expr = "14.0.0"
|
||||
datatypes = { path = "../../datatypes" }
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
statrs = "0.15"
|
||||
|
||||
@@ -23,16 +23,9 @@ use datatypes::error::Error as DataTypeError;
use datatypes::prelude::ConcreteDataType;
use statrs::StatsError;

common_error::define_opaque_error!(Error);

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum InnerError {
#[snafu(display("Fail to cast array to {:?}, source: {}", typ, source))]
TypeCast {
source: ArrowError,
typ: arrow::datatypes::DataType,
},
pub enum Error {
#[snafu(display("Fail to execute function, source: {}", source))]
ExecuteFunction {
source: DataFusionError,
@@ -83,8 +76,8 @@ pub enum InnerError {
backtrace: Backtrace,
},

#[snafu(display("Invalid inputs: {}", err_msg))]
InvalidInputs {
#[snafu(display("Invalid input type: {}", err_msg))]
InvalidInputType {
#[snafu(backtrace)]
source: DataTypeError,
err_msg: String,
@@ -133,37 +126,74 @@ pub enum InnerError {
#[snafu(backtrace)]
source: BoxedError,
},

#[snafu(display("Failed to cast array to {:?}, source: {}", typ, source))]
TypeCast {
source: ArrowError,
typ: arrow::datatypes::DataType,
backtrace: Backtrace,
},

#[snafu(display(
"Failed to perform compute operation on arrow arrays, source: {}",
source
))]
ArrowCompute {
source: ArrowError,
backtrace: Backtrace,
},

#[snafu(display("Query engine fail to cast value: {}", source))]
ToScalarValue {
#[snafu(backtrace)]
source: DataTypeError,
},

#[snafu(display("Failed to get scalar vector, {}", source))]
GetScalarVector {
#[snafu(backtrace)]
source: DataTypeError,
},

#[snafu(display("Invalid function args: {}", err_msg))]
InvalidFuncArgs {
err_msg: String,
backtrace: Backtrace,
},
}

pub type Result<T> = std::result::Result<T, Error>;

impl ErrorExt for InnerError {
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
InnerError::ExecuteFunction { .. }
| InnerError::GenerateFunction { .. }
| InnerError::CreateAccumulator { .. }
| InnerError::DowncastVector { .. }
| InnerError::InvalidInputState { .. }
| InnerError::InvalidInputCol { .. }
| InnerError::BadAccumulatorImpl { .. } => StatusCode::EngineExecuteQuery,
Error::ExecuteFunction { .. }
| Error::GenerateFunction { .. }
| Error::CreateAccumulator { .. }
| Error::DowncastVector { .. }
| Error::InvalidInputState { .. }
| Error::InvalidInputCol { .. }
| Error::BadAccumulatorImpl { .. }
| Error::ToScalarValue { .. }
| Error::GetScalarVector { .. }
| Error::ArrowCompute { .. } => StatusCode::EngineExecuteQuery,

InnerError::InvalidInputs { source, .. }
| InnerError::IntoVector { source, .. }
| InnerError::FromScalarValue { source }
| InnerError::ConvertArrowSchema { source }
| InnerError::FromArrowArray { source } => source.status_code(),
Error::InvalidInputType { source, .. }
| Error::IntoVector { source, .. }
| Error::FromScalarValue { source }
| Error::ConvertArrowSchema { source }
| Error::FromArrowArray { source } => source.status_code(),

InnerError::ExecuteRepeatedly { .. }
| InnerError::GeneralDataFusion { .. }
| InnerError::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,
Error::ExecuteRepeatedly { .. }
| Error::GeneralDataFusion { .. }
| Error::DataFusionExecutionPlan { .. } => StatusCode::Unexpected,

InnerError::UnsupportedInputDataType { .. } | InnerError::TypeCast { .. } => {
StatusCode::InvalidArguments
}
Error::UnsupportedInputDataType { .. }
| Error::TypeCast { .. }
| Error::InvalidFuncArgs { .. } => StatusCode::InvalidArguments,

InnerError::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
InnerError::ExecutePhysicalPlan { source } => source.status_code(),
Error::ConvertDfRecordBatchStream { source, .. } => source.status_code(),
Error::ExecutePhysicalPlan { source } => source.status_code(),
}
}

@@ -176,12 +206,6 @@ impl ErrorExt for InnerError {
}
}

impl From<InnerError> for Error {
fn from(e: InnerError) -> Error {
Error::new(e)
}
}

impl From<Error> for DataFusionError {
fn from(e: Error) -> DataFusionError {
DataFusionError::External(Box::new(e))
@@ -190,7 +214,7 @@ impl From<Error> for DataFusionError {

impl From<BoxedError> for Error {
fn from(source: BoxedError) -> Self {
InnerError::ExecutePhysicalPlan { source }.into()
Error::ExecutePhysicalPlan { source }
}
}

@@ -206,60 +230,51 @@ mod tests {
}

fn assert_error(err: &Error, code: StatusCode) {
let inner_err = err.as_any().downcast_ref::<InnerError>().unwrap();
let inner_err = err.as_any().downcast_ref::<Error>().unwrap();
assert_eq!(code, inner_err.status_code());
assert!(inner_err.backtrace_opt().is_some());
}

#[test]
fn test_datafusion_as_source() {
let err: Error = throw_df_error()
let err = throw_df_error()
.context(ExecuteFunctionSnafu)
.err()
.unwrap()
.into();
.unwrap();
assert_error(&err, StatusCode::EngineExecuteQuery);

let err: Error = throw_df_error()
.context(GeneralDataFusionSnafu)
.err()
.unwrap()
.into();
.unwrap();
assert_error(&err, StatusCode::Unexpected);

let err: Error = throw_df_error()
let err = throw_df_error()
.context(DataFusionExecutionPlanSnafu)
.err()
.unwrap()
.into();
.unwrap();
assert_error(&err, StatusCode::Unexpected);
}

#[test]
fn test_execute_repeatedly_error() {
let error: Error = None::<i32>
.context(ExecuteRepeatedlySnafu)
.err()
.unwrap()
.into();
assert_eq!(error.inner.status_code(), StatusCode::Unexpected);
let error = None::<i32>.context(ExecuteRepeatedlySnafu).err().unwrap();
assert_eq!(error.status_code(), StatusCode::Unexpected);
assert!(error.backtrace_opt().is_some());
}

#[test]
fn test_convert_df_recordbatch_stream_error() {
let result: std::result::Result<i32, common_recordbatch::error::Error> =
Err(common_recordbatch::error::InnerError::PollStream {
source: ArrowError::Overflow,
Err(common_recordbatch::error::Error::PollStream {
source: ArrowError::DivideByZero,
backtrace: Backtrace::generate(),
}
.into());
let error: Error = result
});
let error = result
.context(ConvertDfRecordBatchStreamSnafu)
.err()
.unwrap()
.into();
assert_eq!(error.inner.status_code(), StatusCode::Internal);
.unwrap();
assert_eq!(error.status_code(), StatusCode::Internal);
assert!(error.backtrace_opt().is_some());
}

@@ -272,13 +287,12 @@ mod tests {

#[test]
fn test_into_vector_error() {
let err: Error = raise_datatype_error()
let err = raise_datatype_error()
.context(IntoVectorSnafu {
data_type: ArrowDatatype::Int32,
})
.err()
.unwrap()
.into();
.unwrap();
assert!(err.backtrace_opt().is_some());
let datatype_err = raise_datatype_error().err().unwrap();
assert_eq!(datatype_err.status_code(), err.status_code());

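With define_opaque_error! gone, common-query's Error is the public enum itself, which is why the tests above shed their `.into()` hops and the `error.inner` field access. A sketch of what a caller sees now, assuming the ErrorExt trait imported elsewhere in this diff:

    use common_error::prelude::ErrorExt;
    use common_query::error::Result;

    fn report(res: Result<()>) {
        if let Err(e) = res {
            // Status codes come straight off the flattened enum; no
            // downcast through an opaque wrapper is needed.
            let _code = e.status_code();
        }
    }
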
@@ -22,7 +22,7 @@ use std::sync::Arc;
use datatypes::prelude::ConcreteDataType;

pub use self::accumulator::{Accumulator, AggregateFunctionCreator, AggregateFunctionCreatorRef};
pub use self::expr::Expr;
pub use self::expr::{DfExpr, Expr};
pub use self::udaf::AggregateFunction;
pub use self::udf::ScalarUdf;
use crate::function::{ReturnTypeFunction, ScalarFunctionImplementation};
@@ -148,9 +148,7 @@ mod tests {

let args = vec![
DfColumnarValue::Scalar(ScalarValue::Boolean(Some(true))),
DfColumnarValue::Array(Arc::new(BooleanArray::from_slice(vec![
true, false, false, true,
]))),
DfColumnarValue::Array(Arc::new(BooleanArray::from(vec![true, false, false, true]))),
];

// call the function
@@ -17,12 +17,10 @@
use std::fmt::Debug;
use std::sync::Arc;

use common_time::timestamp::TimeUnit;
use datafusion_common::Result as DfResult;
use datafusion_expr::Accumulator as DfAccumulator;
use datafusion_expr::{Accumulator as DfAccumulator, AggregateState};
use datatypes::arrow::array::ArrayRef;
use datatypes::prelude::*;
use datatypes::value::ListValue;
use datatypes::vectors::{Helper as VectorHelper, VectorRef};
use snafu::ResultExt;

@@ -128,356 +126,53 @@ impl DfAccumulatorAdaptor {
}

impl DfAccumulator for DfAccumulatorAdaptor {
fn state(&self) -> DfResult<Vec<ScalarValue>> {
fn state(&self) -> DfResult<Vec<AggregateState>> {
let state_values = self.accumulator.state()?;
let state_types = self.creator.state_types()?;
if state_values.len() != state_types.len() {
return error::BadAccumulatorImplSnafu {
err_msg: format!("Accumulator {:?} returned state values size do not match its state types size.", self),
}
.fail()
.map_err(Error::from)?;
.fail()?;
}
Ok(state_values
.into_iter()
.zip(state_types.iter())
.map(|(v, t)| try_into_scalar_value(v, t))
.collect::<Result<Vec<_>>>()
.map_err(Error::from)?)
.map(|(v, t)| {
let scalar = v
.try_to_scalar_value(t)
.context(error::ToScalarValueSnafu)?;
Ok(AggregateState::Scalar(scalar))
})
.collect::<Result<Vec<_>>>()?)
}

fn update_batch(&mut self, values: &[ArrayRef]) -> DfResult<()> {
let vectors = VectorHelper::try_into_vectors(values)
.context(FromScalarValueSnafu)
.map_err(Error::from)?;
self.accumulator
.update_batch(&vectors)
.map_err(|e| e.into())
let vectors = VectorHelper::try_into_vectors(values).context(FromScalarValueSnafu)?;
self.accumulator.update_batch(&vectors)?;
Ok(())
}

fn merge_batch(&mut self, states: &[ArrayRef]) -> DfResult<()> {
let mut vectors = Vec::with_capacity(states.len());
for array in states.iter() {
vectors.push(
VectorHelper::try_into_vector(array)
.context(IntoVectorSnafu {
data_type: array.data_type().clone(),
})
.map_err(Error::from)?,
VectorHelper::try_into_vector(array).context(IntoVectorSnafu {
data_type: array.data_type().clone(),
})?,
);
}
self.accumulator.merge_batch(&vectors).map_err(|e| e.into())
self.accumulator.merge_batch(&vectors)?;
Ok(())
}

fn evaluate(&self) -> DfResult<ScalarValue> {
let value = self.accumulator.evaluate()?;
let output_type = self.creator.output_type()?;
Ok(try_into_scalar_value(value, &output_type)?)
}
}

fn try_into_scalar_value(value: Value, datatype: &ConcreteDataType) -> Result<ScalarValue> {
if !matches!(value, Value::Null) && datatype != &value.data_type() {
return error::BadAccumulatorImplSnafu {
err_msg: format!(
"expect value to return datatype {:?}, actual: {:?}",
datatype,
value.data_type()
),
}
.fail()?;
}

Ok(match value {
Value::Boolean(v) => ScalarValue::Boolean(Some(v)),
Value::UInt8(v) => ScalarValue::UInt8(Some(v)),
Value::UInt16(v) => ScalarValue::UInt16(Some(v)),
Value::UInt32(v) => ScalarValue::UInt32(Some(v)),
Value::UInt64(v) => ScalarValue::UInt64(Some(v)),
Value::Int8(v) => ScalarValue::Int8(Some(v)),
Value::Int16(v) => ScalarValue::Int16(Some(v)),
Value::Int32(v) => ScalarValue::Int32(Some(v)),
Value::Int64(v) => ScalarValue::Int64(Some(v)),
Value::Float32(v) => ScalarValue::Float32(Some(v.0)),
Value::Float64(v) => ScalarValue::Float64(Some(v.0)),
Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())),
Value::Binary(v) => ScalarValue::LargeBinary(Some(v.to_vec())),
Value::Date(v) => ScalarValue::Date32(Some(v.val())),
Value::DateTime(v) => ScalarValue::Date64(Some(v.val())),
Value::Null => try_convert_null_value(datatype)?,
Value::List(list) => try_convert_list_value(list)?,
Value::Timestamp(t) => timestamp_to_scalar_value(t.unit(), Some(t.value())),
})
}

fn timestamp_to_scalar_value(unit: TimeUnit, val: Option<i64>) -> ScalarValue {
match unit {
TimeUnit::Second => ScalarValue::TimestampSecond(val, None),
TimeUnit::Millisecond => ScalarValue::TimestampMillisecond(val, None),
TimeUnit::Microsecond => ScalarValue::TimestampMicrosecond(val, None),
TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(val, None),
}
}

fn try_convert_null_value(datatype: &ConcreteDataType) -> Result<ScalarValue> {
Ok(match datatype {
ConcreteDataType::Boolean(_) => ScalarValue::Boolean(None),
ConcreteDataType::Int8(_) => ScalarValue::Int8(None),
ConcreteDataType::Int16(_) => ScalarValue::Int16(None),
ConcreteDataType::Int32(_) => ScalarValue::Int32(None),
ConcreteDataType::Int64(_) => ScalarValue::Int64(None),
ConcreteDataType::UInt8(_) => ScalarValue::UInt8(None),
ConcreteDataType::UInt16(_) => ScalarValue::UInt16(None),
ConcreteDataType::UInt32(_) => ScalarValue::UInt32(None),
ConcreteDataType::UInt64(_) => ScalarValue::UInt64(None),
ConcreteDataType::Float32(_) => ScalarValue::Float32(None),
ConcreteDataType::Float64(_) => ScalarValue::Float64(None),
ConcreteDataType::Binary(_) => ScalarValue::LargeBinary(None),
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit, None),
_ => {
return error::BadAccumulatorImplSnafu {
err_msg: format!(
"undefined transition from null value to datatype {:?}",
datatype
),
}
.fail()?
}
})
}

fn try_convert_list_value(list: ListValue) -> Result<ScalarValue> {
let vs = if let Some(items) = list.items() {
Some(Box::new(
items
.iter()
.map(|v| try_into_scalar_value(v.clone(), list.datatype()))
.collect::<Result<Vec<_>>>()?,
))
} else {
None
};
Ok(ScalarValue::List(
vs,
Box::new(list.datatype().as_arrow_type()),
))
}

#[cfg(test)]
mod tests {
use common_base::bytes::{Bytes, StringBytes};
use datafusion_common::ScalarValue;
use datatypes::arrow::datatypes::DataType;
use datatypes::value::{ListValue, OrderedFloat};

use super::*;

#[test]
fn test_not_null_value_to_scalar_value() {
assert_eq!(
ScalarValue::Boolean(Some(true)),
try_into_scalar_value(Value::Boolean(true), &ConcreteDataType::boolean_datatype())
.unwrap()
);
assert_eq!(
ScalarValue::Boolean(Some(false)),
try_into_scalar_value(Value::Boolean(false), &ConcreteDataType::boolean_datatype())
.unwrap()
);
assert_eq!(
ScalarValue::UInt8(Some(u8::MIN + 1)),
try_into_scalar_value(
Value::UInt8(u8::MIN + 1),
&ConcreteDataType::uint8_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::UInt16(Some(u16::MIN + 2)),
try_into_scalar_value(
Value::UInt16(u16::MIN + 2),
&ConcreteDataType::uint16_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::UInt32(Some(u32::MIN + 3)),
try_into_scalar_value(
Value::UInt32(u32::MIN + 3),
&ConcreteDataType::uint32_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::UInt64(Some(u64::MIN + 4)),
try_into_scalar_value(
Value::UInt64(u64::MIN + 4),
&ConcreteDataType::uint64_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::Int8(Some(i8::MIN + 4)),
try_into_scalar_value(Value::Int8(i8::MIN + 4), &ConcreteDataType::int8_datatype())
.unwrap()
);
assert_eq!(
ScalarValue::Int16(Some(i16::MIN + 5)),
try_into_scalar_value(
Value::Int16(i16::MIN + 5),
&ConcreteDataType::int16_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::Int32(Some(i32::MIN + 6)),
try_into_scalar_value(
Value::Int32(i32::MIN + 6),
&ConcreteDataType::int32_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::Int64(Some(i64::MIN + 7)),
try_into_scalar_value(
Value::Int64(i64::MIN + 7),
&ConcreteDataType::int64_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::Float32(Some(8.0f32)),
try_into_scalar_value(
Value::Float32(OrderedFloat(8.0f32)),
&ConcreteDataType::float32_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::Float64(Some(9.0f64)),
try_into_scalar_value(
Value::Float64(OrderedFloat(9.0f64)),
&ConcreteDataType::float64_datatype()
)
.unwrap()
);
assert_eq!(
ScalarValue::Utf8(Some("hello".to_string())),
try_into_scalar_value(
Value::String(StringBytes::from("hello")),
&ConcreteDataType::string_datatype(),
)
.unwrap()
);
assert_eq!(
ScalarValue::LargeBinary(Some("world".as_bytes().to_vec())),
try_into_scalar_value(
Value::Binary(Bytes::from("world".as_bytes())),
&ConcreteDataType::binary_datatype()
)
.unwrap()
);
}

#[test]
fn test_null_value_to_scalar_value() {
assert_eq!(
ScalarValue::Boolean(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::boolean_datatype()).unwrap()
);
assert_eq!(
ScalarValue::UInt8(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::uint8_datatype()).unwrap()
);
assert_eq!(
ScalarValue::UInt16(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::uint16_datatype()).unwrap()
);
assert_eq!(
ScalarValue::UInt32(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::uint32_datatype()).unwrap()
);
assert_eq!(
ScalarValue::UInt64(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::uint64_datatype()).unwrap()
);
assert_eq!(
ScalarValue::Int8(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::int8_datatype()).unwrap()
);
assert_eq!(
ScalarValue::Int16(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::int16_datatype()).unwrap()
);
assert_eq!(
ScalarValue::Int32(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::int32_datatype()).unwrap()
);
assert_eq!(
ScalarValue::Int64(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::int64_datatype()).unwrap()
);
assert_eq!(
ScalarValue::Float32(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::float32_datatype()).unwrap()
);
assert_eq!(
ScalarValue::Float64(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::float64_datatype()).unwrap()
);
assert_eq!(
ScalarValue::Utf8(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::string_datatype()).unwrap()
);
assert_eq!(
ScalarValue::LargeBinary(None),
try_into_scalar_value(Value::Null, &ConcreteDataType::binary_datatype()).unwrap()
);
}

#[test]
fn test_list_value_to_scalar_value() {
let items = Some(Box::new(vec![Value::Int32(-1), Value::Null]));
let list = Value::List(ListValue::new(items, ConcreteDataType::int32_datatype()));
let df_list = try_into_scalar_value(
list,
&ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
)
.unwrap();
assert!(matches!(df_list, ScalarValue::List(_, _)));
match df_list {
ScalarValue::List(vs, datatype) => {
assert_eq!(*datatype, DataType::Int32);

assert!(vs.is_some());
let vs = *vs.unwrap();
assert_eq!(
vs,
vec![ScalarValue::Int32(Some(-1)), ScalarValue::Int32(None)]
);
}
_ => unreachable!(),
}
}

#[test]
pub fn test_timestamp_to_scalar_value() {
assert_eq!(
ScalarValue::TimestampSecond(Some(1), None),
timestamp_to_scalar_value(TimeUnit::Second, Some(1))
);
assert_eq!(
ScalarValue::TimestampMillisecond(Some(1), None),
timestamp_to_scalar_value(TimeUnit::Millisecond, Some(1))
);
assert_eq!(
ScalarValue::TimestampMicrosecond(Some(1), None),
timestamp_to_scalar_value(TimeUnit::Microsecond, Some(1))
);
assert_eq!(
ScalarValue::TimestampNanosecond(Some(1), None),
timestamp_to_scalar_value(TimeUnit::Nanosecond, Some(1))
);
let scalar_value = value
.try_to_scalar_value(&output_type)
.context(error::ToScalarValueSnafu)
.map_err(Error::from)?;
Ok(scalar_value)
}
}

@@ -12,11 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use datafusion::logical_plan::Expr as DfExpr;
|
||||
pub use datafusion_expr::expr::Expr as DfExpr;
|
||||
|
||||
/// Central struct of query API.
|
||||
/// Represent logical expressions such as `A + 1`, or `CAST(c1 AS int)`.
|
||||
#[derive(Clone, PartialEq, Hash, Debug)]
|
||||
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
|
||||
pub struct Expr {
|
||||
df_expr: DfExpr,
|
||||
}
|
||||
|
||||
@@ -104,7 +104,7 @@ fn to_df_accumulator_func(
|
||||
accumulator: AccumulatorFunctionImpl,
|
||||
creator: AggregateFunctionCreatorRef,
|
||||
) -> DfAccumulatorFunctionImplementation {
|
||||
Arc::new(move || {
|
||||
Arc::new(move |_| {
|
||||
let accumulator = accumulator()?;
|
||||
let creator = creator.clone();
|
||||
Ok(Box::new(DfAccumulatorAdaptor::new(accumulator, creator)))
|
||||
|
||||
@@ -16,12 +16,11 @@ use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_recordbatch::adapter::{AsyncRecordBatchStreamAdapter, DfRecordBatchStreamAdapter};
|
||||
use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchStreamAdapter};
|
||||
use common_recordbatch::{DfSendableRecordBatchStream, SendableRecordBatchStream};
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::error::Result as DfResult;
|
||||
pub use datafusion::execution::runtime_env::RuntimeEnv;
|
||||
pub use datafusion::execution::context::{SessionContext, TaskContext};
|
||||
use datafusion::physical_plan::expressions::PhysicalSortExpr;
|
||||
pub use datafusion::physical_plan::Partitioning;
|
||||
use datafusion::physical_plan::Statistics;
|
||||
@@ -63,7 +62,7 @@ pub trait PhysicalPlan: Debug + Send + Sync {
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
runtime: Arc<RuntimeEnv>,
|
||||
context: Arc<TaskContext>,
|
||||
) -> Result<SendableRecordBatchStream>;
|
||||
}
|
||||
|
||||
@@ -111,6 +110,7 @@ impl PhysicalPlan for PhysicalPlanAdapter {
|
||||
.collect();
|
||||
let plan = self
|
||||
.df_plan
|
||||
.clone()
|
||||
.with_new_children(children)
|
||||
.context(error::GeneralDataFusionSnafu)?;
|
||||
Ok(Arc::new(PhysicalPlanAdapter::new(self.schema(), plan)))
|
||||
@@ -119,20 +119,22 @@ impl PhysicalPlan for PhysicalPlanAdapter {
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
runtime: Arc<RuntimeEnv>,
|
||||
context: Arc<TaskContext>,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let df_plan = self.df_plan.clone();
|
||||
let stream = Box::pin(async move { df_plan.execute(partition, runtime).await });
|
||||
let stream = AsyncRecordBatchStreamAdapter::new(self.schema(), stream);
|
||||
let stream = df_plan
|
||||
.execute(partition, context)
|
||||
.context(error::GeneralDataFusionSnafu)?;
|
||||
let adapter = RecordBatchStreamAdapter::try_new(stream)
|
||||
.context(error::ConvertDfRecordBatchStreamSnafu)?;
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
Ok(Box::pin(adapter))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct DfPhysicalPlanAdapter(pub PhysicalPlanRef);
|
||||
|
||||
#[async_trait]
|
||||
impl DfPhysicalPlan for DfPhysicalPlanAdapter {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
@@ -159,15 +161,14 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
&self,
|
||||
self: Arc<Self>,
|
||||
children: Vec<Arc<dyn DfPhysicalPlan>>,
|
||||
) -> DfResult<Arc<dyn DfPhysicalPlan>> {
|
||||
let df_schema = self.schema();
|
||||
let schema: SchemaRef = Arc::new(
|
||||
df_schema
|
||||
.try_into()
|
||||
.context(error::ConvertArrowSchemaSnafu)
|
||||
.map_err(error::Error::from)?,
|
||||
.context(error::ConvertArrowSchemaSnafu)?,
|
||||
);
|
||||
let children = children
|
||||
.into_iter()
|
||||
@@ -177,12 +178,12 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
|
||||
Ok(Arc::new(DfPhysicalPlanAdapter(plan)))
|
||||
}
|
||||
|
||||
async fn execute(
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
runtime: Arc<RuntimeEnv>,
|
||||
context: Arc<TaskContext>,
|
||||
) -> DfResult<DfSendableRecordBatchStream> {
|
||||
let stream = self.0.execute(partition, runtime)?;
|
||||
let stream = self.0.execute(partition, context)?;
|
||||
Ok(Box::pin(DfRecordBatchStreamAdapter::new(stream)))
|
||||
}
|
||||
|
||||
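A note on the pattern in the adapter hunks above: DataFusion 14 makes `ExecutionPlan::execute` synchronous (it returns the record-batch stream directly instead of being an `async fn`) and threads a per-query `TaskContext` through instead of the global `RuntimeEnv`, which is why the adapters drop `async` and swap the argument type. A minimal sketch of driving a plan under the new API, assuming `physical_plan` was produced by `SessionContext::create_physical_plan`:

```rust
use std::sync::Arc;

use datafusion::error::Result;
use datafusion::execution::context::{SessionContext, TaskContext};
use datafusion::physical_plan::{collect, ExecutionPlan};

// Sketch: execute a DataFusion 14 physical plan with a per-query TaskContext.
async fn run(ctx: &SessionContext, physical_plan: Arc<dyn ExecutionPlan>) -> Result<()> {
    let task_ctx = Arc::new(TaskContext::from(ctx));
    // `collect` drives every partition's stream to completion.
    let batches = collect(physical_plan, task_ctx).await?;
    println!("collected {} record batches", batches.len());
    Ok(())
}
```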
@@ -194,16 +195,16 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {

#[cfg(test)]
mod test {
use async_trait::async_trait;
use common_recordbatch::{RecordBatch, RecordBatches};
use datafusion::arrow_print;
use datafusion::datasource::TableProvider as DfTableProvider;
use datafusion::logical_plan::LogicalPlanBuilder;
use datafusion::datasource::{DefaultTableSource, TableProvider as DfTableProvider, TableType};
use datafusion::execution::context::{SessionContext, SessionState};
use datafusion::physical_plan::collect;
use datafusion::physical_plan::empty::EmptyExec;
use datafusion::prelude::ExecutionContext;
use datafusion_common::field_util::SchemaExt;
use datafusion_expr::Expr;
use datafusion_expr::logical_plan::builder::LogicalPlanBuilder;
use datafusion_expr::{Expr, TableSource};
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::arrow::util::pretty;
use datatypes::schema::Schema;
use datatypes::vectors::Int32Vector;

@@ -225,8 +226,13 @@ mod test {
)]))
}

fn table_type(&self) -> TableType {
TableType::Base
}

async fn scan(
&self,
_ctx: &SessionState,
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
@@ -240,6 +246,14 @@ mod test {
}
}

impl MyDfTableProvider {
fn table_source() -> Arc<dyn TableSource> {
Arc::new(DefaultTableSource {
table_provider: Arc::new(Self),
})
}
}

#[derive(Debug)]
struct MyExecutionPlan {
schema: SchemaRef,
@@ -269,7 +283,7 @@ mod test {
fn execute(
&self,
_partition: usize,
_runtime: Arc<RuntimeeEnv>,
_context: Arc<TaskContext>,
) -> Result<SendableRecordBatchStream> {
let schema = self.schema();
let recordbatches = RecordBatches::try_new(
@@ -295,20 +309,26 @@ mod test {
// Test our physical plan can be executed by DataFusion, through adapters.
#[tokio::test]
async fn test_execute_physical_plan() {
let ctx = ExecutionContext::new();
let logical_plan = LogicalPlanBuilder::scan("test", Arc::new(MyDfTableProvider), None)
.unwrap()
.build()
.unwrap();
let ctx = SessionContext::new();
let logical_plan =
LogicalPlanBuilder::scan("test", MyDfTableProvider::table_source(), None)
.unwrap()
.build()
.unwrap();
let physical_plan = ctx.create_physical_plan(&logical_plan).await.unwrap();
let df_recordbatches = collect(physical_plan, Arc::new(RuntimeEnv::default()))
let df_recordbatches = collect(physical_plan, Arc::new(TaskContext::from(&ctx)))
.await
.unwrap();
let pretty_print = arrow_print::write(&df_recordbatches);
let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
let pretty_print = pretty::pretty_format_batches(&df_recordbatches).unwrap();
assert_eq!(
pretty_print,
vec!["+---+", "| a |", "+---+", "| 1 |", "| 2 |", "| 3 |", "+---+",]
pretty_print.to_string(),
r#"+---+
| a |
+---+
| 1 |
| 2 |
| 3 |
+---+"#
);
}

@@ -15,7 +15,7 @@
//! Signature module contains foundational types that are used to represent signatures, types,
//! and return types of functions.
//! Copied and modified from datafusion.
pub use datafusion::physical_plan::functions::Volatility;
pub use datafusion_expr::Volatility;
use datafusion_expr::{Signature as DfSignature, TypeSignature as DfTypeSignature};
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::DataType;

@@ -6,10 +6,8 @@ license = "Apache-2.0"

[dependencies]
common-error = { path = "../error" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datafusion = "14.0.0"
datafusion-common = "14.0.0"
datatypes = { path = "../../datatypes" }
futures = "0.3"
paste = "1.0"

@@ -19,7 +19,6 @@ use std::task::{Context, Poll};

use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datafusion_common::DataFusionError;
use datatypes::arrow::error::{ArrowError, Result as ArrowResult};
use datatypes::schema::{Schema, SchemaRef};
@@ -28,7 +27,8 @@ use snafu::ResultExt;

use crate::error::{self, Result};
use crate::{
DfSendableRecordBatchStream, RecordBatch, RecordBatchStream, SendableRecordBatchStream, Stream,
DfRecordBatch, DfSendableRecordBatchStream, RecordBatch, RecordBatchStream,
SendableRecordBatchStream, Stream,
};

type FutureStream = Pin<
@@ -63,8 +63,8 @@ impl Stream for DfRecordBatchStreamAdapter {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(recordbatch)) => match recordbatch {
Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.df_recordbatch))),
Err(e) => Poll::Ready(Some(Err(ArrowError::External("".to_owned(), Box::new(e))))),
Ok(recordbatch) => Poll::Ready(Some(Ok(recordbatch.into_df_record_batch()))),
Err(e) => Poll::Ready(Some(Err(ArrowError::ExternalError(Box::new(e))))),
},
Poll::Ready(None) => Poll::Ready(None),
}
@@ -102,10 +102,13 @@ impl Stream for RecordBatchStreamAdapter {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.stream).poll_next(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(Some(df_recordbatch)) => Poll::Ready(Some(Ok(RecordBatch {
schema: self.schema(),
df_recordbatch: df_recordbatch.context(error::PollStreamSnafu)?,
}))),
Poll::Ready(Some(df_record_batch)) => {
let df_record_batch = df_record_batch.context(error::PollStreamSnafu)?;
Poll::Ready(Some(RecordBatch::try_from_df_record_batch(
self.schema(),
df_record_batch,
)))
}
Poll::Ready(None) => Poll::Ready(None),
}
}
@@ -157,10 +160,8 @@ impl Stream for AsyncRecordBatchStreamAdapter {
AsyncRecordBatchStreamAdapterState::Inited(stream) => match stream {
Ok(stream) => {
return Poll::Ready(ready!(Pin::new(stream).poll_next(cx)).map(|df| {
Ok(RecordBatch {
schema: self.schema(),
df_recordbatch: df.context(error::PollStreamSnafu)?,
})
let df_record_batch = df.context(error::PollStreamSnafu)?;
RecordBatch::try_from_df_record_batch(self.schema(), df_record_batch)
}));
}
Err(e) => {
@@ -168,8 +169,7 @@ impl Stream for AsyncRecordBatchStreamAdapter {
error::CreateRecordBatchesSnafu {
reason: format!("Read error {:?} from stream", e),
}
.fail()
.map_err(|e| e.into()),
.fail(),
))
}
},

@@ -17,13 +17,12 @@ use std::any::Any;

use common_error::ext::BoxedError;
use common_error::prelude::*;
common_error::define_opaque_error!(Error);

pub type Result<T> = std::result::Result<T, Error>;

#[derive(Debug, Snafu)]
#[snafu(visibility(pub))]
pub enum InnerError {
pub enum Error {
#[snafu(display("Fail to create datafusion record batch, source: {}", source))]
NewDfRecordBatch {
source: datatypes::arrow::error::ArrowError,
@@ -59,20 +58,27 @@ pub enum InnerError {
source: datatypes::arrow::error::ArrowError,
backtrace: Backtrace,
},

#[snafu(display("Fail to format record batch, source: {}", source))]
Format {
source: datatypes::arrow::error::ArrowError,
backtrace: Backtrace,
},
}

impl ErrorExt for InnerError {
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
InnerError::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,
Error::NewDfRecordBatch { .. } => StatusCode::InvalidArguments,

InnerError::DataTypes { .. }
| InnerError::CreateRecordBatches { .. }
| InnerError::PollStream { .. } => StatusCode::Internal,
Error::DataTypes { .. }
| Error::CreateRecordBatches { .. }
| Error::PollStream { .. }
| Error::Format { .. } => StatusCode::Internal,

InnerError::External { source } => source.status_code(),
Error::External { source } => source.status_code(),

InnerError::SchemaConversion { source, .. } => source.status_code(),
Error::SchemaConversion { source, .. } => source.status_code(),
}
}

@@ -84,9 +90,3 @@ impl ErrorExt for InnerError {
self
}
}

impl From<InnerError> for Error {
fn from(e: InnerError) -> Error {
Error::new(e)
}
}

@@ -20,16 +20,17 @@ pub mod util;
use std::pin::Pin;
use std::sync::Arc;

use datafusion::arrow_print;
use datafusion::physical_plan::memory::MemoryStream;
pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::util::pretty;
use datatypes::prelude::VectorRef;
use datatypes::schema::{Schema, SchemaRef};
use error::Result;
use futures::task::{Context, Poll};
use futures::{Stream, TryStreamExt};
pub use recordbatch::RecordBatch;
use snafu::ensure;
use snafu::{ensure, ResultExt};

pub trait RecordBatchStream: Stream<Item = Result<RecordBatch>> {
fn schema(&self) -> SchemaRef;
@@ -65,7 +66,7 @@ impl Stream for EmptyRecordBatchStream {
}
}

#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub struct RecordBatches {
schema: SchemaRef,
batches: Vec<RecordBatch>,
@@ -98,17 +99,18 @@ impl RecordBatches {
self.batches.iter()
}

pub fn pretty_print(&self) -> String {
arrow_print::write(
&self
.iter()
.map(|x| x.df_recordbatch.clone())
.collect::<Vec<_>>(),
)
pub fn pretty_print(&self) -> Result<String> {
let df_batches = &self
.iter()
.map(|x| x.df_record_batch().clone())
.collect::<Vec<_>>();
let result = pretty::pretty_format_batches(df_batches).context(error::FormatSnafu)?;

Ok(result.to_string())
}

pub fn try_new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Result<Self> {
for batch in batches.iter() {
for batch in &batches {
ensure!(
batch.schema == schema,
error::CreateRecordBatchesSnafu {
@@ -144,7 +146,7 @@ impl RecordBatches {
let df_record_batches = self
.batches
.into_iter()
.map(|batch| batch.df_recordbatch)
.map(|batch| batch.into_df_record_batch())
.collect();
// unwrap safety: `MemoryStream::try_new` won't fail
Box::pin(
@@ -242,7 +244,7 @@ mod tests {
| 1 | hello |
| 2 | world |
+---+-------+";
assert_eq!(batches.pretty_print(), expected);
assert_eq!(batches.pretty_print().unwrap(), expected);

assert_eq!(schema1, batches.schema());
assert_eq!(vec![batch1], batches.take());

@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow_array::arrow_array_get;
use datatypes::schema::SchemaRef;
use datatypes::value::Value;
use datatypes::vectors::{Helper, VectorRef};
@@ -22,32 +20,88 @@ use serde::{Serialize, Serializer};
use snafu::ResultExt;

use crate::error::{self, Result};
use crate::DfRecordBatch;

// TODO(yingwen): We should hold vectors in the RecordBatch.
/// A two-dimensional batch of column-oriented data with a defined schema.
#[derive(Clone, Debug, PartialEq)]
pub struct RecordBatch {
pub schema: SchemaRef,
pub df_recordbatch: DfRecordBatch,
columns: Vec<VectorRef>,
df_record_batch: DfRecordBatch,
}

impl RecordBatch {
/// Create a new [`RecordBatch`] from `schema` and `columns`.
pub fn new<I: IntoIterator<Item = VectorRef>>(
schema: SchemaRef,
columns: I,
) -> Result<RecordBatch> {
let arrow_arrays = columns.into_iter().map(|v| v.to_arrow_array()).collect();
let columns: Vec<_> = columns.into_iter().collect();
let arrow_arrays = columns.iter().map(|v| v.to_arrow_array()).collect();

let df_recordbatch = DfRecordBatch::try_new(schema.arrow_schema().clone(), arrow_arrays)
let df_record_batch = DfRecordBatch::try_new(schema.arrow_schema().clone(), arrow_arrays)
.context(error::NewDfRecordBatchSnafu)?;

Ok(RecordBatch {
schema,
df_recordbatch,
columns,
df_record_batch,
})
}

/// Create a new [`RecordBatch`] from `schema` and `df_record_batch`.
///
/// This method doesn't check the schema.
pub fn try_from_df_record_batch(
schema: SchemaRef,
df_record_batch: DfRecordBatch,
) -> Result<RecordBatch> {
let columns = df_record_batch
.columns()
.iter()
.map(|c| Helper::try_into_vector(c.clone()).context(error::DataTypesSnafu))
.collect::<Result<Vec<_>>>()?;

Ok(RecordBatch {
schema,
columns,
df_record_batch,
})
}

#[inline]
pub fn df_record_batch(&self) -> &DfRecordBatch {
&self.df_record_batch
}

#[inline]
pub fn into_df_record_batch(self) -> DfRecordBatch {
self.df_record_batch
}

#[inline]
pub fn columns(&self) -> &[VectorRef] {
&self.columns
}

#[inline]
pub fn column(&self, idx: usize) -> &VectorRef {
&self.columns[idx]
}

pub fn column_by_name(&self, name: &str) -> Option<&VectorRef> {
let idx = self.schema.column_index_by_name(name)?;
Some(&self.columns[idx])
}

#[inline]
pub fn num_columns(&self) -> usize {
self.columns.len()
}

#[inline]
pub fn num_rows(&self) -> usize {
self.df_recordbatch.num_rows()
self.df_record_batch.num_rows()
}

/// Create an iterator to traverse the data by row
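With this rework, `RecordBatch` holds its columns as `VectorRef`s next to the DataFusion batch, so callers read vectors directly instead of downcasting Arrow arrays. A small usage sketch against the accessors above (a sketch only; `schema` is assumed to be a `SchemaRef` whose single UInt32 column is named `c1`):

```rust
use std::sync::Arc;

use datatypes::schema::SchemaRef;
use datatypes::vectors::{UInt32Vector, VectorRef};

// Sketch: build a batch from vectors and read it back without touching Arrow.
fn roundtrip(schema: SchemaRef) {
    let columns = vec![Arc::new(UInt32Vector::from_slice(&[1, 2, 3])) as VectorRef];
    let batch = RecordBatch::new(schema, columns).unwrap();
    assert_eq!(3, batch.num_rows());
    // Column lookup by name now returns the vector itself.
    assert_eq!(batch.column_by_name("c1").unwrap(), batch.column(0));
}
```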
@@ -61,14 +115,15 @@ impl Serialize for RecordBatch {
where
S: Serializer,
{
// TODO(yingwen): arrow and arrow2's schemas have different fields, so
// it might be better to use our `RawSchema` as serialized field.
let mut s = serializer.serialize_struct("record", 2)?;
s.serialize_field("schema", &self.schema.arrow_schema())?;
s.serialize_field("schema", &**self.schema.arrow_schema())?;

let df_columns = self.df_recordbatch.columns();

let vec = df_columns
let vec = self
.columns
.iter()
.map(|c| Helper::try_into_vector(c.clone())?.serialize_to_json())
.map(|c| c.serialize_to_json())
.collect::<std::result::Result<Vec<_>, _>>()
.map_err(S::Error::custom)?;

@@ -88,8 +143,8 @@ impl<'a> RecordBatchRowIterator<'a> {
fn new(record_batch: &'a RecordBatch) -> RecordBatchRowIterator {
RecordBatchRowIterator {
record_batch,
rows: record_batch.df_recordbatch.num_rows(),
columns: record_batch.df_recordbatch.num_columns(),
rows: record_batch.df_record_batch.num_rows(),
columns: record_batch.df_record_batch.num_columns(),
row_cursor: 0,
}
}
@@ -104,15 +159,9 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
} else {
let mut row = Vec::with_capacity(self.columns);

// TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
for col in 0..self.columns {
let column_array = self.record_batch.df_recordbatch.column(col);
match arrow_array_get(column_array.as_ref(), self.row_cursor)
.context(error::DataTypesSnafu)
{
Ok(field) => row.push(field),
Err(e) => return Some(Err(e.into())),
}
let column = self.record_batch.column(col);
row.push(column.get(self.row_cursor));
}

self.row_cursor += 1;
@@ -125,63 +174,60 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
mod tests {
use std::sync::Arc;

use datafusion_common::field_util::SchemaExt;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::array::UInt32Array;
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::prelude::*;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema};
use datatypes::vectors::{StringVector, UInt32Vector, Vector};
use datatypes::vectors::{StringVector, UInt32Vector};

use super::*;

#[test]
fn test_new_record_batch() {
fn test_record_batch() {
let arrow_schema = Arc::new(ArrowSchema::new(vec![
Field::new("c1", DataType::UInt32, false),
Field::new("c2", DataType::UInt32, false),
]));
let schema = Arc::new(Schema::try_from(arrow_schema).unwrap());

let v = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
let columns: Vec<VectorRef> = vec![v.clone(), v.clone()];
let c1 = Arc::new(UInt32Vector::from_slice(&[1, 2, 3]));
let c2 = Arc::new(UInt32Vector::from_slice(&[4, 5, 6]));
let columns: Vec<VectorRef> = vec![c1, c2];

let batch = RecordBatch::new(schema.clone(), columns).unwrap();
let expect = v.to_arrow_array();
for column in batch.df_recordbatch.columns() {
let array = column.as_any().downcast_ref::<UInt32Array>().unwrap();
assert_eq!(
expect.as_any().downcast_ref::<UInt32Array>().unwrap(),
array
);
let batch = RecordBatch::new(schema.clone(), columns.clone()).unwrap();
assert_eq!(3, batch.num_rows());
assert_eq!(&columns, batch.columns());
for (i, expect) in columns.iter().enumerate().take(batch.num_columns()) {
let column = batch.column(i);
assert_eq!(expect, column);
}
assert_eq!(schema, batch.schema);

assert_eq!(columns[0], *batch.column_by_name("c1").unwrap());
assert_eq!(columns[1], *batch.column_by_name("c2").unwrap());
assert!(batch.column_by_name("c3").is_none());

let converted =
RecordBatch::try_from_df_record_batch(schema, batch.df_record_batch().clone()).unwrap();
assert_eq!(batch, converted);
assert_eq!(*batch.df_record_batch(), converted.into_df_record_batch());
}

#[test]
pub fn test_serialize_recordbatch() {
let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
let column_schemas = vec![ColumnSchema::new(
"number",
DataType::UInt32,
ConcreteDataType::uint32_datatype(),
false,
)]));
let schema = Arc::new(Schema::try_from(arrow_schema.clone()).unwrap());
)];
let schema = Arc::new(Schema::try_new(column_schemas).unwrap());

let numbers: Vec<u32> = (0..10).collect();
let df_batch = DfRecordBatch::try_new(
arrow_schema,
vec![Arc::new(UInt32Array::from_slice(&numbers))],
)
.unwrap();

let batch = RecordBatch {
schema,
df_recordbatch: df_batch,
};
let columns = vec![Arc::new(UInt32Vector::from_slice(&numbers)) as VectorRef];
let batch = RecordBatch::new(schema, columns).unwrap();

let output = serde_json::to_string(&batch).unwrap();
assert_eq!(
r#"{"schema":{"fields":[{"name":"number","data_type":"UInt32","is_nullable":false,"metadata":{}}],"metadata":{}},"columns":[[0,1,2,3,4,5,6,7,8,9]]}"#,
r#"{"schema":{"fields":[{"name":"number","data_type":"UInt32","nullable":false,"dict_id":0,"dict_is_ordered":false}],"metadata":{"greptime:version":"0"}},"columns":[[0,1,2,3,4,5,6,7,8,9]]}"#,
output
);
}

@@ -15,23 +15,29 @@
use futures::TryStreamExt;

use crate::error::Result;
use crate::{RecordBatch, SendableRecordBatchStream};
use crate::{RecordBatch, RecordBatches, SendableRecordBatchStream};

/// Collect all the items from the stream into a vector of [`RecordBatch`].
pub async fn collect(stream: SendableRecordBatchStream) -> Result<Vec<RecordBatch>> {
stream.try_collect::<Vec<_>>().await
}

/// Collect all the items from the stream into [RecordBatches].
pub async fn collect_batches(stream: SendableRecordBatchStream) -> Result<RecordBatches> {
let schema = stream.schema();
let batches = stream.try_collect::<Vec<_>>().await?;
RecordBatches::try_new(schema, batches)
}

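A brief usage note on the new helper: `collect_batches` drains the stream under its schema into a `RecordBatches`, which can then be compared wholesale (enabled by the `PartialEq` derive above) or rendered. A sketch, with module paths assumed from this crate's layout:

```rust
use common_recordbatch::error::Result;
use common_recordbatch::util::collect_batches;
use common_recordbatch::SendableRecordBatchStream;

// Sketch: gather a stream and render it; pretty_print is now fallible.
async fn dump(stream: SendableRecordBatchStream) -> Result<()> {
    let batches = collect_batches(stream).await?;
    println!("{}", batches.pretty_print()?);
    Ok(())
}
```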
#[cfg(test)]
mod tests {
use std::mem;
use std::pin::Pin;
use std::sync::Arc;

use datafusion_common::field_util::SchemaExt;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::array::UInt32Array;
use datatypes::arrow::datatypes::{DataType, Field, Schema as ArrowSchema};
use datatypes::schema::{Schema, SchemaRef};
use datatypes::prelude::*;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::UInt32Vector;
use futures::task::{Context, Poll};
use futures::Stream;

@@ -65,12 +71,13 @@ mod tests {

#[tokio::test]
async fn test_collect() {
let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new(
let column_schemas = vec![ColumnSchema::new(
"number",
DataType::UInt32,
ConcreteDataType::uint32_datatype(),
false,
)]));
let schema = Arc::new(Schema::try_from(arrow_schema.clone()).unwrap());
)];

let schema = Arc::new(Schema::try_new(column_schemas).unwrap());

let stream = MockRecordBatchStream {
schema: schema.clone(),
@@ -81,24 +88,23 @@ mod tests {
assert_eq!(0, batches.len());

let numbers: Vec<u32> = (0..10).collect();
let df_batch = DfRecordBatch::try_new(
arrow_schema.clone(),
vec![Arc::new(UInt32Array::from_slice(&numbers))],
)
.unwrap();

let batch = RecordBatch {
schema: schema.clone(),
df_recordbatch: df_batch,
};
let columns = [Arc::new(UInt32Vector::from_vec(numbers)) as _];
let batch = RecordBatch::new(schema.clone(), columns).unwrap();

let stream = MockRecordBatchStream {
schema: Arc::new(Schema::try_from(arrow_schema).unwrap()),
schema: schema.clone(),
batch: Some(batch.clone()),
};
let batches = collect(Box::pin(stream)).await.unwrap();
assert_eq!(1, batches.len());

assert_eq!(batch, batches[0]);

let stream = MockRecordBatchStream {
schema: schema.clone(),
batch: Some(batch.clone()),
};
let batches = collect_batches(Box::pin(stream)).await.unwrap();
let expect_batches = RecordBatches::try_new(schema.clone(), vec![batch]).unwrap();
assert_eq!(expect_batches, batches);
}
}

@@ -10,10 +10,8 @@ catalog = { path = "../../catalog" }
common-catalog = { path = "../catalog" }
common-error = { path = "../error" }
common-telemetry = { path = "../telemetry" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
"simd",
] }
datafusion-expr = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datafusion = "14.0.0"
datafusion-expr = "14.0.0"
datatypes = { path = "../../datatypes" }
futures = "0.3"
prost = "0.9"

@@ -14,7 +14,7 @@

use std::collections::HashMap;

use datafusion::logical_plan::DFSchemaRef;
use datafusion::common::DFSchemaRef;
use substrait_proto::protobuf::extensions::simple_extension_declaration::{
ExtensionFunction, MappingType,
};

@@ -15,8 +15,8 @@
use std::collections::VecDeque;
use std::str::FromStr;

use datafusion::logical_plan::{Column, Expr};
use datafusion_expr::{expr_fn, lit, BuiltinScalarFunction, Operator};
use datafusion::common::Column;
use datafusion_expr::{expr_fn, lit, Between, BinaryExpr, BuiltinScalarFunction, Expr, Operator};
use datatypes::schema::Schema;
use snafu::{ensure, OptionExt};
use substrait_proto::protobuf::expression::field_reference::ReferenceType as FieldReferenceType;
@@ -311,21 +311,21 @@ pub fn convert_scalar_function(
// skip GetIndexedField, unimplemented.
"between" => {
ensure_arg_len(3)?;
Expr::Between {
Expr::Between(Between {
expr: Box::new(inputs.pop_front().unwrap()),
negated: false,
low: Box::new(inputs.pop_front().unwrap()),
high: Box::new(inputs.pop_front().unwrap()),
}
})
}
"not_between" => {
ensure_arg_len(3)?;
Expr::Between {
Expr::Between(Between {
expr: Box::new(inputs.pop_front().unwrap()),
negated: true,
low: Box::new(inputs.pop_front().unwrap()),
high: Box::new(inputs.pop_front().unwrap()),
}
})
}
// skip Case, is covered in substrait::SwitchExpression.
// skip Cast and TryCast, is covered in substrait::Cast.
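The edits here track DataFusion 14's move of `Expr` from inline struct variants to tuple variants wrapping named payload structs (`Between`, `BinaryExpr`, and so on), so both construction and matching gain one level of nesting. A short sketch of the matching side:

```rust
use datafusion_expr::{Between, Expr};

// Sketch: destructure the DataFusion 14 tuple variant.
fn is_negated_between(expr: &Expr) -> bool {
    matches!(expr, Expr::Between(Between { negated: true, .. }))
}
```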
@@ -477,7 +477,7 @@ pub fn expression_from_df_expr(
rex_type: Some(RexType::Literal(l)),
}
}
Expr::BinaryExpr { left, op, right } => {
Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
let left = expression_from_df_expr(ctx, left, schema)?;
let right = expression_from_df_expr(ctx, right, schema)?;
let arguments = utils::expression_to_argument(vec![left, right]);
@@ -518,12 +518,12 @@ pub fn expression_from_df_expr(
name: expr.to_string(),
}
.fail()?,
Expr::Between {
Expr::Between(Between {
expr,
negated,
low,
high,
} => {
}) => {
let expr = expression_from_df_expr(ctx, expr, schema)?;
let low = expression_from_df_expr(ctx, low, schema)?;
let high = expression_from_df_expr(ctx, high, schema)?;
@@ -564,7 +564,21 @@ pub fn expression_from_df_expr(
| Expr::WindowFunction { .. }
| Expr::AggregateUDF { .. }
| Expr::InList { .. }
| Expr::Wildcard => UnsupportedExprSnafu {
| Expr::Wildcard
| Expr::Like(_)
| Expr::ILike(_)
| Expr::SimilarTo(_)
| Expr::IsTrue(_)
| Expr::IsFalse(_)
| Expr::IsUnknown(_)
| Expr::IsNotTrue(_)
| Expr::IsNotFalse(_)
| Expr::IsNotUnknown(_)
| Expr::Exists { .. }
| Expr::InSubquery { .. }
| Expr::ScalarSubquery(..)
| Expr::QualifiedWildcard { .. } => todo!(),
Expr::GroupingSet(_) => UnsupportedExprSnafu {
name: expr.to_string(),
}
.fail()?,
@@ -628,6 +642,10 @@ mod utils {
Operator::RegexNotIMatch => "regex_not_i_match",
Operator::BitwiseAnd => "bitwise_and",
Operator::BitwiseOr => "bitwise_or",
Operator::BitwiseXor => "bitwise_xor",
Operator::BitwiseShiftRight => "bitwise_shift_right",
Operator::BitwiseShiftLeft => "bitwise_shift_left",
Operator::StringConcat => "string_concat",
}
}

@@ -679,7 +697,6 @@ mod utils {
BuiltinScalarFunction::Sqrt => "sqrt",
BuiltinScalarFunction::Tan => "tan",
BuiltinScalarFunction::Trunc => "trunc",
BuiltinScalarFunction::Array => "make_array",
BuiltinScalarFunction::Ascii => "ascii",
BuiltinScalarFunction::BitLength => "bit_length",
BuiltinScalarFunction::Btrim => "btrim",
@@ -723,6 +740,17 @@ mod utils {
BuiltinScalarFunction::Trim => "trim",
BuiltinScalarFunction::Upper => "upper",
BuiltinScalarFunction::RegexpMatch => "regexp_match",
BuiltinScalarFunction::Atan2 => "atan2",
BuiltinScalarFunction::Coalesce => "coalesce",
BuiltinScalarFunction::Power => "power",
BuiltinScalarFunction::MakeArray => "make_array",
BuiltinScalarFunction::DateBin => "date_bin",
BuiltinScalarFunction::FromUnixtime => "from_unixtime",
BuiltinScalarFunction::CurrentDate => "current_date",
BuiltinScalarFunction::CurrentTime => "current_time",
BuiltinScalarFunction::Uuid => "uuid",
BuiltinScalarFunction::Struct => "struct",
BuiltinScalarFunction::ArrowTypeof => "arrow_type_of",
}
}
}

@@ -19,10 +19,10 @@ use catalog::CatalogManagerRef;
use common_error::prelude::BoxedError;
use common_telemetry::debug;
use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datafusion::datasource::TableProvider;
use datafusion::logical_plan::plan::Filter;
use datafusion::logical_plan::{LogicalPlan, TableScan, ToDFSchema};
use datafusion::common::ToDFSchema;
use datafusion::datasource::DefaultTableSource;
use datafusion::physical_plan::project_schema;
use datafusion_expr::{Filter, LogicalPlan, TableScan, TableSource};
use prost::Message;
use snafu::{ensure, OptionExt, ResultExt};
use substrait_proto::protobuf::expression::mask_expression::{StructItem, StructSelect};
@@ -144,7 +144,7 @@ impl DFLogicalSubstraitConvertor {
.context(error::ConvertDfSchemaSnafu)?;
let predicate = to_df_expr(ctx, *condition, &schema)?;

LogicalPlan::Filter(Filter { predicate, input })
LogicalPlan::Filter(Filter::try_new(predicate, input).context(DFInternalSnafu)?)
}
RelType::Fetch(_fetch_rel) => UnsupportedPlanSnafu {
name: "Fetch Relation",
@@ -238,7 +238,9 @@ impl DFLogicalSubstraitConvertor {
.context(TableNotFoundSnafu {
name: format!("{}.{}.{}", catalog_name, schema_name, table_name),
})?;
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
let adapter = Arc::new(DefaultTableSource::new(Arc::new(
DfTableProviderAdapter::new(table_ref),
)));

// Get schema directly from the table, and compare it with the schema retrieved from substrait proto.
let stored_schema = adapter.schema();
@@ -267,14 +269,14 @@ impl DFLogicalSubstraitConvertor {

ctx.set_df_schema(projected_schema.clone());

// TODO(ruihang): Support limit
// TODO(ruihang): Support limit(fetch)
Ok(LogicalPlan::TableScan(TableScan {
table_name: format!("{}.{}.{}", catalog_name, schema_name, table_name),
source: adapter,
projection,
projected_schema,
filters,
limit: None,
fetch: None,
}))
}

@@ -302,7 +304,7 @@ impl DFLogicalSubstraitConvertor {
.fail()?,
LogicalPlan::Filter(filter) => {
let input = Some(Box::new(
self.logical_plan_to_rel(ctx, filter.input.clone())?,
self.logical_plan_to_rel(ctx, filter.input().clone())?,
));

let schema = plan
@@ -312,7 +314,7 @@ impl DFLogicalSubstraitConvertor {
.context(error::ConvertDfSchemaSnafu)?;
let condition = Some(Box::new(expression_from_df_expr(
ctx,
&filter.predicate,
filter.predicate(),
&schema,
)?));

@@ -368,7 +370,16 @@ impl DFLogicalSubstraitConvertor {
name: "DataFusion Logical Limit",
}
.fail()?,
LogicalPlan::CreateExternalTable(_)

LogicalPlan::Subquery(_)
| LogicalPlan::SubqueryAlias(_)
| LogicalPlan::CreateView(_)
| LogicalPlan::CreateCatalogSchema(_)
| LogicalPlan::CreateCatalog(_)
| LogicalPlan::DropView(_)
| LogicalPlan::Distinct(_)
| LogicalPlan::SetVariable(_)
| LogicalPlan::CreateExternalTable(_)
| LogicalPlan::CreateMemoryTable(_)
| LogicalPlan::DropTable(_)
| LogicalPlan::Values(_)
@@ -414,6 +425,10 @@ impl DFLogicalSubstraitConvertor {
let provider = table_scan
.source
.as_any()
.downcast_ref::<DefaultTableSource>()
.context(UnknownPlanSnafu)?
.table_provider
.as_any()
.downcast_ref::<DfTableProviderAdapter>()
.context(UnknownPlanSnafu)?;
let table_info = provider.table().table_info();
@@ -485,7 +500,9 @@ impl DFLogicalSubstraitConvertor {
fn same_schema_without_metadata(lhs: &ArrowSchemaRef, rhs: &ArrowSchemaRef) -> bool {
lhs.fields.len() == rhs.fields.len()
&& lhs.fields.iter().zip(rhs.fields.iter()).all(|(x, y)| {
x.name == y.name && x.data_type == y.data_type && x.is_nullable == y.is_nullable
x.name() == y.name()
&& x.data_type() == y.data_type()
&& x.is_nullable() == y.is_nullable()
})
}

@@ -494,7 +511,7 @@ mod test {
use catalog::local::{LocalCatalogManager, MemoryCatalogProvider, MemorySchemaProvider};
use catalog::{CatalogList, CatalogProvider, RegisterTableRequest};
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
use datafusion::logical_plan::DFSchema;
use datafusion::common::{DFSchema, ToDFSchema};
use datatypes::schema::Schema;
use table::requests::CreateTableRequest;
use table::test_util::{EmptyTable, MockTableEngine};
@@ -564,7 +581,9 @@ mod test {
})
.await
.unwrap();
let adapter = Arc::new(DfTableProviderAdapter::new(table_ref));
let adapter = Arc::new(DefaultTableSource::new(Arc::new(
DfTableProviderAdapter::new(table_ref),
)));

let projection = vec![1, 3, 5];
let df_schema = adapter.schema().to_dfschema().unwrap();
@@ -584,7 +603,7 @@ mod test {
projection: Some(projection),
projected_schema,
filters: vec![],
limit: None,
fetch: None,
});

logical_plan_round_trip(table_scan_plan, catalog_manager).await;

@@ -55,8 +55,11 @@ impl From<i32> for Date {
impl Display for Date {
/// [Date] is formatted according to ISO-8601 standard.
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let abs_date = NaiveDate::from_num_days_from_ce(UNIX_EPOCH_FROM_CE + self.0);
f.write_str(&abs_date.format("%F").to_string())
if let Some(abs_date) = NaiveDate::from_num_days_from_ce_opt(UNIX_EPOCH_FROM_CE + self.0) {
write!(f, "{}", abs_date.format("%F"))
} else {
write!(f, "Date({})", self.0)
}
}
}

@@ -95,7 +98,7 @@ mod tests {
Date::from_str("1969-01-01").unwrap().to_string()
);

let now = Utc::now().date().format("%F").to_string();
let now = Utc::now().date_naive().format("%F").to_string();
assert_eq!(now, Date::from_str(&now).unwrap().to_string());
}

@@ -31,8 +31,11 @@ pub struct DateTime(i64);

impl Display for DateTime {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let abs_time = NaiveDateTime::from_timestamp(self.0, 0);
write!(f, "{}", abs_time.format(DATETIME_FORMAT))
if let Some(abs_time) = NaiveDateTime::from_timestamp_opt(self.0, 0) {
write!(f, "{}", abs_time.format(DATETIME_FORMAT))
} else {
write!(f, "DateTime({})", self.0)
}
}
}

@@ -14,6 +14,7 @@

use core::default::Default;
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
use std::str::FromStr;

@@ -34,13 +35,34 @@ impl Timestamp {
Self { unit, value }
}

pub fn from_millis(value: i64) -> Self {
pub fn new_second(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Second,
}
}

pub fn new_millisecond(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Millisecond,
}
}

pub fn new_microsecond(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Microsecond,
}
}

pub fn new_nanosecond(value: i64) -> Self {
Self {
value,
unit: TimeUnit::Nanosecond,
}
}

pub fn unit(&self) -> TimeUnit {
self.unit
}
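The unit-specific constructors replace the old `from_millis`, making the time unit explicit at every call site. A tiny sketch (module paths assumed from this crate's layout):

```rust
use common_time::timestamp::{TimeUnit, Timestamp};

// Sketch: the unit is now spelled out by the constructor name.
fn example() {
    let ts = Timestamp::new_millisecond(1_655_276_557_000);
    assert_eq!(TimeUnit::Millisecond, ts.unit());
}
```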
@@ -54,6 +76,8 @@ impl Timestamp {
self.value * self.unit.factor() / unit.factor()
}

/// Format timestamp to ISO8601 string. If the timestamp exceeds what chrono timestamp can
/// represent, this function simply print the timestamp unit and value in plain string.
pub fn to_iso8601_string(&self) -> String {
let nano_factor = TimeUnit::Second.factor() / TimeUnit::Nanosecond.factor();

@@ -65,8 +89,11 @@ impl Timestamp {
nsecs += nano_factor;
}

let datetime = Utc.timestamp(secs, nsecs as u32);
format!("{}", datetime.format("%Y-%m-%d %H:%M:%S%.f%z"))
if let LocalResult::Single(datetime) = Utc.timestamp_opt(secs, nsecs as u32) {
format!("{}", datetime.format("%Y-%m-%d %H:%M:%S%.f%z"))
} else {
format!("[Timestamp{}: {}]", self.unit, self.value)
}
}
}

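The fallback branch above follows chrono 0.4.23, which deprecated the panicking constructors (`Utc.timestamp`, `NaiveDateTime::from_timestamp`, and friends) in favor of fallible `_opt` variants. A minimal sketch of the pattern, assuming chrono 0.4.23+:

```rust
use chrono::{LocalResult, TimeZone, Utc};

// Sketch: out-of-range seconds yield LocalResult::None instead of panicking.
fn render(secs: i64) -> String {
    match Utc.timestamp_opt(secs, 0) {
        LocalResult::Single(dt) => dt.format("%Y-%m-%d %H:%M:%S%.f%z").to_string(),
        _ => format!("[out of range: {secs}]"),
    }
}
```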
@@ -168,6 +195,25 @@ pub enum TimeUnit {
|
||||
Nanosecond,
|
||||
}
|
||||
|
||||
impl Display for TimeUnit {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TimeUnit::Second => {
|
||||
write!(f, "Second")
|
||||
}
|
||||
TimeUnit::Millisecond => {
|
||||
write!(f, "Millisecond")
|
||||
}
|
||||
TimeUnit::Microsecond => {
|
||||
write!(f, "Microsecond")
|
||||
}
|
||||
TimeUnit::Nanosecond => {
|
||||
write!(f, "Nanosecond")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TimeUnit {
|
||||
pub fn factor(&self) -> i64 {
|
||||
match self {
|
||||
@@ -249,10 +295,11 @@ mod tests {
|
||||
// but expected timestamp is in UTC timezone
|
||||
fn check_from_str(s: &str, expect: &str) {
|
||||
let ts = Timestamp::from_str(s).unwrap();
|
||||
let time = NaiveDateTime::from_timestamp(
|
||||
let time = NaiveDateTime::from_timestamp_opt(
|
||||
ts.value / 1_000_000_000,
|
||||
(ts.value % 1_000_000_000) as u32,
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(expect, time.to_string());
|
||||
}
|
||||
|
||||
@@ -265,7 +312,13 @@ mod tests {
|
||||
check_from_str(
|
||||
"2020-09-08 13:42:29",
|
||||
&NaiveDateTime::from_timestamp_opt(
|
||||
1599572549 - Local.timestamp(0, 0).offset().fix().local_minus_utc() as i64,
|
||||
1599572549
|
||||
- Local
|
||||
.timestamp_opt(0, 0)
|
||||
.unwrap()
|
||||
.offset()
|
||||
.fix()
|
||||
.local_minus_utc() as i64,
|
||||
0,
|
||||
)
|
||||
.unwrap()
|
||||
@@ -275,7 +328,13 @@ mod tests {
|
||||
check_from_str(
|
||||
"2020-09-08T13:42:29",
|
||||
&NaiveDateTime::from_timestamp_opt(
|
||||
1599572549 - Local.timestamp(0, 0).offset().fix().local_minus_utc() as i64,
|
||||
1599572549
|
||||
- Local
|
||||
.timestamp_opt(0, 0)
|
||||
.unwrap()
|
||||
.offset()
|
||||
.fix()
|
||||
.local_minus_utc() as i64,
|
||||
0,
|
||||
)
|
||||
.unwrap()
|
||||
@@ -285,7 +344,13 @@ mod tests {
|
||||
check_from_str(
|
||||
"2020-09-08 13:42:29.042",
|
||||
&NaiveDateTime::from_timestamp_opt(
|
||||
1599572549 - Local.timestamp(0, 0).offset().fix().local_minus_utc() as i64,
|
||||
1599572549
|
||||
- Local
|
||||
.timestamp_opt(0, 0)
|
||||
.unwrap()
|
||||
.offset()
|
||||
.fix()
|
||||
.local_minus_utc() as i64,
|
||||
42000000,
|
||||
)
|
||||
.unwrap()
|
||||
@@ -296,7 +361,13 @@ mod tests {
|
||||
check_from_str(
|
||||
"2020-09-08T13:42:29.042",
|
||||
&NaiveDateTime::from_timestamp_opt(
|
||||
1599572549 - Local.timestamp(0, 0).offset().fix().local_minus_utc() as i64,
|
||||
1599572549
|
||||
- Local
|
||||
.timestamp_opt(0, 0)
|
||||
.unwrap()
|
||||
.offset()
|
||||
.fix()
|
||||
.local_minus_utc() as i64,
|
||||
42000000,
|
||||
)
|
||||
.unwrap()
|
||||
@@ -316,19 +387,19 @@ mod tests {
|
||||
assert_eq!(datetime_str, ts.to_iso8601_string());
|
||||
|
||||
let ts_millis = 1668070237000;
|
||||
let ts = Timestamp::from_millis(ts_millis);
|
||||
let ts = Timestamp::new_millisecond(ts_millis);
|
||||
assert_eq!("2022-11-10 08:50:37+0000", ts.to_iso8601_string());
|
||||
|
||||
let ts_millis = -1000;
|
||||
let ts = Timestamp::from_millis(ts_millis);
|
||||
let ts = Timestamp::new_millisecond(ts_millis);
|
||||
assert_eq!("1969-12-31 23:59:59+0000", ts.to_iso8601_string());
|
||||
|
||||
let ts_millis = -1;
|
||||
let ts = Timestamp::from_millis(ts_millis);
|
||||
let ts = Timestamp::new_millisecond(ts_millis);
|
||||
assert_eq!("1969-12-31 23:59:59.999+0000", ts.to_iso8601_string());
|
||||
|
||||
let ts_millis = -1001;
|
||||
let ts = Timestamp::from_millis(ts_millis);
|
||||
let ts = Timestamp::new_millisecond(ts_millis);
|
||||
assert_eq!("1969-12-31 23:59:58.999+0000", ts.to_iso8601_string());
|
||||
}
|
||||
|
||||
|
||||
@@ -33,8 +33,8 @@ mod tests {
|
||||
.duration_since(time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_millis() as i64;
|
||||
let datetime_now = chrono::Utc.timestamp_millis(now);
|
||||
let datetime_std = chrono::Utc.timestamp_millis(millis_from_std);
|
||||
let datetime_now = chrono::Utc.timestamp_millis_opt(now).unwrap();
|
||||
let datetime_std = chrono::Utc.timestamp_millis_opt(millis_from_std).unwrap();
|
||||
|
||||
assert_eq!(datetime_std.year(), datetime_now.year());
|
||||
assert_eq!(datetime_std.month(), datetime_now.month());
|
||||
|
||||
@@ -25,9 +25,7 @@ common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-runtime = { path = "../common/runtime" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion = "14.0.0"
|
||||
datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
@@ -59,8 +57,5 @@ tower-http = { version = "0.3", features = ["full"] }
|
||||
axum-test-helper = { git = "https://github.com/sunng87/axum-test-helper.git", branch = "patch-1" }
|
||||
client = { path = "../client" }
|
||||
common-query = { path = "../common/query" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = [
|
||||
"simd",
|
||||
] }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datafusion-common = "14.0.0"
|
||||
tempdir = "0.3"
|
||||
|
||||
@@ -260,7 +260,7 @@ mod tests {
|
||||
},
|
||||
ColumnDef {
|
||||
name: "ts".to_string(),
|
||||
datatype: ColumnDataType::Timestamp as i32,
|
||||
datatype: ColumnDataType::TimestampMillisecond as i32,
|
||||
is_nullable: false,
|
||||
default_constraint: None,
|
||||
},
|
||||
@@ -295,8 +295,12 @@ mod tests {
|
||||
fn expected_table_schema() -> SchemaRef {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new("cpu", ConcreteDataType::float32_datatype(), true),
|
||||
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
|
||||
];
|
||||
|
||||
@@ -154,8 +154,12 @@ mod tests {
|
||||
ColumnSchema::new("host", ConcreteDataType::string_datatype(), false),
|
||||
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
|
||||
ColumnSchema::new("memory", ConcreteDataType::float64_datatype(), true),
|
||||
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), true)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_time_index(true),
|
||||
];
|
||||
|
||||
Arc::new(
|
||||
@@ -284,11 +288,11 @@ mod tests {
|
||||
let ts = &columns_values["ts"];
|
||||
assert_eq!(2, ts.len());
|
||||
assert_eq!(
|
||||
Value::from(Timestamp::from_millis(1655276557000i64)),
|
||||
Value::from(Timestamp::new_millisecond(1655276557000i64)),
|
||||
ts.get(0)
|
||||
);
|
||||
assert_eq!(
|
||||
Value::from(Timestamp::from_millis(1655276558000i64)),
|
||||
Value::from(Timestamp::new_millisecond(1655276558000i64)),
|
||||
ts.get(1)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -375,7 +375,7 @@ mod tests {
|
||||
.data_type
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
request
|
||||
.schema
|
||||
.column_schema_by_name("ts")
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
|
||||
use catalog::CatalogManagerRef;
|
||||
use common_query::Output;
|
||||
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
|
||||
use datatypes::data_type::DataType;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::vectors::MutableVector;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sql::ast::Value as SqlValue;
|
||||
use sql::statements::insert::Insert;
|
||||
@@ -70,7 +72,7 @@ impl SqlHandler {
|
||||
};
|
||||
let rows_num = values.len();
|
||||
|
||||
let mut columns_builders: Vec<(&String, &ConcreteDataType, VectorBuilder)> =
|
||||
let mut columns_builders: Vec<(&String, &ConcreteDataType, Box<dyn MutableVector>)> =
|
||||
Vec::with_capacity(columns_num);
|
||||
|
||||
if columns.is_empty() {
|
||||
@@ -79,7 +81,7 @@ impl SqlHandler {
|
||||
columns_builders.push((
|
||||
&column_schema.name,
|
||||
data_type,
|
||||
VectorBuilder::with_capacity(data_type.clone(), rows_num),
|
||||
data_type.create_mutable_vector(rows_num),
|
||||
));
|
||||
}
|
||||
} else {
|
||||
@@ -95,7 +97,7 @@ impl SqlHandler {
|
||||
columns_builders.push((
|
||||
column_name,
|
||||
data_type,
|
||||
VectorBuilder::with_capacity(data_type.clone(), rows_num),
|
||||
data_type.create_mutable_vector(rows_num),
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -123,7 +125,7 @@ impl SqlHandler {
|
||||
table_name: table_ref.table.to_string(),
|
||||
columns_values: columns_builders
|
||||
.into_iter()
|
||||
.map(|(c, _, mut b)| (c.to_owned(), b.finish()))
|
||||
.map(|(c, _, mut b)| (c.to_owned(), b.to_vector()))
|
||||
.collect(),
|
||||
}))
|
||||
}
|
||||
@@ -133,11 +135,11 @@ fn add_row_to_vector(
|
||||
column_name: &str,
|
||||
data_type: &ConcreteDataType,
|
||||
sql_val: &SqlValue,
|
||||
builder: &mut VectorBuilder,
|
||||
builder: &mut Box<dyn MutableVector>,
|
||||
) -> Result<()> {
|
||||
let value = statements::sql_value_to_value(column_name, data_type, sql_val)
|
||||
.context(ParseSqlValueSnafu)?;
|
||||
builder.push(&value);
|
||||
builder.push_value_ref(value.as_value_ref()).unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -17,11 +17,8 @@ use std::sync::Arc;
use common_catalog::consts::DEFAULT_SCHEMA_NAME;
use common_query::Output;
use common_recordbatch::util;
use datafusion::arrow_print;
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
use datatypes::arrow::array::{Int64Array, UInt64Array, Utf8Array};
use datatypes::arrow_array::StringArray;
use datatypes::prelude::ConcreteDataType;
use datatypes::data_type::ConcreteDataType;
use datatypes::vectors::{Int64Vector, StringVector, UInt64Vector, VectorRef};
use session::context::QueryContext;

use crate::instance::Instance;
@@ -66,11 +63,13 @@ async fn test_create_database_and_insert_query() {
    match query_output {
        Output::Stream(s) => {
            let batches = util::collect(s).await.unwrap();
            let columns = batches[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(1, batches[0].num_columns());
            assert_eq!(
                &Int64Array::from_slice(&[1655276557000, 1655276558000]),
                columns[0].as_any().downcast_ref::<Int64Array>().unwrap()
                Arc::new(Int64Vector::from_vec(vec![
                    1655276557000_i64,
                    1655276558000_i64
                ])) as VectorRef,
                *batches[0].column(0)
            );
        }
        _ => unreachable!(),
@@ -155,18 +154,15 @@ async fn assert_query_result(instance: &Instance, sql: &str, ts: i64, host: &str
    match query_output {
        Output::Stream(s) => {
            let batches = util::collect(s).await.unwrap();
            let columns = batches[0].df_recordbatch.columns();
            assert_eq!(2, columns.len());
            // let columns = batches[0].df_recordbatch.columns();
            assert_eq!(2, batches[0].num_columns());
            assert_eq!(
                &Utf8Array::<i32>::from_slice(&[host]),
                columns[0]
                    .as_any()
                    .downcast_ref::<Utf8Array<i32>>()
                    .unwrap()
                Arc::new(StringVector::from(vec![host])) as VectorRef,
                *batches[0].column(0)
            );
            assert_eq!(
                &Int64Array::from_slice(&[ts]),
                columns[1].as_any().downcast_ref::<Int64Array>().unwrap()
                Arc::new(Int64Vector::from_vec(vec![ts])) as VectorRef,
                *batches[0].column(1)
            );
        }
        _ => unreachable!(),
@@ -183,7 +179,7 @@ async fn setup_test_instance(test_name: &str) -> Instance {
    test_util::create_test_table(
        instance.catalog_manager(),
        instance.sql_handler(),
        ConcreteDataType::timestamp_millis_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
    )
    .await
    .unwrap();
@@ -235,11 +231,13 @@ async fn test_execute_insert_query_with_i64_timestamp() {
    match query_output {
        Output::Stream(s) => {
            let batches = util::collect(s).await.unwrap();
            let columns = batches[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(1, batches[0].num_columns());
            assert_eq!(
                &Int64Array::from_slice(&[1655276557000, 1655276558000]),
                columns[0].as_any().downcast_ref::<Int64Array>().unwrap()
                Arc::new(Int64Vector::from_vec(vec![
                    1655276557000_i64,
                    1655276558000_i64
                ])) as VectorRef,
                *batches[0].column(0)
            );
        }
        _ => unreachable!(),
@@ -249,11 +247,13 @@ async fn test_execute_insert_query_with_i64_timestamp() {
    match query_output {
        Output::Stream(s) => {
            let batches = util::collect(s).await.unwrap();
            let columns = batches[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(1, batches[0].num_columns());
            assert_eq!(
                &Int64Array::from_slice(&[1655276557000, 1655276558000]),
                columns[0].as_any().downcast_ref::<Int64Array>().unwrap()
                Arc::new(Int64Vector::from_vec(vec![
                    1655276557000_i64,
                    1655276558000_i64
                ])) as VectorRef,
                *batches[0].column(0)
            );
        }
        _ => unreachable!(),
@@ -270,13 +270,12 @@ async fn test_execute_query() {
    match output {
        Output::Stream(recordbatch) => {
            let numbers = util::collect(recordbatch).await.unwrap();
            let columns = numbers[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(columns[0].len(), 1);
            assert_eq!(1, numbers[0].num_columns());
            assert_eq!(numbers[0].column(0).len(), 1);

            assert_eq!(
                *columns[0].as_any().downcast_ref::<UInt64Array>().unwrap(),
                UInt64Array::from_slice(&[4950])
                Arc::new(UInt64Vector::from_vec(vec![4950_u64])) as VectorRef,
                *numbers[0].column(0),
            );
        }
        _ => unreachable!(),
@@ -294,13 +293,12 @@ async fn test_execute_show_databases_tables() {
    match output {
        Output::RecordBatches(databases) => {
            let databases = databases.take();
            let columns = databases[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(columns[0].len(), 1);
            assert_eq!(1, databases[0].num_columns());
            assert_eq!(databases[0].column(0).len(), 1);

            assert_eq!(
                *columns[0].as_any().downcast_ref::<StringArray>().unwrap(),
                StringArray::from(vec![Some("public")])
                *databases[0].column(0),
                Arc::new(StringVector::from(vec![Some("public")])) as VectorRef
            );
        }
        _ => unreachable!(),
@@ -310,13 +308,12 @@ async fn test_execute_show_databases_tables() {
    match output {
        Output::RecordBatches(databases) => {
            let databases = databases.take();
            let columns = databases[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(columns[0].len(), 1);
            assert_eq!(1, databases[0].num_columns());
            assert_eq!(databases[0].column(0).len(), 1);

            assert_eq!(
                *columns[0].as_any().downcast_ref::<StringArray>().unwrap(),
                StringArray::from(vec![Some("public")])
                *databases[0].column(0),
                Arc::new(StringVector::from(vec![Some("public")])) as VectorRef
            );
        }
        _ => unreachable!(),
@@ -326,9 +323,8 @@ async fn test_execute_show_databases_tables() {
    match output {
        Output::RecordBatches(databases) => {
            let databases = databases.take();
            let columns = databases[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(columns[0].len(), 2);
            assert_eq!(1, databases[0].num_columns());
            assert_eq!(databases[0].column(0).len(), 2);
        }
        _ => unreachable!(),
    }
@@ -337,7 +333,7 @@ async fn test_execute_show_databases_tables() {
    test_util::create_test_table(
        instance.catalog_manager(),
        instance.sql_handler(),
        ConcreteDataType::timestamp_millis_datatype(),
        ConcreteDataType::timestamp_millisecond_datatype(),
    )
    .await
    .unwrap();
@@ -346,9 +342,8 @@ async fn test_execute_show_databases_tables() {
    match output {
        Output::RecordBatches(databases) => {
            let databases = databases.take();
            let columns = databases[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(columns[0].len(), 3);
            assert_eq!(1, databases[0].num_columns());
            assert_eq!(databases[0].column(0).len(), 3);
        }
        _ => unreachable!(),
    }
@@ -358,13 +353,12 @@ async fn test_execute_show_databases_tables() {
    match output {
        Output::RecordBatches(databases) => {
            let databases = databases.take();
            let columns = databases[0].df_recordbatch.columns();
            assert_eq!(1, columns.len());
            assert_eq!(columns[0].len(), 1);
            assert_eq!(1, databases[0].num_columns());
            assert_eq!(databases[0].column(0).len(), 1);

            assert_eq!(
                *columns[0].as_any().downcast_ref::<StringArray>().unwrap(),
                StringArray::from(vec![Some("demo")])
                *databases[0].column(0),
                Arc::new(StringVector::from(vec![Some("demo")])) as VectorRef
            );
        }
        _ => unreachable!(),
@@ -394,18 +388,13 @@ pub async fn test_execute_create() {
    assert!(matches!(output, Output::AffectedRows(1)));
}

async fn check_output_stream(output: Output, expected: Vec<&str>) {
async fn check_output_stream(output: Output, expected: String) {
    let recordbatches = match output {
        Output::Stream(stream) => util::collect(stream).await.unwrap(),
        Output::RecordBatches(recordbatches) => recordbatches.take(),
        Output::Stream(stream) => util::collect_batches(stream).await.unwrap(),
        Output::RecordBatches(recordbatches) => recordbatches,
        _ => unreachable!(),
    };
    let recordbatches = recordbatches
        .into_iter()
        .map(|r| r.df_recordbatch)
        .collect::<Vec<DfRecordBatch>>();
    let pretty_print = arrow_print::write(&recordbatches);
    let pretty_print = pretty_print.lines().collect::<Vec<&str>>();
    let pretty_print = recordbatches.pretty_print().unwrap();
    assert_eq!(pretty_print, expected);
}

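`check_output_stream` now takes one multi-line `String` and compares it against the record batches' own `pretty_print()` rendering, instead of splitting datafusion's `arrow_print` output into a `Vec` of lines. A small self-contained sketch of the new assertion style; `render` is a hypothetical stand-in for the pretty printer:

```rust
// Hypothetical pretty printer producing the same framed-table layout the
// tests below expect; the real one is RecordBatches::pretty_print().
fn render(rows: &[(&str, f64)]) -> String {
    let mut out = String::from("+-------+-----+\n| host  | cpu |\n+-------+-----+\n");
    for (host, cpu) in rows {
        out.push_str(&format!("| {host} | {cpu} |\n"));
    }
    out.push_str("+-------+-----+");
    out
}

fn main() {
    // One multi-line string literal replaces the old Vec<&str> of lines.
    let expected = "\
+-------+-----+
| host  | cpu |
+-------+-----+
| host1 | 1.1 |
| host2 | 2.2 |
+-------+-----+";
    assert_eq!(render(&[("host1", 1.1), ("host2", 2.2)]), expected);
}
```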
@@ -438,15 +427,16 @@ async fn test_alter_table() {
    assert!(matches!(output, Output::AffectedRows(1)));

    let output = execute_sql(&instance, "select * from demo order by ts").await;
    let expected = vec![
        "+-------+-----+--------+---------------------+--------+",
        "| host  | cpu | memory | ts                  | my_tag |",
        "+-------+-----+--------+---------------------+--------+",
        "| host1 | 1.1 | 100    | 1970-01-01 00:00:01 |        |",
        "| host2 | 2.2 | 200    | 1970-01-01 00:00:02 | hello  |",
        "| host3 | 3.3 | 300    | 1970-01-01 00:00:03 |        |",
        "+-------+-----+--------+---------------------+--------+",
    ];
    let expected = "\
+-------+-----+--------+---------------------+--------+
| host  | cpu | memory | ts                  | my_tag |
+-------+-----+--------+---------------------+--------+
| host1 | 1.1 | 100    | 1970-01-01T00:00:01 |        |
| host2 | 2.2 | 200    | 1970-01-01T00:00:02 | hello  |
| host3 | 3.3 | 300    | 1970-01-01T00:00:03 |        |
+-------+-----+--------+---------------------+--------+\
"
    .to_string();
    check_output_stream(output, expected).await;

    // Drop a column
@@ -454,15 +444,16 @@ async fn test_alter_table() {
    assert!(matches!(output, Output::AffectedRows(0)));

    let output = execute_sql(&instance, "select * from demo order by ts").await;
    let expected = vec![
        "+-------+-----+---------------------+--------+",
        "| host  | cpu | ts                  | my_tag |",
        "+-------+-----+---------------------+--------+",
        "| host1 | 1.1 | 1970-01-01 00:00:01 |        |",
        "| host2 | 2.2 | 1970-01-01 00:00:02 | hello  |",
        "| host3 | 3.3 | 1970-01-01 00:00:03 |        |",
        "+-------+-----+---------------------+--------+",
    ];
    let expected = "\
+-------+-----+---------------------+--------+
| host  | cpu | ts                  | my_tag |
+-------+-----+---------------------+--------+
| host1 | 1.1 | 1970-01-01T00:00:01 |        |
| host2 | 2.2 | 1970-01-01T00:00:02 | hello  |
| host3 | 3.3 | 1970-01-01T00:00:03 |        |
+-------+-----+---------------------+--------+\
"
    .to_string();
    check_output_stream(output, expected).await;

    // insert a new row
@@ -474,16 +465,17 @@ async fn test_alter_table() {
    assert!(matches!(output, Output::AffectedRows(1)));

    let output = execute_sql(&instance, "select * from demo order by ts").await;
    let expected = vec![
        "+-------+-----+---------------------+--------+",
        "| host  | cpu | ts                  | my_tag |",
        "+-------+-----+---------------------+--------+",
        "| host1 | 1.1 | 1970-01-01 00:00:01 |        |",
        "| host2 | 2.2 | 1970-01-01 00:00:02 | hello  |",
        "| host3 | 3.3 | 1970-01-01 00:00:03 |        |",
        "| host4 | 400 | 1970-01-01 00:00:04 | world  |",
        "+-------+-----+---------------------+--------+",
    ];
    let expected = "\
+-------+-----+---------------------+--------+
| host  | cpu | ts                  | my_tag |
+-------+-----+---------------------+--------+
| host1 | 1.1 | 1970-01-01T00:00:01 |        |
| host2 | 2.2 | 1970-01-01T00:00:02 | hello  |
| host3 | 3.3 | 1970-01-01T00:00:03 |        |
| host4 | 400 | 1970-01-01T00:00:04 | world  |
+-------+-----+---------------------+--------+\
"
    .to_string();
    check_output_stream(output, expected).await;
}

@@ -522,14 +514,15 @@ async fn test_insert_with_default_value_for_type(type_name: &str) {
    assert!(matches!(output, Output::AffectedRows(1)));

    let output = execute_sql(&instance, "select host, cpu from test_table").await;
    let expected = vec![
        "+-------+-----+",
        "| host  | cpu |",
        "+-------+-----+",
        "| host1 | 1.1 |",
        "| host2 | 2.2 |",
        "+-------+-----+",
    ];
    let expected = "\
+-------+-----+
| host  | cpu |
+-------+-----+
| host1 | 1.1 |
| host2 | 2.2 |
+-------+-----+\
"
    .to_string();
    check_output_stream(output, expected).await;
}

@@ -559,13 +552,14 @@ async fn test_use_database() {
    assert!(matches!(output, Output::AffectedRows(1)));

    let output = execute_sql_in_db(&instance, "show tables", "db1").await;
    let expected = vec![
        "+--------+",
        "| Tables |",
        "+--------+",
        "| tb1    |",
        "+--------+",
    ];
    let expected = "\
+--------+
| Tables |
+--------+
| tb1    |
+--------+\
"
    .to_string();
    check_output_stream(output, expected).await;

    let output = execute_sql_in_db(
@@ -577,25 +571,27 @@ async fn test_use_database() {
    assert!(matches!(output, Output::AffectedRows(1)));

    let output = execute_sql_in_db(&instance, "select col_i32 from tb1", "db1").await;
    let expected = vec![
        "+---------+",
        "| col_i32 |",
        "+---------+",
        "| 1       |",
        "+---------+",
    ];
    let expected = "\
+---------+
| col_i32 |
+---------+
| 1       |
+---------+\
"
    .to_string();
    check_output_stream(output, expected).await;

    // Making a particular database the default by means of the USE statement does not preclude
    // accessing tables in other databases.
    let output = execute_sql(&instance, "select number from public.numbers limit 1").await;
    let expected = vec![
        "+--------+",
        "| number |",
        "+--------+",
        "| 0      |",
        "+--------+",
    ];
    let expected = "\
+--------+
| number |
+--------+
| 0      |
+--------+\
"
    .to_string();
    check_output_stream(output, expected).await;
}

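Across these instance tests, raw arrow downcasts (`columns[0].as_any().downcast_ref::<Int64Array>()`) are replaced by building an expected typed vector and comparing it to `batches[0].column(0)` as a `VectorRef`. A minimal sketch of that comparison shape, with stand-in `Vector`/`Int64Vector` types instead of the real `datatypes` crate:

```rust
use std::sync::Arc;

// Stand-in for the datatypes Vector trait, reduced to what the comparison
// needs; the real trait compares logical values rather than exposing &[i64].
trait Vector: std::fmt::Debug {
    fn values(&self) -> &[i64];
    fn eq_vector(&self, other: &dyn Vector) -> bool {
        self.values() == other.values()
    }
}

#[derive(Debug)]
struct Int64Vector(Vec<i64>);

impl Vector for Int64Vector {
    fn values(&self) -> &[i64] {
        &self.0
    }
}

type VectorRef = Arc<dyn Vector>;

fn main() {
    // "Actual" column as returned by batches[0].column(0) in the tests.
    let actual: VectorRef = Arc::new(Int64Vector(vec![1655276557000, 1655276558000]));
    // Expected value constructed directly as a typed vector.
    let expected: VectorRef = Arc::new(Int64Vector(vec![1655276557000, 1655276558000]));
    assert!(expected.eq_vector(actual.as_ref()));
}
```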
@@ -9,10 +9,12 @@ default = []
test = []

[dependencies]
arrow = { version = "26.0" }
arrow-schema = { version = "26.0", features = ["serde"] }
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-time = { path = "../common/time" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datafusion-common = "14.0"
enum_dispatch = "0.3"
num = "0.4"
num-traits = "0.2"
@@ -21,17 +23,3 @@ paste = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }

[dependencies.arrow]
package = "arrow2"
version = "0.10"
features = [
    "io_csv",
    "io_json",
    "io_parquet",
    "io_parquet_compression",
    "io_ipc",
    "ahash",
    "compute",
    "serde_types",
]

@@ -12,216 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use arrow::array::{
    self, Array, BinaryArray as ArrowBinaryArray, ListArray,
    MutableBinaryArray as ArrowMutableBinaryArray, MutableUtf8Array, PrimitiveArray, Utf8Array,
};
use arrow::datatypes::DataType as ArrowDataType;
use common_time::timestamp::Timestamp;
use snafu::OptionExt;

use crate::error::{ConversionSnafu, Result};
use crate::prelude::ConcreteDataType;
use crate::value::{ListValue, Value};

pub type BinaryArray = ArrowBinaryArray<i64>;
pub type MutableBinaryArray = ArrowMutableBinaryArray<i64>;
pub type MutableStringArray = MutableUtf8Array<i32>;
pub type StringArray = Utf8Array<i32>;

macro_rules! cast_array {
    ($arr: ident, $CastType: ty) => {
        $arr.as_any()
            .downcast_ref::<$CastType>()
            .with_context(|| ConversionSnafu {
                from: format!("{:?}", $arr.data_type()),
            })?
    };
}

pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
    if array.is_null(idx) {
        return Ok(Value::Null);
    }

    let result = match array.data_type() {
        ArrowDataType::Null => Value::Null,
        ArrowDataType::Boolean => {
            Value::Boolean(cast_array!(array, array::BooleanArray).value(idx))
        }
        ArrowDataType::Binary | ArrowDataType::LargeBinary => {
            Value::Binary(cast_array!(array, BinaryArray).value(idx).into())
        }
        ArrowDataType::Int8 => Value::Int8(cast_array!(array, PrimitiveArray::<i8>).value(idx)),
        ArrowDataType::Int16 => Value::Int16(cast_array!(array, PrimitiveArray::<i16>).value(idx)),
        ArrowDataType::Int32 => Value::Int32(cast_array!(array, PrimitiveArray::<i32>).value(idx)),
        ArrowDataType::Int64 => Value::Int64(cast_array!(array, PrimitiveArray::<i64>).value(idx)),
        ArrowDataType::UInt8 => Value::UInt8(cast_array!(array, PrimitiveArray::<u8>).value(idx)),
        ArrowDataType::UInt16 => {
            Value::UInt16(cast_array!(array, PrimitiveArray::<u16>).value(idx))
        }
        ArrowDataType::UInt32 => {
            Value::UInt32(cast_array!(array, PrimitiveArray::<u32>).value(idx))
        }
        ArrowDataType::UInt64 => {
            Value::UInt64(cast_array!(array, PrimitiveArray::<u64>).value(idx))
        }
        ArrowDataType::Float32 => {
            Value::Float32(cast_array!(array, PrimitiveArray::<f32>).value(idx).into())
        }
        ArrowDataType::Float64 => {
            Value::Float64(cast_array!(array, PrimitiveArray::<f64>).value(idx).into())
        }
        ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
            Value::String(cast_array!(array, StringArray).value(idx).into())
        }
        ArrowDataType::Timestamp(t, _) => {
            let value = cast_array!(array, PrimitiveArray::<i64>).value(idx);
            let unit = match ConcreteDataType::from_arrow_time_unit(t) {
                ConcreteDataType::Timestamp(t) => t.unit,
                _ => unreachable!(),
            };
            Value::Timestamp(Timestamp::new(value, unit))
        }
        ArrowDataType::List(_) => {
            let array = cast_array!(array, ListArray::<i32>).value(idx);
            let inner_datatype = ConcreteDataType::try_from(array.data_type())?;
            let values = (0..array.len())
                .map(|i| arrow_array_get(&*array, i))
                .collect::<Result<Vec<Value>>>()?;
            Value::List(ListValue::new(Some(Box::new(values)), inner_datatype))
        }
        _ => unimplemented!("Arrow array datatype: {:?}", array.data_type()),
    };

    Ok(result)
}

#[cfg(test)]
mod test {
    use arrow::array::{
        BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
        MutableListArray, MutablePrimitiveArray, TryExtend, UInt16Array, UInt32Array, UInt64Array,
        UInt8Array,
    };
    use arrow::buffer::Buffer;
    use arrow::datatypes::{DataType, TimeUnit as ArrowTimeUnit};
    use common_time::timestamp::{TimeUnit, Timestamp};

    use super::*;
    use crate::prelude::Vector;
    use crate::vectors::TimestampVector;

    #[test]
    fn test_arrow_array_access() {
        let array1 = BooleanArray::from_slice(vec![true, true, false, false]);
        assert_eq!(Value::Boolean(true), arrow_array_get(&array1, 1).unwrap());
        let array1 = Int8Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::Int8(2), arrow_array_get(&array1, 1).unwrap());
        let array1 = UInt8Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::UInt8(2), arrow_array_get(&array1, 1).unwrap());
        let array1 = Int16Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::Int16(2), arrow_array_get(&array1, 1).unwrap());
        let array1 = UInt16Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::UInt16(2), arrow_array_get(&array1, 1).unwrap());
        let array1 = Int32Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::Int32(2), arrow_array_get(&array1, 1).unwrap());
        let array1 = UInt32Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::UInt32(2), arrow_array_get(&array1, 1).unwrap());
        let array = Int64Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::Int64(2), arrow_array_get(&array, 1).unwrap());
        let array1 = UInt64Array::from_vec(vec![1, 2, 3, 4]);
        assert_eq!(Value::UInt64(2), arrow_array_get(&array1, 1).unwrap());
        let array1 = Float32Array::from_vec(vec![1f32, 2f32, 3f32, 4f32]);
        assert_eq!(
            Value::Float32(2f32.into()),
            arrow_array_get(&array1, 1).unwrap()
        );
        let array1 = Float64Array::from_vec(vec![1f64, 2f64, 3f64, 4f64]);
        assert_eq!(
            Value::Float64(2f64.into()),
            arrow_array_get(&array1, 1).unwrap()
        );

        let array2 = StringArray::from(vec![Some("hello"), None, Some("world")]);
        assert_eq!(
            Value::String("hello".into()),
            arrow_array_get(&array2, 0).unwrap()
        );
        assert_eq!(Value::Null, arrow_array_get(&array2, 1).unwrap());

        let array3 = super::BinaryArray::from(vec![
            Some("hello".as_bytes()),
            None,
            Some("world".as_bytes()),
        ]);
        assert_eq!(
            Value::Binary("hello".as_bytes().into()),
            arrow_array_get(&array3, 0).unwrap()
        );
        assert_eq!(Value::Null, arrow_array_get(&array3, 1).unwrap());

        let vector = TimestampVector::new(Int64Array::from_vec(vec![1, 2, 3, 4]));
        let array = vector.to_boxed_arrow_array();
        let value = arrow_array_get(&*array, 1).unwrap();
        assert_eq!(
            value,
            Value::Timestamp(Timestamp::new(2, TimeUnit::Millisecond))
        );

        let array4 = PrimitiveArray::<i64>::from_data(
            DataType::Timestamp(ArrowTimeUnit::Millisecond, None),
            Buffer::from_slice(&vec![1, 2, 3, 4]),
            None,
        );
        assert_eq!(
            Value::Timestamp(Timestamp::new(1, TimeUnit::Millisecond)),
            arrow_array_get(&array4, 0).unwrap()
        );

        let array4 = PrimitiveArray::<i64>::from_data(
            DataType::Timestamp(ArrowTimeUnit::Nanosecond, None),
            Buffer::from_slice(&vec![1, 2, 3, 4]),
            None,
        );
        assert_eq!(
            Value::Timestamp(Timestamp::new(1, TimeUnit::Nanosecond)),
            arrow_array_get(&array4, 0).unwrap()
        );

        // test list array
        let data = vec![
            Some(vec![Some(1i32), Some(2), Some(3)]),
            None,
            Some(vec![Some(4), None, Some(6)]),
        ];

        let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
        arrow_array.try_extend(data).unwrap();
        let arrow_array: ListArray<i32> = arrow_array.into();

        let v0 = arrow_array_get(&arrow_array, 0).unwrap();
        match v0 {
            Value::List(list) => {
                assert!(matches!(list.datatype(), ConcreteDataType::Int32(_)));
                let items = list.items().as_ref().unwrap();
                assert_eq!(
                    **items,
                    vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)]
                );
            }
            _ => unreachable!(),
        }

        assert_eq!(Value::Null, arrow_array_get(&arrow_array, 1).unwrap());
        let v2 = arrow_array_get(&arrow_array, 2).unwrap();
        match v2 {
            Value::List(list) => {
                assert!(matches!(list.datatype(), ConcreteDataType::Int32(_)));
                let items = list.items().as_ref().unwrap();
                assert_eq!(**items, vec![Value::Int32(4), Value::Null, Value::Int32(6)]);
            }
            _ => unreachable!(),
        }
    }
}
pub type BinaryArray = arrow::array::LargeBinaryArray;
pub type MutableBinaryArray = arrow::array::LargeBinaryBuilder;
pub type StringArray = arrow::array::StringArray;
pub type MutableStringArray = arrow::array::StringBuilder;

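The whole arrow2-based module (the generic-offset `Utf8Array<i32>`/`BinaryArray<i64>` aliases plus `arrow_array_get`) is deleted; what remains is a thin alias layer onto arrow-rs types. A sketch of the mapping, with local stand-in modules so it compiles on its own (the real alias targets are the `arrow` 26 array and builder types named in the diff):

```rust
// Local stand-ins that mirror the *shape* of arrow::array so this snippet
// builds without the crate; only the alias lines correspond to the diff.
mod arrow {
    pub mod array {
        pub type LargeBinaryArray = Vec<Vec<u8>>; // stands in for arrow-rs LargeBinaryArray
        pub type StringArray = Vec<String>; // stands in for arrow-rs StringArray
    }
}

// arrow2's `BinaryArray<i64>` / `Utf8Array<i32>` map onto concrete arrow-rs
// types, so the generic offset parameter disappears from the public aliases.
pub type BinaryArray = arrow::array::LargeBinaryArray;
pub type StringArray = arrow::array::StringArray;

fn main() {
    let blobs: BinaryArray = vec![b"hello".to_vec()];
    let names: StringArray = vec!["world".to_string()];
    assert_eq!(blobs[0], b"hello");
    assert_eq!(names[0], "world");
}
```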
@@ -14,7 +14,7 @@

use std::sync::Arc;

use arrow::datatypes::DataType as ArrowDataType;
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
use common_time::timestamp::TimeUnit;
use paste::paste;
use serde::{Deserialize, Serialize};
@@ -23,13 +23,14 @@ use crate::error::{self, Error, Result};
use crate::type_id::LogicalTypeId;
use crate::types::{
    BinaryType, BooleanType, DateTimeType, DateType, Float32Type, Float64Type, Int16Type,
    Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampType, UInt16Type,
    UInt32Type, UInt64Type, UInt8Type,
    Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampMicrosecondType,
    TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
    UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use crate::value::Value;
use crate::vectors::MutableVector;

#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[enum_dispatch::enum_dispatch(DataType)]
pub enum ConcreteDataType {
    Null(NullType),
@@ -47,17 +48,21 @@ pub enum ConcreteDataType {
    Float32(Float32Type),
    Float64(Float64Type),

    // String types
    // String types:
    Binary(BinaryType),
    String(StringType),

    // Date types:
    Date(DateType),
    DateTime(DateTimeType),
    Timestamp(TimestampType),

    // Compound types:
    List(ListType),
}

// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
// returning all these properties to the `DataType` trait
impl ConcreteDataType {
    pub fn is_float(&self) -> bool {
        matches!(
@@ -70,7 +75,7 @@ impl ConcreteDataType {
        matches!(self, ConcreteDataType::Boolean(_))
    }

    pub fn stringifiable(&self) -> bool {
    pub fn is_stringifiable(&self) -> bool {
        matches!(
            self,
            ConcreteDataType::String(_)
@@ -103,13 +108,6 @@ impl ConcreteDataType {
        )
    }

    pub fn is_timestamp(&self) -> bool {
        matches!(
            self,
            ConcreteDataType::Timestamp(_) | ConcreteDataType::Int64(_)
        )
    }

    pub fn numerics() -> Vec<ConcreteDataType> {
        vec![
            ConcreteDataType::int8_datatype(),
@@ -136,6 +134,14 @@ impl ConcreteDataType {
    pub fn is_null(&self) -> bool {
        matches!(self, ConcreteDataType::Null(NullType))
    }

    /// Try to cast the type as a [`ListType`].
    pub fn as_list(&self) -> Option<&ListType> {
        match self {
            ConcreteDataType::List(t) => Some(t),
            _ => None,
        }
    }
}

impl TryFrom<&ArrowDataType> for ConcreteDataType {
@@ -161,7 +167,7 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
            ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
            ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
            ArrowDataType::List(field) => Self::List(ListType::new(
                ConcreteDataType::from_arrow_type(&field.data_type),
                ConcreteDataType::from_arrow_type(field.data_type()),
            )),
            _ => {
                return error::UnsupportedArrowTypeSnafu {
@@ -191,38 +197,52 @@ macro_rules! impl_new_concrete_type_functions {

impl_new_concrete_type_functions!(
    Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
    Binary, String, Date, DateTime
    Binary, Date, DateTime, String
);

impl ConcreteDataType {
    pub fn list_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
        ConcreteDataType::List(ListType::new(inner_type))
    pub fn timestamp_second_datatype() -> Self {
        ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType::default()))
    }

    pub fn timestamp_millisecond_datatype() -> Self {
        ConcreteDataType::Timestamp(TimestampType::Millisecond(
            TimestampMillisecondType::default(),
        ))
    }

    pub fn timestamp_microsecond_datatype() -> Self {
        ConcreteDataType::Timestamp(TimestampType::Microsecond(
            TimestampMicrosecondType::default(),
        ))
    }

    pub fn timestamp_nanosecond_datatype() -> Self {
        ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType::default()))
    }

    pub fn timestamp_datatype(unit: TimeUnit) -> Self {
        ConcreteDataType::Timestamp(TimestampType::new(unit))
    }

    pub fn timestamp_millis_datatype() -> Self {
        ConcreteDataType::Timestamp(TimestampType::new(TimeUnit::Millisecond))
        match unit {
            TimeUnit::Second => Self::timestamp_second_datatype(),
            TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
            TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
            TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
        }
    }

    /// Converts from arrow timestamp unit to
    // TODO(hl): maybe impl From<ArrowTimestamp> for our timestamp ?
    pub fn from_arrow_time_unit(t: &arrow::datatypes::TimeUnit) -> Self {
    pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
        match t {
            arrow::datatypes::TimeUnit::Second => Self::timestamp_datatype(TimeUnit::Second),
            arrow::datatypes::TimeUnit::Millisecond => {
                Self::timestamp_datatype(TimeUnit::Millisecond)
            }
            arrow::datatypes::TimeUnit::Microsecond => {
                Self::timestamp_datatype(TimeUnit::Microsecond)
            }
            arrow::datatypes::TimeUnit::Nanosecond => {
                Self::timestamp_datatype(TimeUnit::Nanosecond)
            }
            ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
            ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
            ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
            ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
        }
    }

    pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
        ConcreteDataType::List(ListType::new(item_type))
    }
}

/// Data type abstraction.
@@ -237,11 +257,15 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
    /// Returns the default value of this type.
    fn default_value(&self) -> Value;

    /// Convert this type as [arrow2::datatypes::DataType].
    /// Convert this type as [arrow::datatypes::DataType].
    fn as_arrow_type(&self) -> ArrowDataType;

    /// Create a mutable vector with given `capacity` of this type.
    /// Creates a mutable vector with given `capacity` of this type.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

    /// Returns true if the data type is compatible with timestamp type so we can
    /// use it as a timestamp.
    fn is_timestamp_compatible(&self) -> bool;
}

pub type DataTypeRef = Arc<dyn DataType>;
@@ -324,10 +348,6 @@ mod tests {
            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
            ConcreteDataType::String(_)
        ));
        assert!(matches!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
            ConcreteDataType::String(_)
        ));
        assert_eq!(
            ConcreteDataType::from_arrow_type(&ArrowDataType::List(Box::new(Field::new(
                "item",
@@ -345,31 +365,48 @@ mod tests {
    #[test]
    fn test_from_arrow_timestamp() {
        assert_eq!(
            ConcreteDataType::timestamp_millis_datatype(),
            ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Millisecond)
            ConcreteDataType::timestamp_millisecond_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
        );
        assert_eq!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond),
            ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Microsecond)
            ConcreteDataType::timestamp_microsecond_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
        );
        assert_eq!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
            ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Nanosecond)
            ConcreteDataType::timestamp_nanosecond_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
        );
        assert_eq!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Second),
            ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Second)
            ConcreteDataType::timestamp_second_datatype(),
            ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
        );
    }

    #[test]
    fn test_is_timestamp() {
        assert!(ConcreteDataType::timestamp_millis_datatype().is_timestamp());
        assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp());
        assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp());
        assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp());
        assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp());
        assert!(ConcreteDataType::int64_datatype().is_timestamp());
    fn test_is_timestamp_compatible() {
        assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp_compatible());
        assert!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp_compatible()
        );
        assert!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp_compatible()
        );
        assert!(
            ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp_compatible()
        );
        assert!(ConcreteDataType::timestamp_second_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_timestamp_compatible());
        assert!(ConcreteDataType::int64_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::null_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::binary_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::date_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::datetime_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::string_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::int32_datatype().is_timestamp_compatible());
        assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible());
    }

    #[test]
@@ -377,4 +414,91 @@ mod tests {
        assert!(ConcreteDataType::null_datatype().is_null());
        assert!(!ConcreteDataType::int32_datatype().is_null());
    }

    #[test]
    fn test_is_float() {
        assert!(!ConcreteDataType::int32_datatype().is_float());
        assert!(ConcreteDataType::float32_datatype().is_float());
        assert!(ConcreteDataType::float64_datatype().is_float());
    }

    #[test]
    fn test_is_boolean() {
        assert!(!ConcreteDataType::int32_datatype().is_boolean());
        assert!(!ConcreteDataType::float32_datatype().is_boolean());
        assert!(ConcreteDataType::boolean_datatype().is_boolean());
    }

    #[test]
    fn test_is_stringifiable() {
        assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
        assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
        assert!(ConcreteDataType::string_datatype().is_stringifiable());
        assert!(ConcreteDataType::date_datatype().is_stringifiable());
        assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
    }

    #[test]
    fn test_is_signed() {
        assert!(ConcreteDataType::int8_datatype().is_signed());
        assert!(ConcreteDataType::int16_datatype().is_signed());
        assert!(ConcreteDataType::int32_datatype().is_signed());
        assert!(ConcreteDataType::int64_datatype().is_signed());
        assert!(ConcreteDataType::date_datatype().is_signed());
        assert!(ConcreteDataType::datetime_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
        assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());

        assert!(!ConcreteDataType::uint8_datatype().is_signed());
        assert!(!ConcreteDataType::uint16_datatype().is_signed());
        assert!(!ConcreteDataType::uint32_datatype().is_signed());
        assert!(!ConcreteDataType::uint64_datatype().is_signed());

        assert!(!ConcreteDataType::float32_datatype().is_signed());
        assert!(!ConcreteDataType::float64_datatype().is_signed());
    }

    #[test]
    fn test_is_unsigned() {
        assert!(!ConcreteDataType::int8_datatype().is_unsigned());
        assert!(!ConcreteDataType::int16_datatype().is_unsigned());
        assert!(!ConcreteDataType::int32_datatype().is_unsigned());
        assert!(!ConcreteDataType::int64_datatype().is_unsigned());
        assert!(!ConcreteDataType::date_datatype().is_unsigned());
        assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
        assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());

        assert!(ConcreteDataType::uint8_datatype().is_unsigned());
        assert!(ConcreteDataType::uint16_datatype().is_unsigned());
        assert!(ConcreteDataType::uint32_datatype().is_unsigned());
        assert!(ConcreteDataType::uint64_datatype().is_unsigned());

        assert!(!ConcreteDataType::float32_datatype().is_unsigned());
        assert!(!ConcreteDataType::float64_datatype().is_unsigned());
    }

    #[test]
    fn test_numerics() {
        let nums = ConcreteDataType::numerics();
        assert_eq!(10, nums.len());
    }

    #[test]
    fn test_as_list() {
        let list_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
        assert_eq!(
            ListType::new(ConcreteDataType::int32_datatype()),
            *list_type.as_list().unwrap()
        );
        assert!(ConcreteDataType::int32_datatype().as_list().is_none());
    }
}

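The net effect of these hunks: `TimestampType` splits into four per-unit variants, `timestamp_millis_datatype` disappears, and both `timestamp_datatype(unit)` and `from_arrow_time_unit` become simple dispatch tables. A compile-alone sketch of that dispatch, with stand-in enums in place of the real `datatypes`/`common_time` types:

```rust
// Stand-ins for common_time::TimeUnit and the ConcreteDataType enum.
#[derive(Debug, PartialEq)]
enum TimeUnit {
    Second,
    Millisecond,
    Microsecond,
    Nanosecond,
}

#[derive(Debug, PartialEq)]
enum ConcreteDataType {
    Timestamp(TimeUnit),
}

impl ConcreteDataType {
    fn timestamp_second_datatype() -> Self {
        ConcreteDataType::Timestamp(TimeUnit::Second)
    }
    fn timestamp_millisecond_datatype() -> Self {
        ConcreteDataType::Timestamp(TimeUnit::Millisecond)
    }
    fn timestamp_microsecond_datatype() -> Self {
        ConcreteDataType::Timestamp(TimeUnit::Microsecond)
    }
    fn timestamp_nanosecond_datatype() -> Self {
        ConcreteDataType::Timestamp(TimeUnit::Nanosecond)
    }

    // The generic entry point now fans out to one constructor per unit.
    fn timestamp_datatype(unit: TimeUnit) -> Self {
        match unit {
            TimeUnit::Second => Self::timestamp_second_datatype(),
            TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
            TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
            TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
        }
    }
}

fn main() {
    assert_eq!(
        ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond),
        ConcreteDataType::timestamp_millisecond_datatype()
    );
}
```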
@@ -99,6 +99,12 @@ pub enum Error {

    #[snafu(display("Duplicated metadata for {}", key))]
    DuplicateMeta { key: String, backtrace: Backtrace },

    #[snafu(display("Failed to convert value into scalar value, reason: {}", reason))]
    ToScalarValue {
        reason: String,
        backtrace: Backtrace,
    },
}

impl ErrorExt for Error {

@@ -20,9 +20,10 @@ pub mod data_type;
pub mod error;
pub mod macros;
pub mod prelude;
mod scalars;
pub mod scalars;
pub mod schema;
pub mod serialize;
pub mod timestamp;
pub mod type_id;
pub mod types;
pub mod value;

@@ -12,27 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

///! Some helper macros for datatypes, copied from databend.
#[macro_export]
macro_rules! for_all_scalar_types {
    ($macro:tt $(, $x:tt)*) => {
        $macro! {
            [$($x),*],
            { i8 },
            { i16 },
            { i32 },
            { i64 },
            { u8 },
            { u16 },
            { u32 },
            { u64 },
            { f32 },
            { f64 },
            { bool },
        }
    };
}
//! Some helper macros for datatypes, copied from databend.

/// Apply the macro rules to all primitive types.
#[macro_export]
macro_rules! for_all_primitive_types {
    ($macro:tt $(, $x:tt)*) => {
@@ -52,6 +34,8 @@ macro_rules! for_all_primitive_types {
    };
}

/// Match the logical type and apply `$body` to all primitive types and
/// `nbody` to other types.
#[macro_export]
macro_rules! with_match_primitive_type_id {
    ($key_type:expr, | $_:tt $T:ident | $body:tt, $nbody:tt) => {{
@@ -62,17 +46,21 @@ macro_rules! with_match_primitive_type_id {
        }

        use $crate::type_id::LogicalTypeId;
        use $crate::types::{
            Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
            UInt32Type, UInt64Type, UInt8Type,
        };
        match $key_type {
            LogicalTypeId::Int8 => __with_ty__! { i8 },
            LogicalTypeId::Int16 => __with_ty__! { i16 },
            LogicalTypeId::Int32 => __with_ty__! { i32 },
            LogicalTypeId::Int64 => __with_ty__! { i64 },
            LogicalTypeId::UInt8 => __with_ty__! { u8 },
            LogicalTypeId::UInt16 => __with_ty__! { u16 },
            LogicalTypeId::UInt32 => __with_ty__! { u32 },
            LogicalTypeId::UInt64 => __with_ty__! { u64 },
            LogicalTypeId::Float32 => __with_ty__! { f32 },
            LogicalTypeId::Float64 => __with_ty__! { f64 },
            LogicalTypeId::Int8 => __with_ty__! { Int8Type },
            LogicalTypeId::Int16 => __with_ty__! { Int16Type },
            LogicalTypeId::Int32 => __with_ty__! { Int32Type },
            LogicalTypeId::Int64 => __with_ty__! { Int64Type },
            LogicalTypeId::UInt8 => __with_ty__! { UInt8Type },
            LogicalTypeId::UInt16 => __with_ty__! { UInt16Type },
            LogicalTypeId::UInt32 => __with_ty__! { UInt32Type },
            LogicalTypeId::UInt64 => __with_ty__! { UInt64Type },
            LogicalTypeId::Float32 => __with_ty__! { Float32Type },
            LogicalTypeId::Float64 => __with_ty__! { Float64Type },

            _ => $nbody,
        }

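The `with_match_primitive_type_id!` rewrite changes what the macro hands to the caller's body: each matched branch now expands `$T` to a logical type struct (`Int8Type`, `Float64Type`, ...) instead of a bare native (`i8`, `f64`). A hand-expanded sketch of the effect, using stand-in types so it compiles alone:

```rust
// Stand-ins for the logical type structs the macro now dispatches to.
struct Int8Type;
struct Float64Type;

trait LogicalPrimitiveType {
    fn type_name() -> &'static str;
}
impl LogicalPrimitiveType for Int8Type {
    fn type_name() -> &'static str {
        "Int8"
    }
}
impl LogicalPrimitiveType for Float64Type {
    fn type_name() -> &'static str {
        "Float64"
    }
}

enum LogicalTypeId {
    Int8,
    Float64,
    String,
}

// Hand-expanded form of `with_match_primitive_type_id!(id, |$T| ...)` after
// this change: the body receives the type struct, not a native type.
fn describe(id: LogicalTypeId) -> &'static str {
    match id {
        LogicalTypeId::Int8 => Int8Type::type_name(),
        LogicalTypeId::Float64 => Float64Type::type_name(),
        _ => "non-primitive",
    }
}

fn main() {
    assert_eq!(describe(LogicalTypeId::Float64), "Float64");
}
```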
@@ -16,8 +16,6 @@ pub use crate::data_type::{ConcreteDataType, DataType, DataTypeRef};
pub use crate::macros::*;
pub use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
pub use crate::type_id::LogicalTypeId;
pub use crate::types::Primitive;
pub use crate::types::{LogicalPrimitiveType, WrapperType};
pub use crate::value::{Value, ValueRef};
pub use crate::vectors::{
    Helper as VectorHelper, MutableVector, Validity, Vector, VectorBuilder, VectorRef,
};
pub use crate::vectors::{MutableVector, Validity, Vector, VectorRef};

@@ -14,11 +14,17 @@

use std::any::Any;

use common_time::{Date, DateTime, Timestamp};
use common_time::{Date, DateTime};

use crate::prelude::*;
use crate::value::{ListValue, ListValueRef};
use crate::vectors::*;
use crate::types::{
    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
    UInt64Type, UInt8Type,
};
use crate::value::{ListValue, ListValueRef, Value};
use crate::vectors::{
    BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, MutableVector,
    PrimitiveVector, StringVector, Vector,
};

fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
    match iter.size_hint() {
@@ -35,7 +41,7 @@ where
    for<'a> Self::VectorType: ScalarVector<RefItem<'a> = Self::RefType<'a>>,
{
    type VectorType: ScalarVector<OwnedItem = Self>;
    type RefType<'a>: ScalarRef<'a, ScalarType = Self, VectorType = Self::VectorType>
    type RefType<'a>: ScalarRef<'a, ScalarType = Self>
    where
        Self: 'a;
    /// Get a reference of the current value.
@@ -46,7 +52,6 @@ where
}

pub trait ScalarRef<'a>: std::fmt::Debug + Clone + Copy + Send + 'a {
    type VectorType: ScalarVector<RefItem<'a> = Self>;
    /// The corresponding [`Scalar`] type.
    type ScalarType: Scalar<RefType<'a> = Self>;

@@ -63,7 +68,7 @@ where
{
    type OwnedItem: Scalar<VectorType = Self>;
    /// The reference item of this vector.
    type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem, VectorType = Self>
    type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem>
    where
        Self: 'a;

@@ -137,47 +142,46 @@ pub trait ScalarVectorBuilder: MutableVector {
    fn finish(&mut self) -> Self::VectorType;
}

macro_rules! impl_primitive_scalar_type {
    ($native:ident) => {
        impl Scalar for $native {
            type VectorType = PrimitiveVector<$native>;
            type RefType<'a> = $native;
macro_rules! impl_scalar_for_native {
    ($Native: ident, $DataType: ident) => {
        impl Scalar for $Native {
            type VectorType = PrimitiveVector<$DataType>;
            type RefType<'a> = $Native;

            #[inline]
            fn as_scalar_ref(&self) -> $native {
            fn as_scalar_ref(&self) -> $Native {
                *self
            }

            #[allow(clippy::needless_lifetimes)]
            #[inline]
            fn upcast_gat<'short, 'long: 'short>(long: $native) -> $native {
            fn upcast_gat<'short, 'long: 'short>(long: $Native) -> $Native {
                long
            }
        }

        /// Implement [`ScalarRef`] for primitive types. Note that primitive types are both [`Scalar`] and [`ScalarRef`].
        impl<'a> ScalarRef<'a> for $native {
            type VectorType = PrimitiveVector<$native>;
            type ScalarType = $native;
        impl<'a> ScalarRef<'a> for $Native {
            type ScalarType = $Native;

            #[inline]
            fn to_owned_scalar(&self) -> $native {
            fn to_owned_scalar(&self) -> $Native {
                *self
            }
        }
    };
}

impl_primitive_scalar_type!(u8);
impl_primitive_scalar_type!(u16);
impl_primitive_scalar_type!(u32);
impl_primitive_scalar_type!(u64);
impl_primitive_scalar_type!(i8);
impl_primitive_scalar_type!(i16);
impl_primitive_scalar_type!(i32);
impl_primitive_scalar_type!(i64);
impl_primitive_scalar_type!(f32);
impl_primitive_scalar_type!(f64);
impl_scalar_for_native!(u8, UInt8Type);
impl_scalar_for_native!(u16, UInt16Type);
impl_scalar_for_native!(u32, UInt32Type);
impl_scalar_for_native!(u64, UInt64Type);
impl_scalar_for_native!(i8, Int8Type);
impl_scalar_for_native!(i16, Int16Type);
impl_scalar_for_native!(i32, Int32Type);
impl_scalar_for_native!(i64, Int64Type);
impl_scalar_for_native!(f32, Float32Type);
impl_scalar_for_native!(f64, Float64Type);

impl Scalar for bool {
    type VectorType = BooleanVector;
@@ -196,7 +200,6 @@ impl Scalar for bool {
}

impl<'a> ScalarRef<'a> for bool {
    type VectorType = BooleanVector;
    type ScalarType = bool;

    #[inline]
@@ -221,7 +224,6 @@ impl Scalar for String {
}

impl<'a> ScalarRef<'a> for &'a str {
    type VectorType = StringVector;
    type ScalarType = String;

    #[inline]
@@ -246,7 +248,6 @@ impl Scalar for Vec<u8> {
}

impl<'a> ScalarRef<'a> for &'a [u8] {
    type VectorType = BinaryVector;
    type ScalarType = Vec<u8>;

    #[inline]
@@ -269,7 +270,6 @@ impl Scalar for Date {
}

impl<'a> ScalarRef<'a> for Date {
    type VectorType = DateVector;
    type ScalarType = Date;

    fn to_owned_scalar(&self) -> Self::ScalarType {
@@ -291,7 +291,6 @@ impl Scalar for DateTime {
}

impl<'a> ScalarRef<'a> for DateTime {
    type VectorType = DateTimeVector;
    type ScalarType = DateTime;

    fn to_owned_scalar(&self) -> Self::ScalarType {
@@ -299,27 +298,7 @@ impl<'a> ScalarRef<'a> for DateTime {
    }
}

impl Scalar for Timestamp {
    type VectorType = TimestampVector;
    type RefType<'a> = Timestamp;

    fn as_scalar_ref(&self) -> Self::RefType<'_> {
        *self
    }

    fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
        long
    }
}

impl<'a> ScalarRef<'a> for Timestamp {
    type VectorType = TimestampVector;
    type ScalarType = Timestamp;

    fn to_owned_scalar(&self) -> Self::ScalarType {
        *self
    }
}
// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.

impl Scalar for ListValue {
    type VectorType = ListVector;
@@ -335,7 +314,6 @@ impl Scalar for ListValue {
}

impl<'a> ScalarRef<'a> for ListValueRef<'a> {
    type VectorType = ListVector;
    type ScalarType = ListValue;

    fn to_owned_scalar(&self) -> Self::ScalarType {
@@ -357,8 +335,9 @@ impl<'a> ScalarRef<'a> for ListValueRef<'a> {
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vectors::binary::BinaryVector;
    use crate::vectors::primitive::Int32Vector;
    use crate::data_type::ConcreteDataType;
    use crate::timestamp::TimestampSecond;
    use crate::vectors::{BinaryVector, Int32Vector, ListVectorBuilder, TimestampSecondVector};

    fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
        let mut builder = T::Builder::with_capacity(items.len());
@@ -454,11 +433,11 @@ mod tests {

    #[test]
    fn test_build_timestamp_vector() {
        let expect: Vec<Option<Timestamp>> = vec![Some(10.into()), None, Some(42.into())];
        let vector: TimestampVector = build_vector_from_slice(&expect);
        let expect: Vec<Option<TimestampSecond>> = vec![Some(10.into()), None, Some(42.into())];
        let vector: TimestampSecondVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
        let val = vector.get_data(0).unwrap();
        assert_eq!(val, val.as_scalar_ref());
        assert_eq!(10, val.to_owned_scalar().value());
        assert_eq!(TimestampSecond::from(10), val.to_owned_scalar());
    }
}

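The scalar hunks above slim the trait pair down: `ScalarRef` loses its `VectorType` associated type and only points back at its owned `Scalar`, with the two traits tied together through a generic associated type. A compile-alone sketch of that shape under those assumptions (the real traits carry extra bounds and a `ScalarVector` link not shown here):

```rust
// Reduced stand-ins for the Scalar/ScalarRef pair after the change.
trait Scalar: 'static + Sized {
    // GAT: the borrowed form of this scalar, e.g. &'a str for String.
    type RefType<'a>: ScalarRef<'a, ScalarType = Self>
    where
        Self: 'a;
    fn as_scalar_ref(&self) -> Self::RefType<'_>;
}

trait ScalarRef<'a>: Copy {
    // Only the owned type remains; the old `VectorType` member is gone.
    type ScalarType: Scalar<RefType<'a> = Self>;
    fn to_owned_scalar(&self) -> Self::ScalarType;
}

impl Scalar for String {
    type RefType<'a> = &'a str;
    fn as_scalar_ref(&self) -> &str {
        self
    }
}

impl<'a> ScalarRef<'a> for &'a str {
    type ScalarType = String;
    fn to_owned_scalar(&self) -> String {
        self.to_string()
    }
}

fn main() {
    let owned = String::from("greptime");
    let borrowed = owned.as_scalar_ref();
    assert_eq!(borrowed.to_owned_scalar(), owned);
}
```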
@@ -12,129 +12,28 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod column_schema;
|
||||
mod constraint;
|
||||
mod raw;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use arrow::datatypes::Metadata;
|
||||
use arrow::datatypes::{Field, Schema as ArrowSchema};
|
||||
use datafusion_common::DFSchemaRef;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, DeserializeSnafu, Error, Result, SerializeSnafu};
|
||||
use crate::data_type::DataType;
|
||||
use crate::error::{self, Error, Result};
|
||||
pub use crate::schema::column_schema::{ColumnSchema, Metadata};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
use crate::vectors::VectorRef;
|
||||
|
||||
/// Key used to store whether the column is time index in arrow field's metadata.
|
||||
const TIME_INDEX_KEY: &str = "greptime:time_index";
|
||||
/// Key used to store version number of the schema in metadata.
|
||||
const VERSION_KEY: &str = "greptime:version";
|
||||
/// Key used to store default constraint in arrow field's metadata.
|
||||
const ARROW_FIELD_DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
|
||||
|
||||
/// Schema of a column, used as an immutable struct.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ColumnSchema {
|
||||
pub name: String,
|
||||
pub data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
is_time_index: bool,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
metadata: Metadata,
|
||||
}
|
||||
|
||||
impl ColumnSchema {
|
||||
pub fn new<T: Into<String>>(
|
||||
name: T,
|
||||
data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
) -> ColumnSchema {
|
||||
ColumnSchema {
|
||||
name: name.into(),
|
||||
data_type,
|
||||
is_nullable,
|
||||
is_time_index: false,
|
||||
default_constraint: None,
|
||||
metadata: Metadata::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_time_index(&self) -> bool {
|
||||
self.is_time_index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_nullable(&self) -> bool {
|
||||
self.is_nullable
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
|
||||
self.default_constraint.as_ref()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &Metadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
pub fn with_time_index(mut self, is_time_index: bool) -> Self {
|
||||
self.is_time_index = is_time_index;
|
||||
if is_time_index {
|
||||
self.metadata
|
||||
.insert(TIME_INDEX_KEY.to_string(), "true".to_string());
|
||||
} else {
|
||||
self.metadata.remove(TIME_INDEX_KEY);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_default_constraint(
|
||||
mut self,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
) -> Result<Self> {
|
||||
if let Some(constraint) = &default_constraint {
|
||||
constraint.validate(&self.data_type, self.is_nullable)?;
|
||||
}
|
||||
|
||||
self.default_constraint = default_constraint;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Creates a new [`ColumnSchema`] with given metadata.
|
||||
pub fn with_metadata(mut self, metadata: Metadata) -> Self {
|
||||
self.metadata = metadata;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
|
||||
match &self.default_constraint {
|
||||
Some(c) => c
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some),
|
||||
None => {
|
||||
if self.is_nullable {
|
||||
// No default constraint, use null as default value.
|
||||
// TODO(yingwen): Use NullVector once it supports setting logical type.
|
||||
ColumnDefaultConstraint::null_value()
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A common schema, should be immutable.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Schema {
|
||||
column_schemas: Vec<ColumnSchema>,
|
||||
name_to_index: HashMap<String, usize>,
|
||||
@@ -232,7 +131,7 @@ impl Schema {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &Metadata {
|
||||
pub fn metadata(&self) -> &HashMap<String, String> {
|
||||
&self.arrow_schema.metadata
|
||||
}
|
||||
}
|
||||
@@ -244,7 +143,7 @@ pub struct SchemaBuilder {
    fields: Vec<Field>,
    timestamp_index: Option<usize>,
    version: u32,
    metadata: Metadata,
    metadata: HashMap<String, String>,
}

impl TryFrom<Vec<ColumnSchema>> for SchemaBuilder {
@@ -293,7 +192,7 @@ impl SchemaBuilder {
        self.metadata
            .insert(VERSION_KEY.to_string(), self.version.to_string());

        let arrow_schema = ArrowSchema::from(self.fields).with_metadata(self.metadata);
        let arrow_schema = ArrowSchema::new(self.fields).with_metadata(self.metadata);

        Ok(Schema {
            column_schemas: self.column_schemas,
@@ -348,7 +247,7 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us

    let column_schema = &column_schemas[timestamp_index];
    ensure!(
        column_schema.data_type.is_timestamp(),
        column_schema.data_type.is_timestamp_compatible(),
        error::InvalidTimestampIndexSnafu {
            index: timestamp_index,
        }
@@ -365,58 +264,6 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us
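
Putting the builder together end to end; a sketch grounded in `test_schema_with_timestamp` below, except that the finishing `build()` call is an assumption (the hunk above shows only the tail of the method that assembles the `ArrowSchema`):

    let column_schemas = vec![
        ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
        ColumnSchema::new(
            "ts",
            ConcreteDataType::timestamp_millisecond_datatype(),
            false,
        )
        .with_time_index(true),
    ];
    // `build()` is assumed; validate_timestamp_index rejects a time-index
    // column whose data type is not timestamp-compatible.
    let schema = SchemaBuilder::try_from(column_schemas).unwrap().build().unwrap();
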
pub type SchemaRef = Arc<Schema>;

impl TryFrom<&Field> for ColumnSchema {
    type Error = Error;

    fn try_from(field: &Field) -> Result<ColumnSchema> {
        let data_type = ConcreteDataType::try_from(&field.data_type)?;
        let mut metadata = field.metadata.clone();
        let default_constraint = match metadata.remove(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY) {
            Some(json) => Some(serde_json::from_str(&json).context(DeserializeSnafu { json })?),
            None => None,
        };
        let is_time_index = metadata.contains_key(TIME_INDEX_KEY);

        Ok(ColumnSchema {
            name: field.name.clone(),
            data_type,
            is_nullable: field.is_nullable,
            is_time_index,
            default_constraint,
            metadata,
        })
    }
}

impl TryFrom<&ColumnSchema> for Field {
    type Error = Error;

    fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
        let mut metadata = column_schema.metadata.clone();
        if let Some(value) = &column_schema.default_constraint {
            // Add an additional metadata entry to store the default constraint.
            let old = metadata.insert(
                ARROW_FIELD_DEFAULT_CONSTRAINT_KEY.to_string(),
                serde_json::to_string(&value).context(SerializeSnafu)?,
            );

            ensure!(
                old.is_none(),
                error::DuplicateMetaSnafu {
                    key: ARROW_FIELD_DEFAULT_CONSTRAINT_KEY,
                }
            );
        }

        Ok(Field::new(
            column_schema.name.clone(),
            column_schema.data_type.as_arrow_type(),
            column_schema.is_nullable(),
        )
        .with_metadata(metadata))
    }
}
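
One property worth spelling out: because `with_time_index` records TIME_INDEX_KEY in the column metadata and `TryFrom<&Field>` reads it back, the time-index flag survives a round trip through arrow. A sketch built from the two impls above:

    let cs = ColumnSchema::new("ts", ConcreteDataType::timestamp_millisecond_datatype(), false)
        .with_time_index(true);
    let field = Field::try_from(&cs).unwrap();
    // The flag travels inside the field metadata, not the arrow type itself.
    let back = ColumnSchema::try_from(&field).unwrap();
    assert!(back.is_time_index());
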
impl TryFrom<Arc<ArrowSchema>> for Schema {
    type Error = Error;

@@ -425,7 +272,7 @@ impl TryFrom<Arc<ArrowSchema>> for Schema {
        let mut name_to_index = HashMap::with_capacity(arrow_schema.fields.len());
        for field in &arrow_schema.fields {
            let column_schema = ColumnSchema::try_from(field)?;
            name_to_index.insert(field.name.clone(), column_schemas.len());
            name_to_index.insert(field.name().to_string(), column_schemas.len());
            column_schemas.push(column_schema);
        }

@@ -475,7 +322,7 @@ impl TryFrom<DFSchemaRef> for Schema {
    }
}

fn try_parse_version(metadata: &Metadata, key: &str) -> Result<u32> {
fn try_parse_version(metadata: &HashMap<String, String>, key: &str) -> Result<u32> {
    if let Some(value) = metadata.get(key) {
        let version = value
            .parse()
@@ -489,127 +336,8 @@ fn try_parse_version(metadata: &Metadata, key: &str) -> Result<u32> {

#[cfg(test)]
mod tests {
    use arrow::datatypes::DataType as ArrowDataType;

    use super::*;
    use crate::value::Value;

    #[test]
    fn test_column_schema() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name);
        assert_eq!(ArrowDataType::Int32, field.data_type);
        assert!(field.is_nullable);

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    #[test]
    fn test_column_schema_with_default_constraint() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(99))))
            .unwrap();
        assert!(column_schema
            .metadata()
            .get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
            .is_none());

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("test", field.name);
        assert_eq!(ArrowDataType::Int32, field.data_type);
        assert!(field.is_nullable);
        assert_eq!(
            "{\"Value\":{\"Int32\":99}}",
            field
                .metadata
                .get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
                .unwrap()
        );

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    #[test]
    fn test_column_schema_with_metadata() {
        let mut metadata = Metadata::new();
        metadata.insert("k1".to_string(), "v1".to_string());
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
        assert!(column_schema
            .metadata()
            .get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
            .is_none());

        let field = Field::try_from(&column_schema).unwrap();
        assert_eq!("v1", field.metadata.get("k1").unwrap());
        assert!(field
            .metadata
            .get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
            .is_some());

        let new_column_schema = ColumnSchema::try_from(&field).unwrap();
        assert_eq!(column_schema, new_column_schema);
    }

    #[test]
    fn test_column_schema_with_duplicate_metadata() {
        let mut metadata = Metadata::new();
        metadata.insert(
            ARROW_FIELD_DEFAULT_CONSTRAINT_KEY.to_string(),
            "v1".to_string(),
        );
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_metadata(metadata)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        Field::try_from(&column_schema).unwrap_err();
    }

    #[test]
    fn test_column_schema_invalid_default_constraint() {
        ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap_err();
    }

    #[test]
    fn test_column_default_constraint_try_into_from() {
        let default_constraint = ColumnDefaultConstraint::Value(Value::from(42i64));

        let bytes: Vec<u8> = default_constraint.clone().try_into().unwrap();
        let from_value = ColumnDefaultConstraint::try_from(&bytes[..]).unwrap();

        assert_eq!(default_constraint, from_value);
    }

    #[test]
    fn test_column_schema_create_default_null() {
        // Implicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());

        // Explicit default null.
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
            .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
            .unwrap();
        let v = column_schema.create_default_vector(5).unwrap().unwrap();
        assert_eq!(5, v.len());
        assert!(v.only_null());
    }

    #[test]
    fn test_column_schema_no_default() {
        let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
        assert!(column_schema.create_default_vector(5).unwrap().is_none());
    }
    use crate::data_type::ConcreteDataType;

    #[test]
    fn test_build_empty_schema() {
@@ -664,8 +392,12 @@ mod tests {
    fn test_schema_with_timestamp() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
            ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false)
                .with_time_index(true),
            ColumnSchema::new(
                "ts",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
            )
            .with_time_index(true),
        ];
        let schema = SchemaBuilder::try_from(column_schemas.clone())
            .unwrap()

@@ -22,7 +22,7 @@ use snafu::{ensure, ResultExt};
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Result};
use crate::value::Value;
use crate::vectors::{Int64Vector, TimestampVector, VectorRef};
use crate::vectors::{Int64Vector, TimestampMillisecondVector, VectorRef};

const CURRENT_TIMESTAMP: &str = "current_timestamp()";

@@ -81,7 +81,7 @@ impl ColumnDefaultConstraint {
            error::UnsupportedDefaultExprSnafu { expr }
        );
        ensure!(
            data_type.is_timestamp(),
            data_type.is_timestamp_compatible(),
            error::DefaultValueTypeSnafu {
                reason: "return value of the function must have timestamp type",
            }
@@ -162,8 +162,10 @@ fn create_current_timestamp_vector(
    data_type: &ConcreteDataType,
    num_rows: usize,
) -> Result<VectorRef> {
    // FIXME(yingwen): We should implement cast in VectorOp so we could cast the millisecond vector
    // to other data types and avoid this match.
    match data_type {
        ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampVector::from_values(
        ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampMillisecondVector::from_values(
            std::iter::repeat(util::current_time_millis()).take(num_rows),
        ))),
        ConcreteDataType::Int64(_) => Ok(Arc::new(Int64Vector::from_values(
@@ -217,7 +219,7 @@ mod tests {
    fn test_validate_function_constraint() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        constraint
            .validate(&ConcreteDataType::timestamp_millis_datatype(), false)
            .validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
            .unwrap();
        constraint
            .validate(&ConcreteDataType::boolean_datatype(), false)
@@ -225,7 +227,7 @@ mod tests {

        let constraint = ColumnDefaultConstraint::Function("hello()".to_string());
        constraint
            .validate(&ConcreteDataType::timestamp_millis_datatype(), false)
            .validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
            .unwrap_err();
    }

@@ -262,7 +264,7 @@ mod tests {
    fn test_create_default_vector_by_func() {
        let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
        // Timestamp type.
        let data_type = ConcreteDataType::timestamp_millis_datatype();
        let data_type = ConcreteDataType::timestamp_millisecond_datatype();
        let v = constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap();
@@ -286,7 +288,7 @@ mod tests {
        );

        let constraint = ColumnDefaultConstraint::Function("no".to_string());
        let data_type = ConcreteDataType::timestamp_millis_datatype();
        let data_type = ConcreteDataType::timestamp_millisecond_datatype();
        constraint
            .create_default_vector(&data_type, false, 4)
            .unwrap_err();

@@ -20,7 +20,7 @@ use crate::schema::{ColumnSchema, Schema, SchemaBuilder};
/// Struct used to serialize and deserialize [`Schema`](crate::schema::Schema).
///
/// This struct only contains the data necessary to recover the Schema.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RawSchema {
    pub column_schemas: Vec<ColumnSchema>,
    pub timestamp_index: Option<usize>,
@@ -56,8 +56,12 @@ mod tests {
    fn test_raw_convert() {
        let column_schemas = vec![
            ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
            ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false)
                .with_time_index(true),
            ColumnSchema::new(
                "ts",
                ConcreteDataType::timestamp_millisecond_datatype(),
                false,
            )
            .with_time_index(true),
        ];
        let schema = SchemaBuilder::try_from(column_schemas)
            .unwrap()
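
`RawSchema` exists because `Schema` carries derived state (the arrow schema, the name-to-index map) that need not be persisted. A hedged sketch of the serde round trip the derives above enable, given some `schema: &Schema`; the `From<&Schema>` conversion is an assumption, as it sits outside this hunk:

    let raw = RawSchema::from(&schema); // assumed conversion
    let json = serde_json::to_string(&raw).unwrap();
    let back: RawSchema = serde_json::from_str(&json).unwrap();
    assert_eq!(raw, back);
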
@@ -104,6 +104,12 @@ macro_rules! define_timestamp_with_unit {
                    [<Timestamp $unit>]::from_native(val)
                }
            }

            impl From<[<Timestamp $unit>]> for i64 {
                fn from(val: [<Timestamp $unit>]) -> Self {
                    val.0.value()
                }
            }
        }
    };
}
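
The generated `From` impl makes the unit-tagged wrappers cheap to flatten back to raw values; a minimal sketch using the constructor the tests below use:

    let ts = TimestampSecond::new(123);
    let raw: i64 = ts.into(); // via the From impl the macro now emits
    assert_eq!(123, raw);
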
@@ -117,6 +123,18 @@ define_timestamp_with_unit!(Nanosecond);
mod tests {
    use super::*;

    #[test]
    fn test_to_serde_json_value() {
        let ts = TimestampSecond::new(123);
        let val = serde_json::Value::from(ts);
        match val {
            serde_json::Value::String(s) => {
                assert_eq!("1970-01-01 00:02:03+0000", s);
            }
            _ => unreachable!(),
        }
    }

    #[test]
    fn test_timestamp_scalar() {
        let ts = TimestampSecond::new(123);
@@ -42,7 +42,10 @@ pub enum LogicalTypeId {
    /// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
    DateTime,

    Timestamp,
    TimestampSecond,
    TimestampMillisecond,
    TimestampMicrosecond,
    TimestampNanosecond,

    List,
}
@@ -74,7 +77,14 @@ impl LogicalTypeId {
            LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
            LogicalTypeId::Date => ConcreteDataType::date_datatype(),
            LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
            LogicalTypeId::Timestamp => ConcreteDataType::timestamp_millis_datatype(), // to timestamp type with default time unit
            LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
            LogicalTypeId::TimestampMillisecond => {
                ConcreteDataType::timestamp_millisecond_datatype()
            }
            LogicalTypeId::TimestampMicrosecond => {
                ConcreteDataType::timestamp_microsecond_datatype()
            }
            LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
            LogicalTypeId::List => {
                ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
            }
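
With a dedicated `LogicalTypeId` per time unit, the id-to-datatype mapping is total for timestamps. A sketch, assuming the match above sits in the usual `data_type()` accessor (the method name is outside this hunk):

    let dt = LogicalTypeId::TimestampMicrosecond.data_type(); // method name assumed
    assert_eq!(ConcreteDataType::timestamp_microsecond_datatype(), dt);
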
@@ -14,25 +14,27 @@

mod binary_type;
mod boolean_type;
mod date;
mod datetime;
mod date_type;
mod datetime_type;
mod list_type;
mod null_type;
mod primitive_traits;
mod primitive_type;
mod string_type;
mod timestamp;

mod timestamp_type;

pub use binary_type::BinaryType;
pub use boolean_type::BooleanType;
pub use date::DateType;
pub use datetime::DateTimeType;
pub use date_type::DateType;
pub use datetime_type::DateTimeType;
pub use list_type::ListType;
pub use null_type::NullType;
pub use primitive_traits::{OrdPrimitive, Primitive};
pub use primitive_type::{
    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, PrimitiveElement,
    PrimitiveType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
    Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
    NativeType, OrdPrimitive, UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
};
pub use string_type::StringType;
pub use timestamp::TimestampType;
pub use timestamp_type::{
    TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
    TimestampSecondType, TimestampType,
};

@@ -53,4 +53,8 @@ impl DataType for BinaryType {
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(BinaryVectorBuilder::with_capacity(capacity))
    }

    fn is_timestamp_compatible(&self) -> bool {
        false
    }
}

@@ -52,4 +52,8 @@ impl DataType for BooleanType {
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(BooleanVectorBuilder::with_capacity(capacity))
    }

    fn is_timestamp_compatible(&self) -> bool {
        false
    }
}
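
`is_timestamp_compatible` is the hook that `validate_timestamp_index` and the default-constraint validation above now call instead of `is_timestamp`, so every `DataType` impl must answer it; non-temporal types simply return false. A sketch of the dispatch through `ConcreteDataType`, matching how schema.rs calls it:

    assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible());
    assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible());
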
@@ -1,54 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow::datatypes::DataType as ArrowDataType;
use serde::{Deserialize, Serialize};

use crate::data_type::DataType;
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
use crate::scalars::ScalarVectorBuilder;
use crate::vectors::{DateVectorBuilder, MutableVector};

#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DateType;

impl DataType for DateType {
    fn name(&self) -> &str {
        "Date"
    }

    fn logical_type_id(&self) -> LogicalTypeId {
        LogicalTypeId::Date
    }

    fn default_value(&self) -> Value {
        Value::Date(Default::default())
    }

    fn as_arrow_type(&self) -> ArrowDataType {
        ArrowDataType::Date32
    }

    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(DateVectorBuilder::with_capacity(capacity))
    }
}

impl DateType {
    pub fn arc() -> DataTypeRef {
        Arc::new(Self)
    }
}
@@ -59,6 +59,7 @@ impl LogicalPrimitiveType for DateType {
    type ArrowPrimitive = Date32Type;
    type Native = i32;
    type Wrapper = Date;
    type LargestType = Self;

    fn build_data_type() -> ConcreteDataType {
        ConcreteDataType::date_datatype()
@@ -1,61 +0,0 @@
// Copyright 2022 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use arrow::datatypes::DataType as ArrowDataType;
use serde::{Deserialize, Serialize};

use crate::data_type::{DataType, DataTypeRef};
use crate::prelude::{LogicalTypeId, Value};
use crate::scalars::ScalarVectorBuilder;
use crate::vectors::{DateTimeVectorBuilder, MutableVector};

#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DateTimeType;

const DATE_TIME_TYPE_NAME: &str = "DateTime";

/// [DateTimeType] represents the seconds elapsed since UNIX EPOCH.
impl DataType for DateTimeType {
    fn name(&self) -> &str {
        DATE_TIME_TYPE_NAME
    }

    fn logical_type_id(&self) -> LogicalTypeId {
        LogicalTypeId::DateTime
    }

    fn default_value(&self) -> Value {
        Value::DateTime(Default::default())
    }

    fn as_arrow_type(&self) -> ArrowDataType {
        ArrowDataType::Date64
    }

    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(DateTimeVectorBuilder::with_capacity(capacity))
    }
}

impl DateTimeType {
    pub fn arc() -> DataTypeRef {
        Arc::new(Self)
    }

    pub fn name() -> &'static str {
        DATE_TIME_TYPE_NAME
    }
}
@@ -57,6 +57,7 @@ impl LogicalPrimitiveType for DateTimeType {
    type ArrowPrimitive = Date64Type;
    type Native = i64;
    type Wrapper = DateTime;
    type LargestType = Self;

    fn build_data_type() -> ConcreteDataType {
        ConcreteDataType::datetime_datatype()
@@ -15,15 +15,17 @@
use arrow::datatypes::{DataType as ArrowDataType, Field};
use serde::{Deserialize, Serialize};

use crate::prelude::*;
use crate::value::ListValue;
use crate::data_type::{ConcreteDataType, DataType};
use crate::type_id::LogicalTypeId;
use crate::value::{ListValue, Value};
use crate::vectors::{ListVectorBuilder, MutableVector};

/// Used to represent the List datatype.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ListType {
    /// The type of List's inner data.
    inner: Box<ConcreteDataType>,
    /// The type of List's item.
    // Use Box to avoid recursive dependency, as enum ConcreteDataType depends on ListType.
    item_type: Box<ConcreteDataType>,
}

impl Default for ListType {
@@ -33,11 +35,18 @@ impl Default for ListType {
}

impl ListType {
    pub fn new(datatype: ConcreteDataType) -> Self {
    /// Create a new `ListType` whose item's data type is `item_type`.
    pub fn new(item_type: ConcreteDataType) -> Self {
        ListType {
            inner: Box::new(datatype),
            item_type: Box::new(item_type),
        }
    }

    /// Returns the item data type.
    #[inline]
    pub fn item_type(&self) -> &ConcreteDataType {
        &self.item_type
    }
}

impl DataType for ListType {
@@ -50,20 +59,24 @@ impl DataType for ListType {
    }

    fn default_value(&self) -> Value {
        Value::List(ListValue::new(None, *self.inner.clone()))
        Value::List(ListValue::new(None, *self.item_type.clone()))
    }

    fn as_arrow_type(&self) -> ArrowDataType {
        let field = Box::new(Field::new("item", self.inner.as_arrow_type(), true));
        let field = Box::new(Field::new("item", self.item_type.as_arrow_type(), true));
        ArrowDataType::List(field)
    }

    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(ListVectorBuilder::with_type_capacity(
            *self.inner.clone(),
            *self.item_type.clone(),
            capacity,
        ))
    }

    fn is_timestamp_compatible(&self) -> bool {
        false
    }
}

#[cfg(test)]
@@ -84,5 +97,6 @@ mod tests {
            ArrowDataType::List(Box::new(Field::new("item", ArrowDataType::Boolean, true))),
            t.as_arrow_type()
        );
        assert_eq!(ConcreteDataType::boolean_datatype(), *t.item_type());
    }
}
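
A short sketch of the renamed API, composed from the constructor, accessor, and `default_value` shown above:

    let t = ListType::new(ConcreteDataType::boolean_datatype());
    assert_eq!(ConcreteDataType::boolean_datatype(), *t.item_type());
    // The default is an empty (None) list value carrying the item type.
    assert_eq!(
        Value::List(ListValue::new(None, ConcreteDataType::boolean_datatype())),
        t.default_value()
    );
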
Some files were not shown because too many files have changed in this diff.