mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-03 20:02:54 +00:00
feat: Switch to datatypes2
This commit is contained in:
19
Cargo.lock
generated
19
Cargo.lock
generated
@@ -2043,25 +2043,6 @@ dependencies = [
|
||||
"snafu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "datatypes2"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"arrow",
|
||||
"common-base",
|
||||
"common-error",
|
||||
"common-time",
|
||||
"datafusion-common",
|
||||
"enum_dispatch",
|
||||
"num",
|
||||
"num-traits",
|
||||
"ordered-float 3.4.0",
|
||||
"paste",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"snafu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive-new"
|
||||
version = "0.5.9"
|
||||
|
||||
@@ -20,7 +20,6 @@ members = [
|
||||
"src/common/time",
|
||||
"src/datanode",
|
||||
"src/datatypes",
|
||||
"src/datatypes2",
|
||||
"src/frontend",
|
||||
"src/log-store",
|
||||
"src/meta-client",
|
||||
|
||||
@@ -9,11 +9,10 @@ default = []
|
||||
test = []
|
||||
|
||||
[dependencies]
|
||||
arrow = "26.0.0"
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion-common = "14.0.0"
|
||||
datafusion-common = "14.0"
|
||||
enum_dispatch = "0.3"
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
@@ -22,3 +21,4 @@ paste = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
arrow = "26.0"
|
||||
|
||||
@@ -12,13 +12,18 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::array::{self, Array, ListArray, PrimitiveArray};
|
||||
use arrow::array::{
|
||||
Array, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
|
||||
Int32Array, Int64Array, Int8Array, ListArray, UInt16Array, UInt32Array, UInt64Array,
|
||||
UInt8Array,
|
||||
};
|
||||
use arrow::datatypes::DataType;
|
||||
use common_time::timestamp::Timestamp;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{ConversionSnafu, Result};
|
||||
use crate::prelude::ConcreteDataType;
|
||||
use crate::value::{ListValue, Value};
|
||||
|
||||
pub type BinaryArray = arrow::array::LargeBinaryArray;
|
||||
@@ -36,6 +41,7 @@ macro_rules! cast_array {
|
||||
};
|
||||
}
|
||||
|
||||
// TODO(yingwen): Remove this function.
|
||||
pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
|
||||
if array.is_null(idx) {
|
||||
return Ok(Value::Null);
|
||||
@@ -43,42 +49,46 @@ pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
|
||||
|
||||
let result = match array.data_type() {
|
||||
DataType::Null => Value::Null,
|
||||
DataType::Boolean => Value::Boolean(cast_array!(array, array::BooleanArray).value(idx)),
|
||||
DataType::Binary | DataType::LargeBinary => {
|
||||
Value::Binary(cast_array!(array, BinaryArray).value(idx).into())
|
||||
}
|
||||
DataType::Int8 => Value::Int8(cast_array!(array, PrimitiveArray::<i8>).value(idx)),
|
||||
DataType::Int16 => Value::Int16(cast_array!(array, PrimitiveArray::<i16>).value(idx)),
|
||||
DataType::Int32 => Value::Int32(cast_array!(array, PrimitiveArray::<i32>).value(idx)),
|
||||
DataType::Int64 => Value::Int64(cast_array!(array, PrimitiveArray::<i64>).value(idx)),
|
||||
DataType::UInt8 => Value::UInt8(cast_array!(array, PrimitiveArray::<u8>).value(idx)),
|
||||
DataType::UInt16 => Value::UInt16(cast_array!(array, PrimitiveArray::<u16>).value(idx)),
|
||||
DataType::UInt32 => Value::UInt32(cast_array!(array, PrimitiveArray::<u32>).value(idx)),
|
||||
DataType::UInt64 => Value::UInt64(cast_array!(array, PrimitiveArray::<u64>).value(idx)),
|
||||
DataType::Float32 => {
|
||||
Value::Float32(cast_array!(array, PrimitiveArray::<f32>).value(idx).into())
|
||||
}
|
||||
DataType::Float64 => {
|
||||
Value::Float64(cast_array!(array, PrimitiveArray::<f64>).value(idx).into())
|
||||
}
|
||||
DataType::Utf8 | DataType::LargeUtf8 => {
|
||||
Value::String(cast_array!(array, StringArray).value(idx).into())
|
||||
}
|
||||
DataType::Timestamp(t, _) => {
|
||||
let value = cast_array!(array, PrimitiveArray::<i64>).value(idx);
|
||||
let unit = match ConcreteDataType::from_arrow_time_unit(t) {
|
||||
ConcreteDataType::Timestamp(t) => t.unit,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Value::Timestamp(Timestamp::new(value, unit))
|
||||
}
|
||||
DataType::Boolean => Value::Boolean(cast_array!(array, BooleanArray).value(idx)),
|
||||
DataType::Binary => Value::Binary(cast_array!(array, BinaryArray).value(idx).into()),
|
||||
DataType::Int8 => Value::Int8(cast_array!(array, Int8Array).value(idx)),
|
||||
DataType::Int16 => Value::Int16(cast_array!(array, Int16Array).value(idx)),
|
||||
DataType::Int32 => Value::Int32(cast_array!(array, Int32Array).value(idx)),
|
||||
DataType::Int64 => Value::Int64(cast_array!(array, Int64Array).value(idx)),
|
||||
DataType::UInt8 => Value::UInt8(cast_array!(array, UInt8Array).value(idx)),
|
||||
DataType::UInt16 => Value::UInt16(cast_array!(array, UInt16Array).value(idx)),
|
||||
DataType::UInt32 => Value::UInt32(cast_array!(array, UInt32Array).value(idx)),
|
||||
DataType::UInt64 => Value::UInt64(cast_array!(array, UInt64Array).value(idx)),
|
||||
DataType::Float32 => Value::Float32(cast_array!(array, Float32Array).value(idx).into()),
|
||||
DataType::Float64 => Value::Float64(cast_array!(array, Float64Array).value(idx).into()),
|
||||
DataType::Utf8 => Value::String(cast_array!(array, StringArray).value(idx).into()),
|
||||
DataType::Date32 => Value::Date(cast_array!(array, Date32Array).value(idx).into()),
|
||||
DataType::Date64 => Value::DateTime(cast_array!(array, Date64Array).value(idx).into()),
|
||||
DataType::Timestamp(t, _) => match t {
|
||||
arrow::datatypes::TimeUnit::Second => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampSecondArray).value(idx),
|
||||
TimeUnit::Second,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Millisecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampMillisecondArray).value(idx),
|
||||
TimeUnit::Millisecond,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Microsecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampMicrosecondArray).value(idx),
|
||||
TimeUnit::Microsecond,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Nanosecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampNanosecondArray).value(idx),
|
||||
TimeUnit::Nanosecond,
|
||||
)),
|
||||
},
|
||||
DataType::List(_) => {
|
||||
let array = cast_array!(array, ListArray::<i32>).value(idx);
|
||||
let inner_datatype = ConcreteDataType::try_from(array.data_type())?;
|
||||
let array = cast_array!(array, ListArray).value(idx);
|
||||
let item_type = ConcreteDataType::try_from(array.data_type())?;
|
||||
let values = (0..array.len())
|
||||
.map(|i| arrow_array_get(&*array, i))
|
||||
.collect::<Result<Vec<Value>>>()?;
|
||||
Value::List(ListValue::new(Some(Box::new(values)), inner_datatype))
|
||||
Value::List(ListValue::new(Some(Box::new(values)), item_type))
|
||||
}
|
||||
_ => unimplemented!("Arrow array datatype: {:?}", array.data_type()),
|
||||
};
|
||||
@@ -88,45 +98,74 @@ pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
|
||||
MutableListArray, MutablePrimitiveArray, TryExtend, UInt16Array, UInt32Array, UInt64Array,
|
||||
LargeBinaryArray, TimestampMicrosecondArray, TimestampMillisecondArray,
|
||||
TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array,
|
||||
UInt8Array,
|
||||
};
|
||||
use arrow::buffer::Buffer;
|
||||
use arrow::datatypes::{DataType, TimeUnit as ArrowTimeUnit};
|
||||
use arrow::datatypes::Int32Type;
|
||||
use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
use paste::paste;
|
||||
|
||||
use super::*;
|
||||
use crate::prelude::Vector;
|
||||
use crate::vectors::TimestampVector;
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::types::TimestampType;
|
||||
|
||||
macro_rules! test_arrow_array_get_for_timestamps {
|
||||
( $($unit: ident), *) => {
|
||||
$(
|
||||
paste! {
|
||||
let mut builder = arrow::array::[<Timestamp $unit Array>]::builder(3);
|
||||
builder.append_value(1);
|
||||
builder.append_value(0);
|
||||
builder.append_value(-1);
|
||||
let ts_array = Arc::new(builder.finish()) as Arc<dyn Array>;
|
||||
let v = arrow_array_get(&ts_array, 1).unwrap();
|
||||
assert_eq!(
|
||||
ConcreteDataType::Timestamp(TimestampType::$unit(
|
||||
$crate::types::[<Timestamp $unit Type>]::default(),
|
||||
)),
|
||||
v.data_type()
|
||||
);
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_array() {
|
||||
test_arrow_array_get_for_timestamps![Second, Millisecond, Microsecond, Nanosecond];
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_arrow_array_access() {
|
||||
let array1 = BooleanArray::from_slice(vec![true, true, false, false]);
|
||||
let array1 = BooleanArray::from(vec![true, true, false, false]);
|
||||
assert_eq!(Value::Boolean(true), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Int8Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array1 = Int8Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int8(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = UInt8Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt8Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt8(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Int16Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array1 = Int16Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int16(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = UInt16Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt16Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt16(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Int32Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array1 = Int32Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int32(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = UInt32Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt32Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt32(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array = Int64Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array = Int64Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int64(2), arrow_array_get(&array, 1).unwrap());
|
||||
let array1 = UInt64Array::from_vec(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt64Array::from(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt64(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Float32Array::from_vec(vec![1f32, 2f32, 3f32, 4f32]);
|
||||
let array1 = Float32Array::from(vec![1f32, 2f32, 3f32, 4f32]);
|
||||
assert_eq!(
|
||||
Value::Float32(2f32.into()),
|
||||
arrow_array_get(&array1, 1).unwrap()
|
||||
);
|
||||
let array1 = Float64Array::from_vec(vec![1f64, 2f64, 3f64, 4f64]);
|
||||
let array1 = Float64Array::from(vec![1f64, 2f64, 3f64, 4f64]);
|
||||
assert_eq!(
|
||||
Value::Float64(2f64.into()),
|
||||
arrow_array_get(&array1, 1).unwrap()
|
||||
@@ -139,55 +178,42 @@ mod test {
|
||||
);
|
||||
assert_eq!(Value::Null, arrow_array_get(&array2, 1).unwrap());
|
||||
|
||||
let array3 = super::BinaryArray::from(vec![
|
||||
let array3 = LargeBinaryArray::from(vec![
|
||||
Some("hello".as_bytes()),
|
||||
None,
|
||||
Some("world".as_bytes()),
|
||||
]);
|
||||
assert_eq!(
|
||||
Value::Binary("hello".as_bytes().into()),
|
||||
arrow_array_get(&array3, 0).unwrap()
|
||||
);
|
||||
assert_eq!(Value::Null, arrow_array_get(&array3, 1).unwrap());
|
||||
|
||||
let vector = TimestampVector::new(Int64Array::from_vec(vec![1, 2, 3, 4]));
|
||||
let array = vector.to_boxed_arrow_array();
|
||||
let value = arrow_array_get(&*array, 1).unwrap();
|
||||
let array = TimestampSecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
assert_eq!(value, Value::Timestamp(Timestamp::new(2, TimeUnit::Second)));
|
||||
let array = TimestampMillisecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
assert_eq!(
|
||||
value,
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::Millisecond))
|
||||
);
|
||||
|
||||
let array4 = PrimitiveArray::<i64>::from_data(
|
||||
DataType::Timestamp(ArrowTimeUnit::Millisecond, None),
|
||||
Buffer::from_slice(&vec![1, 2, 3, 4]),
|
||||
None,
|
||||
);
|
||||
let array = TimestampMicrosecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(1, TimeUnit::Millisecond)),
|
||||
arrow_array_get(&array4, 0).unwrap()
|
||||
);
|
||||
|
||||
let array4 = PrimitiveArray::<i64>::from_data(
|
||||
DataType::Timestamp(ArrowTimeUnit::Nanosecond, None),
|
||||
Buffer::from_slice(&vec![1, 2, 3, 4]),
|
||||
None,
|
||||
value,
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::Microsecond))
|
||||
);
|
||||
let array = TimestampNanosecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(1, TimeUnit::Nanosecond)),
|
||||
arrow_array_get(&array4, 0).unwrap()
|
||||
value,
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::Nanosecond))
|
||||
);
|
||||
|
||||
// test list array
|
||||
let data = vec![
|
||||
Some(vec![Some(1i32), Some(2), Some(3)]),
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ListArray<i32> = arrow_array.into();
|
||||
let arrow_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
|
||||
|
||||
let v0 = arrow_array_get(&arrow_array, 0).unwrap();
|
||||
match v0 {
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -23,13 +23,14 @@ use crate::error::{self, Error, Result};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{
|
||||
BinaryType, BooleanType, DateTimeType, DateType, Float32Type, Float64Type, Int16Type,
|
||||
Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampType, UInt16Type,
|
||||
UInt32Type, UInt64Type, UInt8Type,
|
||||
Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::MutableVector;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[enum_dispatch::enum_dispatch(DataType)]
|
||||
pub enum ConcreteDataType {
|
||||
Null(NullType),
|
||||
@@ -47,17 +48,21 @@ pub enum ConcreteDataType {
|
||||
Float32(Float32Type),
|
||||
Float64(Float64Type),
|
||||
|
||||
// String types
|
||||
// String types:
|
||||
Binary(BinaryType),
|
||||
String(StringType),
|
||||
|
||||
// Date types:
|
||||
Date(DateType),
|
||||
DateTime(DateTimeType),
|
||||
Timestamp(TimestampType),
|
||||
|
||||
// Compound types:
|
||||
List(ListType),
|
||||
}
|
||||
|
||||
// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
|
||||
// returning all these properties to the `DataType` trait
|
||||
impl ConcreteDataType {
|
||||
pub fn is_float(&self) -> bool {
|
||||
matches!(
|
||||
@@ -70,7 +75,7 @@ impl ConcreteDataType {
|
||||
matches!(self, ConcreteDataType::Boolean(_))
|
||||
}
|
||||
|
||||
pub fn stringifiable(&self) -> bool {
|
||||
pub fn is_stringifiable(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::String(_)
|
||||
@@ -103,13 +108,6 @@ impl ConcreteDataType {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn is_timestamp(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::Timestamp(_) | ConcreteDataType::Int64(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn numerics() -> Vec<ConcreteDataType> {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
@@ -161,7 +159,7 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
|
||||
ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
|
||||
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
|
||||
ArrowDataType::List(field) => Self::List(ListType::new(
|
||||
ConcreteDataType::from_arrow_type(&field.data_type),
|
||||
ConcreteDataType::from_arrow_type(field.data_type()),
|
||||
)),
|
||||
_ => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
@@ -191,38 +189,52 @@ macro_rules! impl_new_concrete_type_functions {
|
||||
|
||||
impl_new_concrete_type_functions!(
|
||||
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
|
||||
Binary, String, Date, DateTime
|
||||
Binary, Date, DateTime, String
|
||||
);
|
||||
|
||||
impl ConcreteDataType {
|
||||
pub fn list_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(inner_type))
|
||||
pub fn timestamp_second_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType::default()))
|
||||
}
|
||||
|
||||
pub fn timestamp_millisecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Millisecond(
|
||||
TimestampMillisecondType::default(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn timestamp_microsecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Microsecond(
|
||||
TimestampMicrosecondType::default(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn timestamp_nanosecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType::default()))
|
||||
}
|
||||
|
||||
pub fn timestamp_datatype(unit: TimeUnit) -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::new(unit))
|
||||
}
|
||||
|
||||
pub fn timestamp_millis_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::new(TimeUnit::Millisecond))
|
||||
match unit {
|
||||
TimeUnit::Second => Self::timestamp_second_datatype(),
|
||||
TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
|
||||
TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
|
||||
TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts from arrow timestamp unit to the corresponding timestamp [`ConcreteDataType`].
|
||||
// TODO(hl): maybe impl From<ArrowTimestamp> for our timestamp ?
|
||||
pub fn from_arrow_time_unit(t: &arrow::datatypes::TimeUnit) -> Self {
|
||||
pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
|
||||
match t {
|
||||
arrow::datatypes::TimeUnit::Second => Self::timestamp_datatype(TimeUnit::Second),
|
||||
arrow::datatypes::TimeUnit::Millisecond => {
|
||||
Self::timestamp_datatype(TimeUnit::Millisecond)
|
||||
}
|
||||
arrow::datatypes::TimeUnit::Microsecond => {
|
||||
Self::timestamp_datatype(TimeUnit::Microsecond)
|
||||
}
|
||||
arrow::datatypes::TimeUnit::Nanosecond => {
|
||||
Self::timestamp_datatype(TimeUnit::Nanosecond)
|
||||
}
|
||||
ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
|
||||
ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
|
||||
ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
|
||||
ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(item_type))
|
||||
}
|
||||
}
|
||||
|
||||
/// Data type abstraction.
|
||||
@@ -237,11 +249,15 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
/// Returns the default value of this type.
|
||||
fn default_value(&self) -> Value;
|
||||
|
||||
/// Convert this type as [arrow2::datatypes::DataType].
|
||||
/// Convert this type as [arrow::datatypes::DataType].
|
||||
fn as_arrow_type(&self) -> ArrowDataType;
|
||||
|
||||
/// Create a mutable vector with given `capacity` of this type.
|
||||
/// Creates a mutable vector with given `capacity` of this type.
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
|
||||
|
||||
/// Returns true if the data type is compatible with timestamp type so we can
|
||||
/// use it as a timestamp.
|
||||
fn is_timestamp_compatible(&self) -> bool;
|
||||
}
|
||||
|
||||
pub type DataTypeRef = Arc<dyn DataType>;
|
||||
@@ -324,10 +340,6 @@ mod tests {
|
||||
ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
|
||||
ConcreteDataType::String(_)
|
||||
));
|
||||
assert!(matches!(
|
||||
ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
|
||||
ConcreteDataType::String(_)
|
||||
));
|
||||
assert_eq!(
|
||||
ConcreteDataType::from_arrow_type(&ArrowDataType::List(Box::new(Field::new(
|
||||
"item",
|
||||
@@ -345,31 +357,48 @@ mod tests {
|
||||
#[test]
|
||||
fn test_from_arrow_timestamp() {
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Millisecond)
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Microsecond)
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Nanosecond)
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Second),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Second)
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_timestamp() {
|
||||
assert!(ConcreteDataType::timestamp_millis_datatype().is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp());
|
||||
assert!(ConcreteDataType::int64_datatype().is_timestamp());
|
||||
fn test_is_timestamp_compatible() {
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp_compatible());
|
||||
assert!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp_compatible()
|
||||
);
|
||||
assert!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp_compatible()
|
||||
);
|
||||
assert!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp_compatible()
|
||||
);
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::int64_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::null_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::binary_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::date_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::datetime_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::string_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::int32_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -377,4 +406,81 @@ mod tests {
|
||||
assert!(ConcreteDataType::null_datatype().is_null());
|
||||
assert!(!ConcreteDataType::int32_datatype().is_null());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_float() {
|
||||
assert!(!ConcreteDataType::int32_datatype().is_float());
|
||||
assert!(ConcreteDataType::float32_datatype().is_float());
|
||||
assert!(ConcreteDataType::float64_datatype().is_float());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_boolean() {
|
||||
assert!(!ConcreteDataType::int32_datatype().is_boolean());
|
||||
assert!(!ConcreteDataType::float32_datatype().is_boolean());
|
||||
assert!(ConcreteDataType::boolean_datatype().is_boolean());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_stringifiable() {
|
||||
assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
|
||||
assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::string_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::date_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_signed() {
|
||||
assert!(ConcreteDataType::int8_datatype().is_signed());
|
||||
assert!(ConcreteDataType::int16_datatype().is_signed());
|
||||
assert!(ConcreteDataType::int32_datatype().is_signed());
|
||||
assert!(ConcreteDataType::int64_datatype().is_signed());
|
||||
assert!(ConcreteDataType::date_datatype().is_signed());
|
||||
assert!(ConcreteDataType::datetime_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
|
||||
|
||||
assert!(!ConcreteDataType::uint8_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::uint16_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::uint32_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::uint64_datatype().is_signed());
|
||||
|
||||
assert!(!ConcreteDataType::float32_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::float64_datatype().is_signed());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_unsigned() {
|
||||
assert!(!ConcreteDataType::int8_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::int16_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::int32_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::int64_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::date_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
|
||||
|
||||
assert!(ConcreteDataType::uint8_datatype().is_unsigned());
|
||||
assert!(ConcreteDataType::uint16_datatype().is_unsigned());
|
||||
assert!(ConcreteDataType::uint32_datatype().is_unsigned());
|
||||
assert!(ConcreteDataType::uint64_datatype().is_unsigned());
|
||||
|
||||
assert!(!ConcreteDataType::float32_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::float64_datatype().is_unsigned());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_numerics() {
|
||||
let nums = ConcreteDataType::numerics();
|
||||
assert_eq!(10, nums.len());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,6 +23,7 @@ pub mod prelude;
|
||||
mod scalars;
|
||||
pub mod schema;
|
||||
pub mod serialize;
|
||||
mod timestamp;
|
||||
pub mod type_id;
|
||||
pub mod types;
|
||||
pub mod value;
|
||||
|
||||
@@ -12,27 +12,9 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
///! Some helper macros for datatypes, copied from databend.
|
||||
#[macro_export]
|
||||
macro_rules! for_all_scalar_types {
|
||||
($macro:tt $(, $x:tt)*) => {
|
||||
$macro! {
|
||||
[$($x),*],
|
||||
{ i8 },
|
||||
{ i16 },
|
||||
{ i32 },
|
||||
{ i64 },
|
||||
{ u8 },
|
||||
{ u16 },
|
||||
{ u32 },
|
||||
{ u64 },
|
||||
{ f32 },
|
||||
{ f64 },
|
||||
{ bool },
|
||||
}
|
||||
};
|
||||
}
|
||||
//! Some helper macros for datatypes, copied from databend.
|
||||
|
||||
/// Apply the macro rules to all primitive types.
|
||||
#[macro_export]
|
||||
macro_rules! for_all_primitive_types {
|
||||
($macro:tt $(, $x:tt)*) => {
|
||||
@@ -52,6 +34,8 @@ macro_rules! for_all_primitive_types {
|
||||
};
|
||||
}
|
||||
|
||||
/// Match the logical type and apply `$body` to all primitive types and
|
||||
/// `$nbody` to other types.
|
||||
#[macro_export]
|
||||
macro_rules! with_match_primitive_type_id {
|
||||
($key_type:expr, | $_:tt $T:ident | $body:tt, $nbody:tt) => {{
|
||||
@@ -62,17 +46,21 @@ macro_rules! with_match_primitive_type_id {
|
||||
}
|
||||
|
||||
use $crate::type_id::LogicalTypeId;
|
||||
use $crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
|
||||
UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
match $key_type {
|
||||
LogicalTypeId::Int8 => __with_ty__! { i8 },
|
||||
LogicalTypeId::Int16 => __with_ty__! { i16 },
|
||||
LogicalTypeId::Int32 => __with_ty__! { i32 },
|
||||
LogicalTypeId::Int64 => __with_ty__! { i64 },
|
||||
LogicalTypeId::UInt8 => __with_ty__! { u8 },
|
||||
LogicalTypeId::UInt16 => __with_ty__! { u16 },
|
||||
LogicalTypeId::UInt32 => __with_ty__! { u32 },
|
||||
LogicalTypeId::UInt64 => __with_ty__! { u64 },
|
||||
LogicalTypeId::Float32 => __with_ty__! { f32 },
|
||||
LogicalTypeId::Float64 => __with_ty__! { f64 },
|
||||
LogicalTypeId::Int8 => __with_ty__! { Int8Type },
|
||||
LogicalTypeId::Int16 => __with_ty__! { Int16Type },
|
||||
LogicalTypeId::Int32 => __with_ty__! { Int32Type },
|
||||
LogicalTypeId::Int64 => __with_ty__! { Int64Type },
|
||||
LogicalTypeId::UInt8 => __with_ty__! { UInt8Type },
|
||||
LogicalTypeId::UInt16 => __with_ty__! { UInt16Type },
|
||||
LogicalTypeId::UInt32 => __with_ty__! { UInt32Type },
|
||||
LogicalTypeId::UInt64 => __with_ty__! { UInt64Type },
|
||||
LogicalTypeId::Float32 => __with_ty__! { Float32Type },
|
||||
LogicalTypeId::Float64 => __with_ty__! { Float64Type },
|
||||
|
||||
_ => $nbody,
|
||||
}
|
||||
|
||||
@@ -16,8 +16,5 @@ pub use crate::data_type::{ConcreteDataType, DataType, DataTypeRef};
|
||||
pub use crate::macros::*;
|
||||
pub use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
|
||||
pub use crate::type_id::LogicalTypeId;
|
||||
pub use crate::types::Primitive;
|
||||
pub use crate::value::{Value, ValueRef};
|
||||
pub use crate::vectors::{
|
||||
Helper as VectorHelper, MutableVector, Validity, Vector, VectorBuilder, VectorRef,
|
||||
};
|
||||
pub use crate::vectors::{MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
@@ -14,11 +14,17 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::*;
|
||||
use crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
|
||||
UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::{ListValue, ListValueRef, Value};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, MutableVector,
|
||||
PrimitiveVector, StringVector, Vector,
|
||||
};
|
||||
|
||||
fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
|
||||
match iter.size_hint() {
|
||||
@@ -35,7 +41,7 @@ where
|
||||
for<'a> Self::VectorType: ScalarVector<RefItem<'a> = Self::RefType<'a>>,
|
||||
{
|
||||
type VectorType: ScalarVector<OwnedItem = Self>;
|
||||
type RefType<'a>: ScalarRef<'a, ScalarType = Self, VectorType = Self::VectorType>
|
||||
type RefType<'a>: ScalarRef<'a, ScalarType = Self>
|
||||
where
|
||||
Self: 'a;
|
||||
/// Get a reference of the current value.
|
||||
@@ -46,7 +52,6 @@ where
|
||||
}
|
||||
|
||||
pub trait ScalarRef<'a>: std::fmt::Debug + Clone + Copy + Send + 'a {
|
||||
type VectorType: ScalarVector<RefItem<'a> = Self>;
|
||||
/// The corresponding [`Scalar`] type.
|
||||
type ScalarType: Scalar<RefType<'a> = Self>;
|
||||
|
||||
@@ -63,7 +68,7 @@ where
|
||||
{
|
||||
type OwnedItem: Scalar<VectorType = Self>;
|
||||
/// The reference item of this vector.
|
||||
type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem, VectorType = Self>
|
||||
type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem>
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
@@ -137,47 +142,46 @@ pub trait ScalarVectorBuilder: MutableVector {
|
||||
fn finish(&mut self) -> Self::VectorType;
|
||||
}
|
||||
|
||||
macro_rules! impl_primitive_scalar_type {
|
||||
($native:ident) => {
|
||||
impl Scalar for $native {
|
||||
type VectorType = PrimitiveVector<$native>;
|
||||
type RefType<'a> = $native;
|
||||
macro_rules! impl_scalar_for_native {
|
||||
($Native: ident, $DataType: ident) => {
|
||||
impl Scalar for $Native {
|
||||
type VectorType = PrimitiveVector<$DataType>;
|
||||
type RefType<'a> = $Native;
|
||||
|
||||
#[inline]
|
||||
fn as_scalar_ref(&self) -> $native {
|
||||
fn as_scalar_ref(&self) -> $Native {
|
||||
*self
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
#[inline]
|
||||
fn upcast_gat<'short, 'long: 'short>(long: $native) -> $native {
|
||||
fn upcast_gat<'short, 'long: 'short>(long: $Native) -> $Native {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement [`ScalarRef`] for primitive types. Note that primitive types are both [`Scalar`] and [`ScalarRef`].
|
||||
impl<'a> ScalarRef<'a> for $native {
|
||||
type VectorType = PrimitiveVector<$native>;
|
||||
type ScalarType = $native;
|
||||
impl<'a> ScalarRef<'a> for $Native {
|
||||
type ScalarType = $Native;
|
||||
|
||||
#[inline]
|
||||
fn to_owned_scalar(&self) -> $native {
|
||||
fn to_owned_scalar(&self) -> $Native {
|
||||
*self
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_primitive_scalar_type!(u8);
|
||||
impl_primitive_scalar_type!(u16);
|
||||
impl_primitive_scalar_type!(u32);
|
||||
impl_primitive_scalar_type!(u64);
|
||||
impl_primitive_scalar_type!(i8);
|
||||
impl_primitive_scalar_type!(i16);
|
||||
impl_primitive_scalar_type!(i32);
|
||||
impl_primitive_scalar_type!(i64);
|
||||
impl_primitive_scalar_type!(f32);
|
||||
impl_primitive_scalar_type!(f64);
|
||||
impl_scalar_for_native!(u8, UInt8Type);
|
||||
impl_scalar_for_native!(u16, UInt16Type);
|
||||
impl_scalar_for_native!(u32, UInt32Type);
|
||||
impl_scalar_for_native!(u64, UInt64Type);
|
||||
impl_scalar_for_native!(i8, Int8Type);
|
||||
impl_scalar_for_native!(i16, Int16Type);
|
||||
impl_scalar_for_native!(i32, Int32Type);
|
||||
impl_scalar_for_native!(i64, Int64Type);
|
||||
impl_scalar_for_native!(f32, Float32Type);
|
||||
impl_scalar_for_native!(f64, Float64Type);
|
||||
|
||||
impl Scalar for bool {
|
||||
type VectorType = BooleanVector;
|
||||
@@ -196,7 +200,6 @@ impl Scalar for bool {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for bool {
|
||||
type VectorType = BooleanVector;
|
||||
type ScalarType = bool;
|
||||
|
||||
#[inline]
|
||||
@@ -221,7 +224,6 @@ impl Scalar for String {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for &'a str {
|
||||
type VectorType = StringVector;
|
||||
type ScalarType = String;
|
||||
|
||||
#[inline]
|
||||
@@ -246,7 +248,6 @@ impl Scalar for Vec<u8> {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for &'a [u8] {
|
||||
type VectorType = BinaryVector;
|
||||
type ScalarType = Vec<u8>;
|
||||
|
||||
#[inline]
|
||||
@@ -269,7 +270,6 @@ impl Scalar for Date {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for Date {
|
||||
type VectorType = DateVector;
|
||||
type ScalarType = Date;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
@@ -291,7 +291,6 @@ impl Scalar for DateTime {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for DateTime {
|
||||
type VectorType = DateTimeVector;
|
||||
type ScalarType = DateTime;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
@@ -299,27 +298,7 @@ impl<'a> ScalarRef<'a> for DateTime {
|
||||
}
|
||||
}
|
||||
|
||||
impl Scalar for Timestamp {
|
||||
type VectorType = TimestampVector;
|
||||
type RefType<'a> = Timestamp;
|
||||
|
||||
fn as_scalar_ref(&self) -> Self::RefType<'_> {
|
||||
*self
|
||||
}
|
||||
|
||||
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for Timestamp {
|
||||
type VectorType = TimestampVector;
|
||||
type ScalarType = Timestamp;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
*self
|
||||
}
|
||||
}
|
||||
// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.
|
||||
|
||||
impl Scalar for ListValue {
|
||||
type VectorType = ListVector;
|
||||
@@ -335,7 +314,6 @@ impl Scalar for ListValue {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for ListValueRef<'a> {
|
||||
type VectorType = ListVector;
|
||||
type ScalarType = ListValue;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
@@ -357,8 +335,9 @@ impl<'a> ScalarRef<'a> for ListValueRef<'a> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::vectors::binary::BinaryVector;
|
||||
use crate::vectors::primitive::Int32Vector;
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::timestamp::TimestampSecond;
|
||||
use crate::vectors::{BinaryVector, Int32Vector, ListVectorBuilder, TimestampSecondVector};
|
||||
|
||||
fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
|
||||
let mut builder = T::Builder::with_capacity(items.len());
|
||||
@@ -454,11 +433,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_build_timestamp_vector() {
|
||||
let expect: Vec<Option<Timestamp>> = vec![Some(10.into()), None, Some(42.into())];
|
||||
let vector: TimestampVector = build_vector_from_slice(&expect);
|
||||
let expect: Vec<Option<TimestampSecond>> = vec![Some(10.into()), None, Some(42.into())];
|
||||
let vector: TimestampSecondVector = build_vector_from_slice(&expect);
|
||||
assert_vector_eq(&expect, &vector);
|
||||
let val = vector.get_data(0).unwrap();
|
||||
assert_eq!(val, val.as_scalar_ref());
|
||||
assert_eq!(10, val.to_owned_scalar().value());
|
||||
assert_eq!(TimestampSecond::from(10), val.to_owned_scalar());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,128 +12,27 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod column_schema;
|
||||
mod constraint;
|
||||
mod raw;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use arrow::datatypes::Metadata;
|
||||
use arrow::datatypes::{Field, Schema as ArrowSchema};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, DeserializeSnafu, Error, Result, SerializeSnafu};
|
||||
use crate::data_type::DataType;
|
||||
use crate::error::{self, Error, Result};
|
||||
pub use crate::schema::column_schema::{ColumnSchema, Metadata};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
use crate::vectors::VectorRef;
|
||||
|
||||
/// Key used to store whether the column is time index in arrow field's metadata.
|
||||
const TIME_INDEX_KEY: &str = "greptime:time_index";
|
||||
/// Key used to store version number of the schema in metadata.
|
||||
const VERSION_KEY: &str = "greptime:version";
|
||||
/// Key used to store default constraint in arrow field's metadata.
|
||||
const ARROW_FIELD_DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
|
||||
|
||||
/// Schema of a column, used as an immutable struct.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ColumnSchema {
|
||||
pub name: String,
|
||||
pub data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
is_time_index: bool,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
metadata: Metadata,
|
||||
}
|
||||
|
||||
impl ColumnSchema {
|
||||
pub fn new<T: Into<String>>(
|
||||
name: T,
|
||||
data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
) -> ColumnSchema {
|
||||
ColumnSchema {
|
||||
name: name.into(),
|
||||
data_type,
|
||||
is_nullable,
|
||||
is_time_index: false,
|
||||
default_constraint: None,
|
||||
metadata: Metadata::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_time_index(&self) -> bool {
|
||||
self.is_time_index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_nullable(&self) -> bool {
|
||||
self.is_nullable
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
|
||||
self.default_constraint.as_ref()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &Metadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
pub fn with_time_index(mut self, is_time_index: bool) -> Self {
|
||||
self.is_time_index = is_time_index;
|
||||
if is_time_index {
|
||||
self.metadata
|
||||
.insert(TIME_INDEX_KEY.to_string(), "true".to_string());
|
||||
} else {
|
||||
self.metadata.remove(TIME_INDEX_KEY);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_default_constraint(
|
||||
mut self,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
) -> Result<Self> {
|
||||
if let Some(constraint) = &default_constraint {
|
||||
constraint.validate(&self.data_type, self.is_nullable)?;
|
||||
}
|
||||
|
||||
self.default_constraint = default_constraint;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Creates a new [`ColumnSchema`] with given metadata.
|
||||
pub fn with_metadata(mut self, metadata: Metadata) -> Self {
|
||||
self.metadata = metadata;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
|
||||
match &self.default_constraint {
|
||||
Some(c) => c
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some),
|
||||
None => {
|
||||
if self.is_nullable {
|
||||
// No default constraint, use null as default value.
|
||||
// TODO(yingwen): Use NullVector once it supports setting logical type.
|
||||
ColumnDefaultConstraint::null_value()
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A common schema, should be immutable.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Schema {
|
||||
column_schemas: Vec<ColumnSchema>,
|
||||
name_to_index: HashMap<String, usize>,
|
||||
@@ -231,7 +130,7 @@ impl Schema {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &Metadata {
|
||||
pub fn metadata(&self) -> &HashMap<String, String> {
|
||||
&self.arrow_schema.metadata
|
||||
}
|
||||
}
|
||||
@@ -243,7 +142,7 @@ pub struct SchemaBuilder {
|
||||
fields: Vec<Field>,
|
||||
timestamp_index: Option<usize>,
|
||||
version: u32,
|
||||
metadata: Metadata,
|
||||
metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<ColumnSchema>> for SchemaBuilder {
|
||||
@@ -292,7 +191,7 @@ impl SchemaBuilder {
|
||||
self.metadata
|
||||
.insert(VERSION_KEY.to_string(), self.version.to_string());
|
||||
|
||||
let arrow_schema = ArrowSchema::from(self.fields).with_metadata(self.metadata);
|
||||
let arrow_schema = ArrowSchema::new(self.fields).with_metadata(self.metadata);
|
||||
|
||||
Ok(Schema {
|
||||
column_schemas: self.column_schemas,
|
||||
@@ -347,7 +246,7 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us
|
||||
|
||||
let column_schema = &column_schemas[timestamp_index];
|
||||
ensure!(
|
||||
column_schema.data_type.is_timestamp(),
|
||||
column_schema.data_type.is_timestamp_compatible(),
|
||||
error::InvalidTimestampIndexSnafu {
|
||||
index: timestamp_index,
|
||||
}
|
||||
@@ -364,58 +263,6 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us
|
||||
|
||||
pub type SchemaRef = Arc<Schema>;
|
||||
|
||||
impl TryFrom<&Field> for ColumnSchema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(field: &Field) -> Result<ColumnSchema> {
|
||||
let data_type = ConcreteDataType::try_from(&field.data_type)?;
|
||||
let mut metadata = field.metadata.clone();
|
||||
let default_constraint = match metadata.remove(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY) {
|
||||
Some(json) => Some(serde_json::from_str(&json).context(DeserializeSnafu { json })?),
|
||||
None => None,
|
||||
};
|
||||
let is_time_index = metadata.contains_key(TIME_INDEX_KEY);
|
||||
|
||||
Ok(ColumnSchema {
|
||||
name: field.name.clone(),
|
||||
data_type,
|
||||
is_nullable: field.is_nullable,
|
||||
is_time_index,
|
||||
default_constraint,
|
||||
metadata,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ColumnSchema> for Field {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
|
||||
let mut metadata = column_schema.metadata.clone();
|
||||
if let Some(value) = &column_schema.default_constraint {
|
||||
// Adds an additional metadata to store the default constraint.
|
||||
let old = metadata.insert(
|
||||
ARROW_FIELD_DEFAULT_CONSTRAINT_KEY.to_string(),
|
||||
serde_json::to_string(&value).context(SerializeSnafu)?,
|
||||
);
|
||||
|
||||
ensure!(
|
||||
old.is_none(),
|
||||
error::DuplicateMetaSnafu {
|
||||
key: ARROW_FIELD_DEFAULT_CONSTRAINT_KEY,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Field::new(
|
||||
column_schema.name.clone(),
|
||||
column_schema.data_type.as_arrow_type(),
|
||||
column_schema.is_nullable(),
|
||||
)
|
||||
.with_metadata(metadata))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Arc<ArrowSchema>> for Schema {
|
||||
type Error = Error;
|
||||
|
||||
@@ -424,7 +271,7 @@ impl TryFrom<Arc<ArrowSchema>> for Schema {
|
||||
let mut name_to_index = HashMap::with_capacity(arrow_schema.fields.len());
|
||||
for field in &arrow_schema.fields {
|
||||
let column_schema = ColumnSchema::try_from(field)?;
|
||||
name_to_index.insert(field.name.clone(), column_schemas.len());
|
||||
name_to_index.insert(field.name().to_string(), column_schemas.len());
|
||||
column_schemas.push(column_schema);
|
||||
}
|
||||
|
||||
@@ -465,7 +312,7 @@ impl TryFrom<ArrowSchema> for Schema {
|
||||
}
|
||||
}
|
||||
|
||||
fn try_parse_version(metadata: &Metadata, key: &str) -> Result<u32> {
|
||||
fn try_parse_version(metadata: &HashMap<String, String>, key: &str) -> Result<u32> {
|
||||
if let Some(value) = metadata.get(key) {
|
||||
let version = value
|
||||
.parse()
|
||||
@@ -479,127 +326,8 @@ fn try_parse_version(metadata: &Metadata, key: &str) -> Result<u32> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use super::*;
|
||||
use crate::value::Value;
|
||||
|
||||
#[test]
|
||||
fn test_column_schema() {
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
|
||||
let field = Field::try_from(&column_schema).unwrap();
|
||||
assert_eq!("test", field.name);
|
||||
assert_eq!(ArrowDataType::Int32, field.data_type);
|
||||
assert!(field.is_nullable);
|
||||
|
||||
let new_column_schema = ColumnSchema::try_from(&field).unwrap();
|
||||
assert_eq!(column_schema, new_column_schema);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_schema_with_default_constraint() {
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
|
||||
.with_default_constraint(Some(ColumnDefaultConstraint::Value(Value::from(99))))
|
||||
.unwrap();
|
||||
assert!(column_schema
|
||||
.metadata()
|
||||
.get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
|
||||
.is_none());
|
||||
|
||||
let field = Field::try_from(&column_schema).unwrap();
|
||||
assert_eq!("test", field.name);
|
||||
assert_eq!(ArrowDataType::Int32, field.data_type);
|
||||
assert!(field.is_nullable);
|
||||
assert_eq!(
|
||||
"{\"Value\":{\"Int32\":99}}",
|
||||
field
|
||||
.metadata
|
||||
.get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
|
||||
.unwrap()
|
||||
);
|
||||
|
||||
let new_column_schema = ColumnSchema::try_from(&field).unwrap();
|
||||
assert_eq!(column_schema, new_column_schema);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_schema_with_metadata() {
|
||||
let mut metadata = Metadata::new();
|
||||
metadata.insert("k1".to_string(), "v1".to_string());
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
|
||||
.with_metadata(metadata)
|
||||
.with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
|
||||
.unwrap();
|
||||
assert_eq!("v1", column_schema.metadata().get("k1").unwrap());
|
||||
assert!(column_schema
|
||||
.metadata()
|
||||
.get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
|
||||
.is_none());
|
||||
|
||||
let field = Field::try_from(&column_schema).unwrap();
|
||||
assert_eq!("v1", field.metadata.get("k1").unwrap());
|
||||
assert!(field
|
||||
.metadata
|
||||
.get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
|
||||
.is_some());
|
||||
|
||||
let new_column_schema = ColumnSchema::try_from(&field).unwrap();
|
||||
assert_eq!(column_schema, new_column_schema);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_schema_with_duplicate_metadata() {
|
||||
let mut metadata = Metadata::new();
|
||||
metadata.insert(
|
||||
ARROW_FIELD_DEFAULT_CONSTRAINT_KEY.to_string(),
|
||||
"v1".to_string(),
|
||||
);
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
|
||||
.with_metadata(metadata)
|
||||
.with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
|
||||
.unwrap();
|
||||
Field::try_from(&column_schema).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_schema_invalid_default_constraint() {
|
||||
ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false)
|
||||
.with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
|
||||
.unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_default_constraint_try_into_from() {
|
||||
let default_constraint = ColumnDefaultConstraint::Value(Value::from(42i64));
|
||||
|
||||
let bytes: Vec<u8> = default_constraint.clone().try_into().unwrap();
|
||||
let from_value = ColumnDefaultConstraint::try_from(&bytes[..]).unwrap();
|
||||
|
||||
assert_eq!(default_constraint, from_value);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_schema_create_default_null() {
|
||||
// Implicit default null.
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
|
||||
let v = column_schema.create_default_vector(5).unwrap().unwrap();
|
||||
assert_eq!(5, v.len());
|
||||
assert!(v.only_null());
|
||||
|
||||
// Explicit default null.
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
|
||||
.with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
|
||||
.unwrap();
|
||||
let v = column_schema.create_default_vector(5).unwrap().unwrap();
|
||||
assert_eq!(5, v.len());
|
||||
assert!(v.only_null());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_column_schema_no_default() {
|
||||
let column_schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
|
||||
assert!(column_schema.create_default_vector(5).unwrap().is_none());
|
||||
}
|
||||
use crate::data_type::ConcreteDataType;
|
||||
|
||||
#[test]
|
||||
fn test_build_empty_schema() {
|
||||
@@ -654,8 +382,12 @@ mod tests {
|
||||
fn test_schema_with_timestamp() {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
];
|
||||
let schema = SchemaBuilder::try_from(column_schemas.clone())
|
||||
.unwrap()
|
||||
|
||||
@@ -22,7 +22,7 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{Int64Vector, TimestampVector, VectorRef};
|
||||
use crate::vectors::{Int64Vector, TimestampMillisecondVector, VectorRef};
|
||||
|
||||
const CURRENT_TIMESTAMP: &str = "current_timestamp()";
|
||||
|
||||
@@ -81,7 +81,7 @@ impl ColumnDefaultConstraint {
|
||||
error::UnsupportedDefaultExprSnafu { expr }
|
||||
);
|
||||
ensure!(
|
||||
data_type.is_timestamp(),
|
||||
data_type.is_timestamp_compatible(),
|
||||
error::DefaultValueTypeSnafu {
|
||||
reason: "return value of the function must has timestamp type",
|
||||
}
|
||||
@@ -162,8 +162,10 @@ fn create_current_timestamp_vector(
|
||||
data_type: &ConcreteDataType,
|
||||
num_rows: usize,
|
||||
) -> Result<VectorRef> {
|
||||
// FIXME(yingwen): We should implements cast in VectorOp so we could cast the millisecond vector
|
||||
// to other data type and avoid this match.
|
||||
match data_type {
|
||||
ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampVector::from_values(
|
||||
ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampMillisecondVector::from_values(
|
||||
std::iter::repeat(util::current_time_millis()).take(num_rows),
|
||||
))),
|
||||
ConcreteDataType::Int64(_) => Ok(Arc::new(Int64Vector::from_values(
|
||||
@@ -217,7 +219,7 @@ mod tests {
|
||||
fn test_validate_function_constraint() {
|
||||
let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
|
||||
constraint
|
||||
.validate(&ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
|
||||
.unwrap();
|
||||
constraint
|
||||
.validate(&ConcreteDataType::boolean_datatype(), false)
|
||||
@@ -225,7 +227,7 @@ mod tests {
|
||||
|
||||
let constraint = ColumnDefaultConstraint::Function("hello()".to_string());
|
||||
constraint
|
||||
.validate(&ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
|
||||
.unwrap_err();
|
||||
}
|
||||
|
||||
@@ -262,7 +264,7 @@ mod tests {
|
||||
fn test_create_default_vector_by_func() {
|
||||
let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
|
||||
// Timestamp type.
|
||||
let data_type = ConcreteDataType::timestamp_millis_datatype();
|
||||
let data_type = ConcreteDataType::timestamp_millisecond_datatype();
|
||||
let v = constraint
|
||||
.create_default_vector(&data_type, false, 4)
|
||||
.unwrap();
|
||||
@@ -286,7 +288,7 @@ mod tests {
|
||||
);
|
||||
|
||||
let constraint = ColumnDefaultConstraint::Function("no".to_string());
|
||||
let data_type = ConcreteDataType::timestamp_millis_datatype();
|
||||
let data_type = ConcreteDataType::timestamp_millisecond_datatype();
|
||||
constraint
|
||||
.create_default_vector(&data_type, false, 4)
|
||||
.unwrap_err();
|
||||
|
||||
@@ -20,7 +20,7 @@ use crate::schema::{ColumnSchema, Schema, SchemaBuilder};
|
||||
/// Struct used to serialize and deserialize [`Schema`](crate::schema::Schema).
|
||||
///
|
||||
/// This struct only contains necessary data to recover the Schema.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct RawSchema {
|
||||
pub column_schemas: Vec<ColumnSchema>,
|
||||
pub timestamp_index: Option<usize>,
|
||||
@@ -56,8 +56,12 @@ mod tests {
|
||||
fn test_raw_convert() {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
];
|
||||
let schema = SchemaBuilder::try_from(column_schemas)
|
||||
.unwrap()
|
||||
|
||||
@@ -42,7 +42,10 @@ pub enum LogicalTypeId {
|
||||
/// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
|
||||
DateTime,
|
||||
|
||||
Timestamp,
|
||||
TimestampSecond,
|
||||
TimestampMillisecond,
|
||||
TimestampMicrosecond,
|
||||
TimestampNanosecond,
|
||||
|
||||
List,
|
||||
}
|
||||
@@ -74,7 +77,14 @@ impl LogicalTypeId {
|
||||
LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
|
||||
LogicalTypeId::Date => ConcreteDataType::date_datatype(),
|
||||
LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
|
||||
LogicalTypeId::Timestamp => ConcreteDataType::timestamp_millis_datatype(), // to timestamp type with default time unit
|
||||
LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
|
||||
LogicalTypeId::TimestampMillisecond => {
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
}
|
||||
LogicalTypeId::TimestampMicrosecond => {
|
||||
ConcreteDataType::timestamp_microsecond_datatype()
|
||||
}
|
||||
LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
LogicalTypeId::List => {
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
|
||||
}
|
||||
|
||||
@@ -14,25 +14,24 @@
|
||||
|
||||
mod binary_type;
|
||||
mod boolean_type;
|
||||
mod date;
|
||||
mod datetime;
|
||||
mod date_type;
|
||||
mod datetime_type;
|
||||
mod list_type;
|
||||
mod null_type;
|
||||
mod primitive_traits;
|
||||
mod primitive_type;
|
||||
mod string_type;
|
||||
mod timestamp;
|
||||
|
||||
mod timestamp_type;
|
||||
|
||||
pub use binary_type::BinaryType;
|
||||
pub use boolean_type::BooleanType;
|
||||
pub use date::DateType;
|
||||
pub use datetime::DateTimeType;
|
||||
pub use date_type::DateType;
|
||||
pub use datetime_type::DateTimeType;
|
||||
pub use list_type::ListType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_traits::{OrdPrimitive, Primitive};
|
||||
pub use primitive_type::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, PrimitiveElement,
|
||||
PrimitiveType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
|
||||
NativeType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
|
||||
};
|
||||
pub use string_type::StringType;
|
||||
pub use timestamp::TimestampType;
|
||||
pub use timestamp_type::*;
|
||||
|
||||
@@ -53,4 +53,8 @@ impl DataType for BinaryType {
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,4 +52,8 @@ impl DataType for BooleanType {
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BooleanVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,15 +15,17 @@
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Field};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::value::ListValue;
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::{ListValue, Value};
|
||||
use crate::vectors::{ListVectorBuilder, MutableVector};
|
||||
|
||||
/// Used to represent the List datatype.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ListType {
|
||||
/// The type of List's inner data.
|
||||
inner: Box<ConcreteDataType>,
|
||||
/// The type of List's item.
|
||||
// Use Box to avoid recursive dependency, as enum ConcreteDataType depends on ListType.
|
||||
item_type: Box<ConcreteDataType>,
|
||||
}
|
||||
|
||||
impl Default for ListType {
|
||||
@@ -33,9 +35,10 @@ impl Default for ListType {
|
||||
}
|
||||
|
||||
impl ListType {
|
||||
pub fn new(datatype: ConcreteDataType) -> Self {
|
||||
/// Create a new `ListType` whose item's data type is `item_type`.
|
||||
pub fn new(item_type: ConcreteDataType) -> Self {
|
||||
ListType {
|
||||
inner: Box::new(datatype),
|
||||
item_type: Box::new(item_type),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -50,20 +53,24 @@ impl DataType for ListType {
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::List(ListValue::new(None, *self.inner.clone()))
|
||||
Value::List(ListValue::new(None, *self.item_type.clone()))
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
let field = Box::new(Field::new("item", self.inner.as_arrow_type(), true));
|
||||
let field = Box::new(Field::new("item", self.item_type.as_arrow_type(), true));
|
||||
ArrowDataType::List(field)
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(ListVectorBuilder::with_type_capacity(
|
||||
*self.inner.clone(),
|
||||
*self.item_type.clone(),
|
||||
capacity,
|
||||
))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -27,7 +27,7 @@ pub struct NullType;
|
||||
|
||||
impl NullType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
Arc::new(NullType)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,4 +51,8 @@ impl DataType for NullType {
|
||||
fn create_mutable_vector(&self, _capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(NullVectorBuilder::default())
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,12 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::TypeId;
|
||||
use std::marker::PhantomData;
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use arrow::array::PrimitiveArray;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use paste::paste;
|
||||
use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType};
|
||||
use common_time::{Date, DateTime};
|
||||
use num::NumCast;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
@@ -25,92 +24,226 @@ use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::primitive_traits::Primitive;
|
||||
use crate::types::{DateTimeType, DateType};
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector};
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct PrimitiveType<T: Primitive> {
|
||||
#[serde(skip)]
|
||||
_phantom: PhantomData<T>,
|
||||
/// Data types that can be used as arrow's native type.
|
||||
pub trait NativeType: ArrowNativeType + NumCast {
|
||||
/// Largest numeric type this primitive type can be cast to.
|
||||
type LargestType: NativeType;
|
||||
}
|
||||
|
||||
impl<T: Primitive, U: Primitive> PartialEq<PrimitiveType<U>> for PrimitiveType<T> {
|
||||
fn eq(&self, _other: &PrimitiveType<U>) -> bool {
|
||||
TypeId::of::<T>() == TypeId::of::<U>()
|
||||
}
|
||||
macro_rules! impl_native_type {
|
||||
($Type: ident, $LargestType: ident) => {
|
||||
impl NativeType for $Type {
|
||||
type LargestType = $LargestType;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl<T: Primitive> Eq for PrimitiveType<T> {}
|
||||
impl_native_type!(u8, u64);
|
||||
impl_native_type!(u16, u64);
|
||||
impl_native_type!(u32, u64);
|
||||
impl_native_type!(u64, u64);
|
||||
impl_native_type!(i8, i64);
|
||||
impl_native_type!(i16, i64);
|
||||
impl_native_type!(i32, i64);
|
||||
impl_native_type!(i64, i64);
|
||||
impl_native_type!(f32, f64);
|
||||
impl_native_type!(f64, f64);
|
||||
|
||||
/// A trait that provides helper methods for a primitive type to implement the [PrimitiveVector].
|
||||
pub trait PrimitiveElement
|
||||
where
|
||||
for<'a> Self: Primitive
|
||||
+ Scalar<VectorType = PrimitiveVector<Self>>
|
||||
+ ScalarRef<'a, ScalarType = Self, VectorType = PrimitiveVector<Self>>
|
||||
+ Scalar<RefType<'a> = Self>,
|
||||
/// Represents the wrapper type that wraps a native type using the `newtype pattern`,
|
||||
/// e.g. [Date](`common_time::Date`) is a wrapper type for the underlying native
|
||||
/// type `i32`.
|
||||
pub trait WrapperType:
|
||||
Copy
|
||||
+ Scalar
|
||||
+ PartialEq
|
||||
+ Into<Value>
|
||||
+ Into<ValueRef<'static>>
|
||||
+ Serialize
|
||||
+ Into<serde_json::Value>
|
||||
{
|
||||
/// Logical primitive type that this wrapper type belongs to.
|
||||
type LogicalType: LogicalPrimitiveType<Wrapper = Self, Native = Self::Native>;
|
||||
/// The underlying native type.
|
||||
type Native: NativeType;
|
||||
|
||||
/// Convert native type into this wrapper type.
|
||||
fn from_native(value: Self::Native) -> Self;
|
||||
|
||||
/// Convert this wrapper type into native type.
|
||||
fn into_native(self) -> Self::Native;
|
||||
}
|
||||
|
||||
/// Trait bridging the logical primitive type with [ArrowPrimitiveType].
|
||||
pub trait LogicalPrimitiveType: 'static + Sized {
|
||||
/// Arrow primitive type of this logical type.
|
||||
type ArrowPrimitive: ArrowPrimitiveType<Native = Self::Native>;
|
||||
/// Native (physical) type of this logical type.
|
||||
type Native: NativeType;
|
||||
/// Wrapper type that the vector returns.
|
||||
type Wrapper: WrapperType<LogicalType = Self, Native = Self::Native>
|
||||
+ for<'a> Scalar<VectorType = PrimitiveVector<Self>, RefType<'a> = Self::Wrapper>
|
||||
+ for<'a> ScalarRef<'a, ScalarType = Self::Wrapper>;
|
||||
|
||||
/// Construct the data type struct.
|
||||
fn build_data_type() -> ConcreteDataType;
|
||||
|
||||
/// Returns the name of the type id.
|
||||
fn type_name() -> String;
|
||||
/// Return the name of the type.
|
||||
fn type_name() -> &'static str;
|
||||
|
||||
/// Dynamic cast the vector to the concrete vector type.
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveArray<Self>>;
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<Self>>;
|
||||
|
||||
/// Cast value ref to the primitive type.
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self>>;
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self::Wrapper>>;
|
||||
}
|
||||
|
||||
macro_rules! impl_primitive_element {
|
||||
($Type:ident, $TypeId:ident) => {
|
||||
paste::paste! {
|
||||
impl PrimitiveElement for $Type {
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::$TypeId(PrimitiveType::<$Type>::default())
|
||||
}
|
||||
/// A newtype for [WrapperType] that complements it with the `Ord` feature. Wrapping non-ordered
|
||||
/// primitive types like `f32` and `f64` in `OrdPrimitive` allows them to be used in places that
|
||||
/// require `Ord`. For example, in `Median` or `Percentile` UDAFs.
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct OrdPrimitive<T: WrapperType>(pub T);
|
||||
|
||||
fn type_name() -> String {
|
||||
stringify!($TypeId).to_string()
|
||||
}
|
||||
impl<T: WrapperType> OrdPrimitive<T> {
|
||||
pub fn as_primitive(&self) -> T {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveArray<$Type>> {
|
||||
let primitive_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveVector<$Type>>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to vector of primitive type {}",
|
||||
vector.vector_type_name(),
|
||||
stringify!($TypeId)
|
||||
),
|
||||
})?;
|
||||
Ok(&primitive_vector.array)
|
||||
}
|
||||
impl<T: WrapperType> Eq for OrdPrimitive<T> {}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::$TypeId(v) => Ok(Some(v.into())),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast value {:?} to primitive type {}",
|
||||
other,
|
||||
stringify!($TypeId),
|
||||
),
|
||||
}.fail(),
|
||||
impl<T: WrapperType> PartialOrd for OrdPrimitive<T> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: WrapperType> Ord for OrdPrimitive<T> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
Into::<Value>::into(self.0).cmp(&Into::<Value>::into(other.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: WrapperType> From<OrdPrimitive<T>> for Value {
|
||||
fn from(p: OrdPrimitive<T>) -> Self {
|
||||
p.0.into()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_wrapper {
|
||||
($Type: ident, $LogicalType: ident) => {
|
||||
impl WrapperType for $Type {
|
||||
type LogicalType = $LogicalType;
|
||||
type Native = $Type;
|
||||
|
||||
fn from_native(value: Self::Native) -> Self {
|
||||
value
|
||||
}
|
||||
|
||||
fn into_native(self) -> Self::Native {
|
||||
self
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_wrapper!(u8, UInt8Type);
|
||||
impl_wrapper!(u16, UInt16Type);
|
||||
impl_wrapper!(u32, UInt32Type);
|
||||
impl_wrapper!(u64, UInt64Type);
|
||||
impl_wrapper!(i8, Int8Type);
|
||||
impl_wrapper!(i16, Int16Type);
|
||||
impl_wrapper!(i32, Int32Type);
|
||||
impl_wrapper!(i64, Int64Type);
|
||||
impl_wrapper!(f32, Float32Type);
|
||||
impl_wrapper!(f64, Float64Type);
|
||||
|
||||
impl WrapperType for Date {
|
||||
type LogicalType = DateType;
|
||||
type Native = i32;
|
||||
|
||||
fn from_native(value: i32) -> Self {
|
||||
Date::new(value)
|
||||
}
|
||||
|
||||
fn into_native(self) -> i32 {
|
||||
self.val()
|
||||
}
|
||||
}
|
||||
|
||||
impl WrapperType for DateTime {
|
||||
type LogicalType = DateTimeType;
|
||||
type Native = i64;
|
||||
|
||||
fn from_native(value: Self::Native) -> Self {
|
||||
DateTime::new(value)
|
||||
}
|
||||
|
||||
fn into_native(self) -> Self::Native {
|
||||
self.val()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! define_logical_primitive_type {
|
||||
($Native: ident, $TypeId: ident, $DataType: ident) => {
|
||||
// We need to define it as an empty struct `struct DataType {}` instead of a unit struct
|
||||
// `struct DataType;` to ensure the serialized JSON string is compatible with previous
|
||||
// implementation.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct $DataType {}
|
||||
|
||||
impl LogicalPrimitiveType for $DataType {
|
||||
type ArrowPrimitive = arrow::datatypes::$DataType;
|
||||
type Native = $Native;
|
||||
type Wrapper = $Native;
|
||||
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::$TypeId($DataType::default())
|
||||
}
|
||||
|
||||
fn type_name() -> &'static str {
|
||||
stringify!($TypeId)
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<$DataType>> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveVector<$DataType>>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to vector of primitive type {}",
|
||||
vector.vector_type_name(),
|
||||
stringify!($TypeId)
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<$Native>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::$TypeId(v) => Ok(Some(v.into())),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast value {:?} to primitive type {}",
|
||||
other,
|
||||
stringify!($TypeId),
|
||||
),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_numeric {
|
||||
($Type:ident, $TypeId:ident) => {
|
||||
impl DataType for PrimitiveType<$Type> {
|
||||
macro_rules! define_non_timestamp_primitive {
|
||||
($Native: ident, $TypeId: ident, $DataType: ident) => {
|
||||
define_logical_primitive_type!($Native, $TypeId, $DataType);
|
||||
|
||||
impl DataType for $DataType {
|
||||
fn name(&self) -> &str {
|
||||
stringify!($TypeId)
|
||||
}
|
||||
@@ -120,7 +253,7 @@ macro_rules! impl_numeric {
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
$Type::default().into()
|
||||
$Native::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
@@ -128,61 +261,98 @@ macro_rules! impl_numeric {
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(PrimitiveVectorBuilder::<$Type>::with_capacity(capacity))
|
||||
Box::new(PrimitiveVectorBuilder::<$DataType>::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for PrimitiveType<$Type> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PrimitiveType<$Type> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_primitive_element!($Type, $TypeId);
|
||||
|
||||
paste! {
|
||||
pub type [<$TypeId Type>]=PrimitiveType<$Type>;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_numeric!(u8, UInt8);
|
||||
impl_numeric!(u16, UInt16);
|
||||
impl_numeric!(u32, UInt32);
|
||||
impl_numeric!(u64, UInt64);
|
||||
impl_numeric!(i8, Int8);
|
||||
impl_numeric!(i16, Int16);
|
||||
impl_numeric!(i32, Int32);
|
||||
impl_numeric!(i64, Int64);
|
||||
impl_numeric!(f32, Float32);
|
||||
impl_numeric!(f64, Float64);
|
||||
define_non_timestamp_primitive!(u8, UInt8, UInt8Type);
|
||||
define_non_timestamp_primitive!(u16, UInt16, UInt16Type);
|
||||
define_non_timestamp_primitive!(u32, UInt32, UInt32Type);
|
||||
define_non_timestamp_primitive!(u64, UInt64, UInt64Type);
|
||||
define_non_timestamp_primitive!(i8, Int8, Int8Type);
|
||||
define_non_timestamp_primitive!(i16, Int16, Int16Type);
|
||||
define_non_timestamp_primitive!(i32, Int32, Int32Type);
|
||||
define_non_timestamp_primitive!(f32, Float32, Float32Type);
|
||||
define_non_timestamp_primitive!(f64, Float64, Float64Type);
|
||||
|
||||
// Timestamp primitive:
|
||||
define_logical_primitive_type!(i64, Int64, Int64Type);
|
||||
|
||||
impl DataType for Int64Type {
|
||||
fn name(&self) -> &str {
|
||||
"Int64"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Int64
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::Int64(0)
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Int64
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(PrimitiveVectorBuilder::<Int64Type>::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_eq() {
|
||||
assert_eq!(UInt8Type::default(), UInt8Type::default());
|
||||
assert_eq!(UInt16Type::default(), UInt16Type::default());
|
||||
assert_eq!(UInt32Type::default(), UInt32Type::default());
|
||||
assert_eq!(UInt64Type::default(), UInt64Type::default());
|
||||
assert_eq!(Int8Type::default(), Int8Type::default());
|
||||
assert_eq!(Int16Type::default(), Int16Type::default());
|
||||
assert_eq!(Int32Type::default(), Int32Type::default());
|
||||
assert_eq!(Int64Type::default(), Int64Type::default());
|
||||
assert_eq!(Float32Type::default(), Float32Type::default());
|
||||
assert_eq!(Float64Type::default(), Float64Type::default());
|
||||
fn test_ord_primitive() {
|
||||
struct Foo<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
heap: BinaryHeap<OrdPrimitive<T>>,
|
||||
}
|
||||
|
||||
assert_ne!(Float32Type::default(), Float64Type::default());
|
||||
assert_ne!(Float32Type::default(), Int32Type::default());
|
||||
impl<T> Foo<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
let value = OrdPrimitive::<T>(value);
|
||||
self.heap.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! test {
|
||||
($Type:ident) => {
|
||||
let mut foo = Foo::<$Type> {
|
||||
heap: BinaryHeap::new(),
|
||||
};
|
||||
foo.push($Type::default());
|
||||
};
|
||||
}
|
||||
|
||||
test!(u8);
|
||||
test!(u16);
|
||||
test!(u32);
|
||||
test!(u64);
|
||||
test!(i8);
|
||||
test!(i16);
|
||||
test!(i32);
|
||||
test!(i64);
|
||||
test!(f32);
|
||||
test!(f64);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,9 +18,10 @@ use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::StringBytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::prelude::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{MutableVector, StringVectorBuilder};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
@@ -52,4 +53,8 @@ impl DataType for StringType {
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(StringVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,6 +110,7 @@ impl Value {
|
||||
/// # Panics
|
||||
/// Panics if the data type is not supported.
|
||||
pub fn data_type(&self) -> ConcreteDataType {
|
||||
// TODO(yingwen): Implement this once all data types are implemented.
|
||||
match self {
|
||||
Value::Null => ConcreteDataType::null_datatype(),
|
||||
Value::Boolean(_) => ConcreteDataType::boolean_datatype(),
|
||||
@@ -125,10 +126,10 @@ impl Value {
|
||||
Value::Float64(_) => ConcreteDataType::float64_datatype(),
|
||||
Value::String(_) => ConcreteDataType::string_datatype(),
|
||||
Value::Binary(_) => ConcreteDataType::binary_datatype(),
|
||||
Value::List(list) => ConcreteDataType::list_datatype(list.datatype().clone()),
|
||||
Value::Date(_) => ConcreteDataType::date_datatype(),
|
||||
Value::DateTime(_) => ConcreteDataType::datetime_datatype(),
|
||||
Value::Timestamp(v) => ConcreteDataType::timestamp_datatype(v.unit()),
|
||||
Value::List(list) => ConcreteDataType::list_datatype(list.datatype().clone()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,7 +194,12 @@ impl Value {
|
||||
Value::List(_) => LogicalTypeId::List,
|
||||
Value::Date(_) => LogicalTypeId::Date,
|
||||
Value::DateTime(_) => LogicalTypeId::DateTime,
|
||||
Value::Timestamp(_) => LogicalTypeId::Timestamp,
|
||||
Value::Timestamp(t) => match t.unit() {
|
||||
TimeUnit::Second => LogicalTypeId::TimestampSecond,
|
||||
TimeUnit::Millisecond => LogicalTypeId::TimestampMillisecond,
|
||||
TimeUnit::Microsecond => LogicalTypeId::TimestampMicrosecond,
|
||||
TimeUnit::Nanosecond => LogicalTypeId::TimestampNanosecond,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -277,6 +283,9 @@ impl_value_from!(Float32, f32);
|
||||
impl_value_from!(Float64, f64);
|
||||
impl_value_from!(String, StringBytes);
|
||||
impl_value_from!(Binary, Bytes);
|
||||
impl_value_from!(Date, Date);
|
||||
impl_value_from!(DateTime, DateTime);
|
||||
impl_value_from!(Timestamp, Timestamp);
|
||||
|
||||
impl From<String> for Value {
|
||||
fn from(string: String) -> Value {
|
||||
@@ -296,12 +305,6 @@ impl From<Vec<u8>> for Value {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Timestamp> for Value {
|
||||
fn from(v: Timestamp) -> Self {
|
||||
Value::Timestamp(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[u8]> for Value {
|
||||
fn from(bytes: &[u8]) -> Value {
|
||||
Value::Binary(bytes.into())
|
||||
@@ -337,6 +340,7 @@ impl TryFrom<Value> for serde_json::Value {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(yingwen): Consider removing the `datatype` field from `ListValue`.
|
||||
/// List value.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ListValue {
|
||||
@@ -391,6 +395,7 @@ impl TryFrom<ScalarValue> for Value {
|
||||
|
||||
fn try_from(v: ScalarValue) -> Result<Self> {
|
||||
let v = match v {
|
||||
ScalarValue::Null => Value::Null,
|
||||
ScalarValue::Boolean(b) => Value::from(b),
|
||||
ScalarValue::Float32(f) => Value::from(f),
|
||||
ScalarValue::Float64(f) => Value::from(f),
|
||||
@@ -405,8 +410,10 @@ impl TryFrom<ScalarValue> for Value {
|
||||
ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => {
|
||||
Value::from(s.map(StringBytes::from))
|
||||
}
|
||||
ScalarValue::Binary(b) | ScalarValue::LargeBinary(b) => Value::from(b.map(Bytes::from)),
|
||||
ScalarValue::List(vs, t) => {
|
||||
ScalarValue::Binary(b)
|
||||
| ScalarValue::LargeBinary(b)
|
||||
| ScalarValue::FixedSizeBinary(_, b) => Value::from(b.map(Bytes::from)),
|
||||
ScalarValue::List(vs, field) => {
|
||||
let items = if let Some(vs) = vs {
|
||||
let vs = vs
|
||||
.into_iter()
|
||||
@@ -416,7 +423,7 @@ impl TryFrom<ScalarValue> for Value {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let datatype = t.as_ref().try_into()?;
|
||||
let datatype = ConcreteDataType::try_from(field.data_type())?;
|
||||
Value::List(ListValue::new(items, datatype))
|
||||
}
|
||||
ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null),
|
||||
@@ -435,7 +442,13 @@ impl TryFrom<ScalarValue> for Value {
|
||||
ScalarValue::TimestampNanosecond(t, _) => t
|
||||
.map(|x| Value::Timestamp(Timestamp::new(x, TimeUnit::Nanosecond)))
|
||||
.unwrap_or(Value::Null),
|
||||
_ => {
|
||||
ScalarValue::Decimal128(_, _, _)
|
||||
| ScalarValue::Time64(_)
|
||||
| ScalarValue::IntervalYearMonth(_)
|
||||
| ScalarValue::IntervalDayTime(_)
|
||||
| ScalarValue::IntervalMonthDayNano(_)
|
||||
| ScalarValue::Struct(_, _)
|
||||
| ScalarValue::Dictionary(_, _) => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
arrow_type: v.get_datatype(),
|
||||
}
|
||||
@@ -545,15 +558,6 @@ impl<'a> Ord for ValueRef<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper trait to convert copyable types to `ValueRef`.
|
||||
///
|
||||
/// It could replace the usage of `Into<ValueRef<'a>>`, thus avoid confusion between `Into<Value>`
|
||||
/// and `Into<ValueRef<'a>>` in generic codes. One typical usage is the [`Primitive`](crate::primitive_traits::Primitive) trait.
|
||||
pub trait IntoValueRef<'a> {
|
||||
/// Convert itself to [ValueRef].
|
||||
fn into_value_ref(self) -> ValueRef<'a>;
|
||||
}
|
||||
|
||||
macro_rules! impl_value_ref_from {
|
||||
($Variant:ident, $Type:ident) => {
|
||||
impl From<$Type> for ValueRef<'_> {
|
||||
@@ -562,12 +566,6 @@ macro_rules! impl_value_ref_from {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoValueRef<'a> for $Type {
|
||||
fn into_value_ref(self) -> ValueRef<'a> {
|
||||
ValueRef::$Variant(self.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Option<$Type>> for ValueRef<'_> {
|
||||
fn from(value: Option<$Type>) -> Self {
|
||||
match value {
|
||||
@@ -576,15 +574,6 @@ macro_rules! impl_value_ref_from {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoValueRef<'a> for Option<$Type> {
|
||||
fn into_value_ref(self) -> ValueRef<'a> {
|
||||
match self {
|
||||
Some(v) => ValueRef::$Variant(v.into()),
|
||||
None => ValueRef::Null,
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -599,6 +588,9 @@ impl_value_ref_from!(Int32, i32);
|
||||
impl_value_ref_from!(Int64, i64);
|
||||
impl_value_ref_from!(Float32, f32);
|
||||
impl_value_ref_from!(Float64, f64);
|
||||
impl_value_ref_from!(Date, Date);
|
||||
impl_value_ref_from!(DateTime, DateTime);
|
||||
impl_value_ref_from!(Timestamp, Timestamp);
|
||||
|
||||
impl<'a> From<&'a str> for ValueRef<'a> {
|
||||
fn from(string: &'a str) -> ValueRef<'a> {
|
||||
@@ -628,6 +620,7 @@ impl<'a> From<Option<ListValueRef<'a>>> for ValueRef<'a> {
|
||||
/// if it becomes bottleneck.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum ListValueRef<'a> {
|
||||
// TODO(yingwen): Consider replacing this with VectorRef.
|
||||
Indexed { vector: &'a ListVector, idx: usize },
|
||||
Ref { val: &'a ListValue },
|
||||
}
|
||||
@@ -785,19 +778,16 @@ mod tests {
|
||||
Some(Box::new(vec![Value::Int32(1), Value::Null])),
|
||||
ConcreteDataType::int32_datatype()
|
||||
)),
|
||||
ScalarValue::List(
|
||||
Some(Box::new(vec![
|
||||
ScalarValue::Int32(Some(1)),
|
||||
ScalarValue::Int32(None)
|
||||
])),
|
||||
Box::new(ArrowDataType::Int32)
|
||||
ScalarValue::new_list(
|
||||
Some(vec![ScalarValue::Int32(Some(1)), ScalarValue::Int32(None)]),
|
||||
ArrowDataType::Int32,
|
||||
)
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(None, ConcreteDataType::uint32_datatype())),
|
||||
ScalarValue::List(None, Box::new(ArrowDataType::UInt32))
|
||||
ScalarValue::new_list(None, ArrowDataType::UInt32)
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
@@ -980,6 +970,10 @@ mod tests {
|
||||
ConcreteDataType::int32_datatype(),
|
||||
)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::list_datatype(ConcreteDataType::null_datatype()),
|
||||
&Value::List(ListValue::default()),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::date_datatype(),
|
||||
&Value::Date(Date::new(1)),
|
||||
@@ -989,7 +983,7 @@ mod tests {
|
||||
&Value::DateTime(DateTime::new(1)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::timestamp_millis_datatype(),
|
||||
&ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
&Value::Timestamp(Timestamp::from_millis(1)),
|
||||
);
|
||||
}
|
||||
@@ -1208,59 +1202,6 @@ mod tests {
|
||||
assert!(wrong_value.as_list().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_into_value_ref() {
|
||||
macro_rules! check_into_value_ref {
|
||||
($Variant: ident, $data: expr, $PrimitiveType: ident, $Wrapper: ident) => {
|
||||
let data: $PrimitiveType = $data;
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
data.into_value_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
ValueRef::from(data)
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
Some(data).into_value_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
ValueRef::from(Some(data))
|
||||
);
|
||||
let x: Option<$PrimitiveType> = None;
|
||||
assert_eq!(ValueRef::Null, x.into_value_ref());
|
||||
assert_eq!(ValueRef::Null, x.into());
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! check_primitive_into_value_ref {
|
||||
($Variant: ident, $data: expr, $PrimitiveType: ident) => {
|
||||
check_into_value_ref!($Variant, $data, $PrimitiveType, $PrimitiveType)
|
||||
};
|
||||
}
|
||||
|
||||
check_primitive_into_value_ref!(Boolean, true, bool);
|
||||
check_primitive_into_value_ref!(UInt8, 10, u8);
|
||||
check_primitive_into_value_ref!(UInt16, 20, u16);
|
||||
check_primitive_into_value_ref!(UInt32, 30, u32);
|
||||
check_primitive_into_value_ref!(UInt64, 40, u64);
|
||||
check_primitive_into_value_ref!(Int8, -10, i8);
|
||||
check_primitive_into_value_ref!(Int16, -20, i16);
|
||||
check_primitive_into_value_ref!(Int32, -30, i32);
|
||||
check_primitive_into_value_ref!(Int64, -40, i64);
|
||||
check_into_value_ref!(Float32, 10.0, f32, OrderedF32);
|
||||
check_into_value_ref!(Float64, 10.0, f64, OrderedF64);
|
||||
|
||||
let hello = "hello";
|
||||
assert_eq!(
|
||||
ValueRef::Binary(hello.as_bytes()),
|
||||
ValueRef::from(hello.as_bytes())
|
||||
);
|
||||
assert_eq!(ValueRef::String(hello), ValueRef::from(hello));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
assert_eq!(Value::Null.to_string(), "Null");
|
||||
@@ -1301,10 +1242,34 @@ mod tests {
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
))
|
||||
.to_string(),
|
||||
"Timestamp[]"
|
||||
"TimestampSecondType[]"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
))
|
||||
.to_string(),
|
||||
"TimestampMillisecondType[]"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
))
|
||||
.to_string(),
|
||||
"TimestampMicrosecondType[]"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
))
|
||||
.to_string(),
|
||||
"TimestampNanosecondType[]"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,68 +12,59 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod binary;
|
||||
pub mod boolean;
|
||||
mod builder;
|
||||
pub mod constant;
|
||||
pub mod date;
|
||||
pub mod datetime;
|
||||
mod eq;
|
||||
mod helper;
|
||||
mod list;
|
||||
pub mod mutable;
|
||||
pub mod null;
|
||||
mod operations;
|
||||
pub mod primitive;
|
||||
mod string;
|
||||
mod timestamp;
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::bitmap::Bitmap;
|
||||
pub use binary::*;
|
||||
pub use boolean::*;
|
||||
pub use builder::VectorBuilder;
|
||||
pub use constant::*;
|
||||
pub use date::*;
|
||||
pub use datetime::*;
|
||||
pub use helper::Helper;
|
||||
pub use list::*;
|
||||
pub use mutable::MutableVector;
|
||||
pub use null::*;
|
||||
pub use operations::VectorOp;
|
||||
pub use primitive::*;
|
||||
use snafu::ensure;
|
||||
pub use string::*;
|
||||
pub use timestamp::*;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::operations::VectorOp;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Validity<'a> {
|
||||
/// Whether the array slot is valid or not (null).
|
||||
Slots(&'a Bitmap),
|
||||
/// All slots are valid.
|
||||
AllValid,
|
||||
/// All slots are null.
|
||||
AllNull,
|
||||
}
|
||||
mod binary;
|
||||
mod boolean;
|
||||
mod constant;
|
||||
mod date;
|
||||
mod datetime;
|
||||
mod eq;
|
||||
mod helper;
|
||||
mod list;
|
||||
mod null;
|
||||
mod operations;
|
||||
mod primitive;
|
||||
mod string;
|
||||
mod timestamp;
|
||||
mod validity;
|
||||
|
||||
impl<'a> Validity<'a> {
|
||||
pub fn slots(&self) -> Option<&Bitmap> {
|
||||
match self {
|
||||
Validity::Slots(bitmap) => Some(bitmap),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
pub use binary::{BinaryVector, BinaryVectorBuilder};
|
||||
pub use boolean::{BooleanVector, BooleanVectorBuilder};
|
||||
pub use constant::ConstantVector;
|
||||
pub use date::{DateVector, DateVectorBuilder};
|
||||
pub use datetime::{DateTimeVector, DateTimeVectorBuilder};
|
||||
pub use helper::Helper;
|
||||
pub use list::{ListIter, ListVector, ListVectorBuilder};
|
||||
pub use null::{NullVector, NullVectorBuilder};
|
||||
pub use primitive::{
|
||||
Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int16Vector,
|
||||
Int16VectorBuilder, Int32Vector, Int32VectorBuilder, Int64Vector, Int64VectorBuilder,
|
||||
Int8Vector, Int8VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
|
||||
UInt16Vector, UInt16VectorBuilder, UInt32Vector, UInt32VectorBuilder, UInt64Vector,
|
||||
UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder,
|
||||
};
|
||||
pub use string::{StringVector, StringVectorBuilder};
|
||||
pub use timestamp::{
|
||||
TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
|
||||
TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
|
||||
TimestampSecondVector, TimestampSecondVectorBuilder,
|
||||
};
|
||||
pub use validity::Validity;
|
||||
|
||||
// TODO(yingwen): arrow 28.0 implements Clone for all arrays, we could upgrade to it and simplify
|
||||
// some code in methods such as `to_arrow_array()` and `to_boxed_arrow_array()`.
|
||||
/// Vector of data values.
|
||||
pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
|
||||
/// Returns the data type of the vector.
|
||||
@@ -110,13 +101,7 @@ pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
|
||||
/// The number of null slots on this [`Vector`].
|
||||
/// # Implementation
|
||||
/// This is `O(1)`.
|
||||
fn null_count(&self) -> usize {
|
||||
match self.validity() {
|
||||
Validity::Slots(bitmap) => bitmap.null_count(),
|
||||
Validity::AllValid => 0,
|
||||
Validity::AllNull => self.len(),
|
||||
}
|
||||
}
|
||||
fn null_count(&self) -> usize;
|
||||
|
||||
/// Returns true when it's a ConstantColumn
|
||||
fn is_const(&self) -> bool {
|
||||
@@ -165,6 +150,42 @@ pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
|
||||
|
||||
pub type VectorRef = Arc<dyn Vector>;
|
||||
|
||||
/// Mutable vector that could be used to build an immutable vector.
|
||||
pub trait MutableVector: Send + Sync {
|
||||
/// Returns the data type of the vector.
|
||||
fn data_type(&self) -> ConcreteDataType;
|
||||
|
||||
/// Returns the length of the vector.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Returns whether the vector is empty.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Convert to Any, to enable dynamic casting.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
/// Convert to mutable Any, to enable dynamic casting.
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any;
|
||||
|
||||
/// Convert `self` to an (immutable) [VectorRef] and reset `self`.
|
||||
fn to_vector(&mut self) -> VectorRef;
|
||||
|
||||
/// Push value ref to this mutable vector.
|
||||
///
|
||||
/// Returns error if data type unmatch.
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()>;
|
||||
|
||||
/// Extend this mutable vector by slice of `vector`.
|
||||
///
|
||||
/// Returns error if data type unmatch.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `offset + length > vector.len()`.
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Helper to define `try_from_arrow_array(array: arrow::array::ArrayRef)` function.
|
||||
macro_rules! impl_try_from_arrow_array_for_vector {
|
||||
($Array: ident, $Vector: ident) => {
|
||||
@@ -172,16 +193,20 @@ macro_rules! impl_try_from_arrow_array_for_vector {
|
||||
pub fn try_from_arrow_array(
|
||||
array: impl AsRef<dyn arrow::array::Array>,
|
||||
) -> crate::error::Result<$Vector> {
|
||||
Ok($Vector::from(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<$Array>()
|
||||
.with_context(|| crate::error::ConversionSnafu {
|
||||
from: std::format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
use snafu::OptionExt;
|
||||
|
||||
let data = array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<$Array>()
|
||||
.with_context(|| crate::error::ConversionSnafu {
|
||||
from: std::format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.data()
|
||||
.clone();
|
||||
|
||||
let concrete_array = $Array::from(data);
|
||||
Ok($Vector::from(concrete_array))
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -189,10 +214,7 @@ macro_rules! impl_try_from_arrow_array_for_vector {
|
||||
|
||||
macro_rules! impl_validity_for_vector {
|
||||
($array: expr) => {
|
||||
match $array.validity() {
|
||||
Some(bitmap) => Validity::Slots(bitmap),
|
||||
None => Validity::AllValid,
|
||||
}
|
||||
Validity::from_array_data($array.data())
|
||||
};
|
||||
}
|
||||
|
||||
@@ -219,10 +241,11 @@ macro_rules! impl_get_ref_for_vector {
|
||||
}
|
||||
|
||||
macro_rules! impl_extend_for_builder {
|
||||
($mutable_array: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
|
||||
($mutable_vector: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
|
||||
use snafu::OptionExt;
|
||||
|
||||
let concrete_vector = $vector
|
||||
let sliced_vector = $vector.slice($offset, $length);
|
||||
let concrete_vector = sliced_vector
|
||||
.as_any()
|
||||
.downcast_ref::<$VectorType>()
|
||||
.with_context(|| crate::error::CastTypeSnafu {
|
||||
@@ -232,8 +255,9 @@ macro_rules! impl_extend_for_builder {
|
||||
stringify!($VectorType)
|
||||
),
|
||||
})?;
|
||||
let slice = concrete_vector.array.slice($offset, $length);
|
||||
$mutable_array.extend_trusted_len(slice.iter());
|
||||
for value in concrete_vector.iter_data() {
|
||||
$mutable_vector.push(value);
|
||||
}
|
||||
Ok(())
|
||||
}};
|
||||
}
|
||||
@@ -245,27 +269,27 @@ pub(crate) use {
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use arrow::array::{Array, PrimitiveArray};
|
||||
use arrow::array::{Array, Int32Array, UInt8Array};
|
||||
use serde_json;
|
||||
|
||||
use super::helper::Helper;
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::PrimitiveElement;
|
||||
use crate::types::{Int32Type, LogicalPrimitiveType};
|
||||
use crate::vectors::helper::Helper;
|
||||
|
||||
#[test]
|
||||
fn test_df_columns_to_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1, 2, 3]));
|
||||
let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
|
||||
let vector = Helper::try_into_vector(df_column).unwrap();
|
||||
assert_eq!(
|
||||
i32::build_data_type().as_arrow_type(),
|
||||
Int32Type::build_data_type().as_arrow_type(),
|
||||
vector.data_type().as_arrow_type()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_i32_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::<i32>::from_slice(vec![1, 2, 3]));
|
||||
let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
|
||||
let json_value = Helper::try_into_vector(df_column)
|
||||
.unwrap()
|
||||
.serialize_to_json()
|
||||
@@ -275,7 +299,7 @@ pub mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_i8_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1u8, 2u8, 3u8]));
|
||||
let df_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
|
||||
let json_value = Helper::try_into_vector(df_column)
|
||||
.unwrap()
|
||||
.serialize_to_json()
|
||||
|
||||
@@ -15,9 +15,8 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::array::{ArrayIter, GenericByteArray};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::arrow_array::{BinaryArray, MutableBinaryArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
@@ -37,6 +36,16 @@ impl BinaryVector {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> BinaryVector {
|
||||
BinaryVector {
|
||||
array: BinaryArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BinaryArray> for BinaryVector {
|
||||
@@ -48,7 +57,7 @@ impl From<BinaryArray> for BinaryVector {
|
||||
impl From<Vec<Option<Vec<u8>>>> for BinaryVector {
|
||||
fn from(data: Vec<Option<Vec<u8>>>) -> Self {
|
||||
Self {
|
||||
array: BinaryArray::from(data),
|
||||
array: BinaryArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -71,11 +80,13 @@ impl Vector for BinaryVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Arc::new(BinaryArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Box::new(BinaryArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -83,7 +94,11 @@ impl Vector for BinaryVector {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.values().len() + self.array.offsets().len() * std::mem::size_of::<i64>()
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -91,7 +106,8 @@ impl Vector for BinaryVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self::from(self.array.slice(offset, length)))
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -148,12 +164,15 @@ impl MutableVector for BinaryVectorBuilder {
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.mutable_array.push(value.as_binary()?);
|
||||
match value.as_binary()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self.mutable_array, vector, BinaryVector, offset, length)
|
||||
vectors::impl_extend_for_builder!(self, vector, BinaryVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -162,17 +181,20 @@ impl ScalarVectorBuilder for BinaryVectorBuilder {
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableBinaryArray::with_capacity(capacity),
|
||||
mutable_array: MutableBinaryArray::with_capacity(capacity, 0),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
BinaryVector {
|
||||
array: std::mem::take(&mut self.mutable_array).into(),
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -205,14 +227,17 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_binary_vector_misc() {
|
||||
let v = BinaryVector::from(BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
let v = BinaryVector::from(BinaryArray::from_iter_values(&[
|
||||
vec![1, 2, 3],
|
||||
vec![1, 2, 3],
|
||||
]));
|
||||
|
||||
assert_eq!(2, v.len());
|
||||
assert_eq!("BinaryVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(30, v.memory_size());
|
||||
assert_eq!(128, v.memory_size());
|
||||
|
||||
for i in 0..2 {
|
||||
assert!(!v.is_null(i));
|
||||
@@ -227,7 +252,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_binary_vector_to_json() {
|
||||
let vector = BinaryVector::from(BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
let vector = BinaryVector::from(BinaryArray::from_iter_values(&[
|
||||
vec![1, 2, 3],
|
||||
vec![1, 2, 3],
|
||||
]));
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
assert_eq!(
|
||||
@@ -253,8 +281,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let arrow_array = BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]);
|
||||
let original = arrow_array.clone();
|
||||
let arrow_array = BinaryArray::from_iter_values(&[vec![1, 2, 3], vec![1, 2, 3]]);
|
||||
let original = BinaryArray::from(arrow_array.data().clone());
|
||||
let vector = BinaryVector::from(arrow_array);
|
||||
assert_eq!(original, vector.array);
|
||||
}
|
||||
@@ -289,7 +317,7 @@ mod tests {
|
||||
builder.push(Some(b"world"));
|
||||
let vector = builder.finish();
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
assert!(vector.validity().is_all_valid());
|
||||
|
||||
let mut builder = BinaryVectorBuilder::with_capacity(3);
|
||||
builder.push(Some(b"hello"));
|
||||
@@ -298,9 +326,10 @@ mod tests {
|
||||
let vector = builder.finish();
|
||||
assert_eq!(1, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(1, slots.null_count());
|
||||
assert!(!slots.get_bit(1));
|
||||
assert!(!validity.is_set(1));
|
||||
|
||||
assert_eq!(1, validity.null_count());
|
||||
assert!(!validity.is_set(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -16,9 +16,10 @@ use std::any::Any;
|
||||
use std::borrow::Borrow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, BooleanArray, MutableArray, MutableBooleanArray};
|
||||
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use arrow::array::{
|
||||
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, BooleanArray, BooleanBuilder,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::Result;
|
||||
@@ -41,12 +42,26 @@ impl BooleanVector {
|
||||
pub(crate) fn as_boolean_array(&self) -> &BooleanArray {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> BooleanVector {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from(data),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn false_count(&self) -> usize {
|
||||
self.array.false_count()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<bool>> for BooleanVector {
|
||||
fn from(data: Vec<bool>) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from_slice(&data),
|
||||
array: BooleanArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -91,11 +106,13 @@ impl Vector for BooleanVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Arc::new(BooleanArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Box::new(BooleanArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -103,7 +120,11 @@ impl Vector for BooleanVector {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.values().as_slice().0.len()
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -111,7 +132,8 @@ impl Vector for BooleanVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self::from(self.array.slice(offset, length)))
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -126,7 +148,7 @@ impl Vector for BooleanVector {
|
||||
impl ScalarVector for BooleanVector {
|
||||
type OwnedItem = bool;
|
||||
type RefItem<'a> = bool;
|
||||
type Iter<'a> = ZipValidity<'a, bool, BitmapIter<'a>>;
|
||||
type Iter<'a> = ArrayIter<&'a BooleanArray>;
|
||||
type Builder = BooleanVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
@@ -143,7 +165,7 @@ impl ScalarVector for BooleanVector {
|
||||
}
|
||||
|
||||
pub struct BooleanVectorBuilder {
|
||||
mutable_array: MutableBooleanArray,
|
||||
mutable_array: BooleanBuilder,
|
||||
}
|
||||
|
||||
impl MutableVector for BooleanVectorBuilder {
|
||||
@@ -168,12 +190,15 @@ impl MutableVector for BooleanVectorBuilder {
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.mutable_array.push(value.as_boolean()?);
|
||||
match value.as_boolean()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self.mutable_array, vector, BooleanVector, offset, length)
|
||||
vectors::impl_extend_for_builder!(self, vector, BooleanVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,17 +207,20 @@ impl ScalarVectorBuilder for BooleanVectorBuilder {
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableBooleanArray::with_capacity(capacity),
|
||||
mutable_array: BooleanBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
BooleanVector {
|
||||
array: std::mem::take(&mut self.mutable_array).into(),
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -225,9 +253,9 @@ mod tests {
|
||||
assert_eq!(9, v.len());
|
||||
assert_eq!("BooleanVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(2, v.memory_size());
|
||||
assert_eq!(64, v.memory_size());
|
||||
|
||||
for (i, b) in bools.iter().enumerate() {
|
||||
assert!(!v.is_null(i));
|
||||
@@ -316,13 +344,12 @@ mod tests {
|
||||
let vector = BooleanVector::from(vec![Some(true), None, Some(false)]);
|
||||
assert_eq!(1, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(1, slots.null_count());
|
||||
assert!(!slots.get_bit(1));
|
||||
assert_eq!(1, validity.null_count());
|
||||
assert!(!validity.is_set(1));
|
||||
|
||||
let vector = BooleanVector::from(vec![true, false, false]);
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
assert!(vector.validity().is_all_valid());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -55,6 +55,27 @@ impl ConstantVector {
|
||||
pub fn get_constant_ref(&self) -> ValueRef {
|
||||
self.vector.get_ref(0)
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_vector(&self, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), self.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return self.slice(0, 0);
|
||||
}
|
||||
|
||||
Arc::new(ConstantVector::new(
|
||||
self.vector.clone(),
|
||||
*offsets.last().unwrap(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn filter_vector(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
let length = self.len() - filter.false_count();
|
||||
if length == self.len() {
|
||||
return Ok(Arc::new(self.clone()));
|
||||
}
|
||||
Ok(Arc::new(ConstantVector::new(self.inner().clone(), length)))
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for ConstantVector {
|
||||
@@ -90,9 +111,9 @@ impl Vector for ConstantVector {
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
if self.vector.is_null(0) {
|
||||
Validity::AllNull
|
||||
Validity::all_null(self.length)
|
||||
} else {
|
||||
Validity::AllValid
|
||||
Validity::all_valid(self.length)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,6 +143,14 @@ impl Vector for ConstantVector {
|
||||
fn get_ref(&self, _index: usize) -> ValueRef {
|
||||
self.vector.get_ref(0)
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
if self.only_null() {
|
||||
self.len()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ConstantVector {
|
||||
@@ -140,33 +169,6 @@ impl Serializable for ConstantVector {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_constant(vector: &ConstantVector, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), vector.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return vector.slice(0, 0);
|
||||
}
|
||||
|
||||
Arc::new(ConstantVector::new(
|
||||
vector.vector.clone(),
|
||||
*offsets.last().unwrap(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn filter_constant(
|
||||
vector: &ConstantVector,
|
||||
filter: &BooleanVector,
|
||||
) -> Result<VectorRef> {
|
||||
let length = filter.len() - filter.as_boolean_array().values().null_count();
|
||||
if length == vector.len() {
|
||||
return Ok(Arc::new(vector.clone()));
|
||||
}
|
||||
Ok(Arc::new(ConstantVector::new(
|
||||
vector.inner().clone(),
|
||||
length,
|
||||
)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
@@ -182,9 +184,9 @@ mod tests {
|
||||
assert_eq!("ConstantVector", c.vector_type_name());
|
||||
assert!(c.is_const());
|
||||
assert_eq!(10, c.len());
|
||||
assert_eq!(Validity::AllValid, c.validity());
|
||||
assert!(c.validity().is_all_valid());
|
||||
assert!(!c.only_null());
|
||||
assert_eq!(4, c.memory_size());
|
||||
assert_eq!(64, c.memory_size());
|
||||
|
||||
for i in 0..10 {
|
||||
assert!(!c.is_null(i));
|
||||
|
||||
@@ -12,258 +12,28 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
use crate::types::DateType;
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
use arrow::array::{Array, ArrayRef, PrimitiveArray};
|
||||
use common_time::date::Date;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::*;
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::vectors::{MutableVector, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct DateVector {
|
||||
array: PrimitiveVector<i32>,
|
||||
}
|
||||
|
||||
impl DateVector {
|
||||
pub fn new(array: PrimitiveArray<i32>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector { array },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i32>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
self.array.as_arrow()
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for DateVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::date_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"DateVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Arc::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date32,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date32,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.memory_size()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self {
|
||||
array: PrimitiveVector::new(self.array.array.slice(offset, length)),
|
||||
})
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
match self.array.get(index) {
|
||||
Value::Int32(v) => Value::Date(Date::new(v)),
|
||||
Value::Null => Value::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int32(v) => ValueRef::Date(Date::new(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<i32>>> for DateVector {
|
||||
fn from(data: Vec<Option<i32>>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector::<i32>::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateIter<'a> {
|
||||
iter: PrimitiveIter<'a, i32>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for DateIter<'a> {
|
||||
type Item = Option<Date>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|v| v.map(Date::new))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for DateVector {
|
||||
type OwnedItem = Date;
|
||||
type RefItem<'a> = Date;
|
||||
type Iter<'a> = DateIter<'a>;
|
||||
|
||||
type Builder = DateVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
self.array.get_data(idx).map(Date::new)
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
DateIter {
|
||||
iter: self.array.iter_data(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for DateVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
Ok(self
|
||||
.array
|
||||
.iter_data()
|
||||
.map(|v| v.map(Date::new))
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateVectorBuilder {
|
||||
buffer: PrimitiveVectorBuilder<i32>,
|
||||
}
|
||||
|
||||
impl MutableVector for DateVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::date_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_date()?.map(|d| d.val()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for DateVectorBuilder {
|
||||
type VectorType = DateVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: PrimitiveVectorBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer.push(value.map(|d| d.val()))
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: self.buffer.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_date(vector: &DateVector, offsets: &[usize]) -> VectorRef {
|
||||
let array = crate::vectors::primitive::replicate_primitive_with_type(
|
||||
&vector.array,
|
||||
offsets,
|
||||
vector.data_type(),
|
||||
);
|
||||
Arc::new(DateVector { array })
|
||||
}
|
||||
// Vector for [`Date`](common_time::Date).
|
||||
pub type DateVector = PrimitiveVector<DateType>;
|
||||
// Builder to build DateVector.
|
||||
pub type DateVectorBuilder = PrimitiveVectorBuilder<DateType>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::Array;
|
||||
use common_time::date::Date;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::DateType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{Vector, VectorRef};
|
||||
|
||||
#[test]
|
||||
fn test_build_date_vector() {
|
||||
@@ -288,7 +58,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_date_scalar() {
|
||||
let vector = DateVector::from_slice(&[Date::new(1), Date::new(2)]);
|
||||
let vector = DateVector::from_slice(&[1, 2]);
|
||||
assert_eq!(2, vector.len());
|
||||
assert_eq!(Some(Date::new(1)), vector.get_data(0));
|
||||
assert_eq!(Some(Date::new(2)), vector.get_data(1));
|
||||
@@ -296,7 +66,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_date_vector_builder() {
|
||||
let input = DateVector::from_slice(&[Date::new(1), Date::new(2), Date::new(3)]);
|
||||
let input = DateVector::from_slice(&[1, 2, 3]);
|
||||
|
||||
let mut builder = DateType::default().create_mutable_vector(3);
|
||||
builder
|
||||
@@ -309,19 +79,25 @@ mod tests {
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateVector::from_slice(&[
|
||||
Date::new(5),
|
||||
Date::new(2),
|
||||
Date::new(3),
|
||||
]));
|
||||
let expect: VectorRef = Arc::new(DateVector::from_slice(&[5, 2, 3]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_date_from_arrow() {
|
||||
let vector = DateVector::from_slice(&[Date::new(1), Date::new(2)]);
|
||||
let vector = DateVector::from_slice(&[1, 2]);
|
||||
let arrow = vector.as_arrow().slice(0, vector.len());
|
||||
let vector2 = DateVector::try_from_arrow_array(&arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_date_vector() {
|
||||
let vector = DateVector::from_slice(&[-1, 0, 1]);
|
||||
let serialized_json = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(
|
||||
r#"["1969-12-31","1970-01-01","1970-01-02"]"#,
|
||||
serialized_json
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,264 +12,32 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
use crate::types::DateTimeType;
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
use arrow::array::{Array, ArrayRef, PrimitiveArray};
|
||||
use common_time::datetime::DateTime;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::{
|
||||
MutableVector, ScalarVector, ScalarVectorBuilder, Validity, Value, ValueRef, Vector, VectorRef,
|
||||
};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::vectors::{PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct DateTimeVector {
|
||||
array: PrimitiveVector<i64>,
|
||||
}
|
||||
|
||||
impl DateTimeVector {
|
||||
pub fn new(array: PrimitiveArray<i64>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector { array },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i64>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
self.array.as_arrow()
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for DateTimeVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::datetime_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"DateTimeVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Arc::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date64,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date64,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.memory_size()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self {
|
||||
array: PrimitiveVector::new(self.array.array.slice(offset, length)),
|
||||
})
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
match self.array.get(index) {
|
||||
Value::Int64(v) => Value::DateTime(DateTime::new(v)),
|
||||
Value::Null => Value::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int64(v) => ValueRef::DateTime(DateTime::new(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for DateTimeVector {
|
||||
fn serialize_to_json(&self) -> crate::Result<Vec<serde_json::Value>> {
|
||||
Ok(self
|
||||
.array
|
||||
.iter_data()
|
||||
.map(|v| v.map(DateTime::new))
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<i64>>> for DateTimeVector {
|
||||
fn from(data: Vec<Option<i64>>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector::<i64>::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateTimeVectorBuilder {
|
||||
buffer: PrimitiveVectorBuilder<i64>,
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for DateTimeVectorBuilder {
|
||||
type VectorType = DateTimeVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: PrimitiveVectorBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer.push(value.map(|d| d.val()))
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: self.buffer.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MutableVector for DateTimeVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::datetime_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_datetime()?.map(|d| d.val()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateTimeVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateTimeIter<'a> {
|
||||
iter: PrimitiveIter<'a, i64>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for DateTimeIter<'a> {
|
||||
type Item = Option<DateTime>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|v| v.map(DateTime::new))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for DateTimeVector {
|
||||
type OwnedItem = DateTime;
|
||||
type RefItem<'a> = DateTime;
|
||||
type Iter<'a> = DateTimeIter<'a>;
|
||||
type Builder = DateTimeVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
self.array.get_data(idx).map(DateTime::new)
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
DateTimeIter {
|
||||
iter: self.array.iter_data(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_datetime(vector: &DateTimeVector, offsets: &[usize]) -> VectorRef {
|
||||
let array = crate::vectors::primitive::replicate_primitive_with_type(
|
||||
&vector.array,
|
||||
offsets,
|
||||
vector.data_type(),
|
||||
);
|
||||
Arc::new(DateTimeVector { array })
|
||||
}
|
||||
/// Vector of [`DateTime`](common_time::DateTime)
|
||||
pub type DateTimeVector = PrimitiveVector<DateTimeType>;
|
||||
/// Builder for [`DateTimeVector`].
|
||||
pub type DateTimeVectorBuilder = PrimitiveVectorBuilder<DateTimeType>;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, PrimitiveArray};
|
||||
use common_time::DateTime;
|
||||
use datafusion_common::from_slice::FromSlice;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::DateTimeType;
|
||||
use crate::prelude::{
|
||||
ConcreteDataType, ScalarVector, ScalarVectorBuilder, Value, ValueRef, Vector, VectorRef,
|
||||
};
|
||||
use crate::serialize::Serializable;
|
||||
|
||||
#[test]
|
||||
fn test_datetime_vector() {
|
||||
let v = DateTimeVector::new(PrimitiveArray::from_vec(vec![1, 2, 3]));
|
||||
let v = DateTimeVector::new(PrimitiveArray::from_slice(&[1, 2, 3]));
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
|
||||
assert_eq!(3, v.len());
|
||||
assert_eq!("DateTimeVector", v.vector_type_name());
|
||||
@@ -287,9 +55,8 @@ mod tests {
|
||||
assert_eq!(Some(DateTime::new(2)), iter.next().unwrap());
|
||||
assert_eq!(Some(DateTime::new(3)), iter.next().unwrap());
|
||||
assert!(!v.is_null(0));
|
||||
assert_eq!(24, v.memory_size()); // size of i64 * 3
|
||||
assert_eq!(64, v.memory_size());
|
||||
|
||||
assert_matches!(v.validity(), Validity::AllValid);
|
||||
if let Value::DateTime(d) = v.get(0) {
|
||||
assert_eq!(1, d.val());
|
||||
} else {
|
||||
@@ -314,8 +81,11 @@ mod tests {
|
||||
assert_eq!(Value::Null, v.get(1));
|
||||
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));
|
||||
|
||||
let input =
|
||||
DateTimeVector::from_slice(&[DateTime::new(1), DateTime::new(2), DateTime::new(3)]);
|
||||
let input = DateTimeVector::from_wrapper_slice(&[
|
||||
DateTime::new(1),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
]);
|
||||
|
||||
let mut builder = DateTimeType::default().create_mutable_vector(3);
|
||||
builder
|
||||
@@ -328,7 +98,7 @@ mod tests {
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_slice(&[
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice(&[
|
||||
DateTime::new(5),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
@@ -338,7 +108,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_datetime_from_arrow() {
|
||||
let vector = DateTimeVector::from_slice(&[DateTime::new(1), DateTime::new(2)]);
|
||||
let vector = DateTimeVector::from_wrapper_slice(&[DateTime::new(1), DateTime::new(2)]);
|
||||
let arrow = vector.as_arrow().slice(0, vector.len());
|
||||
let vector2 = DateTimeVector::try_from_arrow_array(&arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
|
||||
@@ -15,9 +15,12 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::TimestampType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, ListVector,
|
||||
PrimitiveVector, StringVector, TimestampVector, Vector,
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, PrimitiveVector,
|
||||
StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
};
|
||||
use crate::with_match_primitive_type_id;
|
||||
|
||||
@@ -76,7 +79,20 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
String(_) => is_vector_eq!(StringVector, lhs, rhs),
|
||||
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
|
||||
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
|
||||
Timestamp(_) => is_vector_eq!(TimestampVector, lhs, rhs),
|
||||
Timestamp(t) => match t {
|
||||
TimestampType::Second(_) => {
|
||||
is_vector_eq!(TimestampSecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Millisecond(_) => {
|
||||
is_vector_eq!(TimestampMillisecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Microsecond(_) => {
|
||||
is_vector_eq!(TimestampMicrosecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Nanosecond(_) => {
|
||||
is_vector_eq!(TimestampNanosecondVector, lhs, rhs)
|
||||
}
|
||||
},
|
||||
List(_) => is_vector_eq!(ListVector, lhs, rhs),
|
||||
UInt8(_) | UInt16(_) | UInt32(_) | UInt64(_) | Int8(_) | Int16(_) | Int32(_) | Int64(_)
|
||||
| Float32(_) | Float64(_) => {
|
||||
@@ -95,13 +111,10 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{ListArray, MutableListArray, MutablePrimitiveArray, TryExtend};
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::{
|
||||
Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector,
|
||||
NullVector, TimestampVector, UInt16Vector, UInt32Vector, UInt64Vector, UInt8Vector,
|
||||
VectorRef,
|
||||
list, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector,
|
||||
NullVector, UInt16Vector, UInt32Vector, UInt64Vector, UInt8Vector, VectorRef,
|
||||
};
|
||||
|
||||
fn assert_vector_ref_eq(vector: VectorRef) {
|
||||
@@ -132,14 +145,21 @@ mod tests {
|
||||
assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false])));
|
||||
assert_vector_ref_eq(Arc::new(DateVector::from(vec![Some(100), Some(120)])));
|
||||
assert_vector_ref_eq(Arc::new(DateTimeVector::from(vec![Some(100), Some(120)])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampVector::from_values([100, 120])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampSecondVector::from_values([100, 120])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampMillisecondVector::from_values([
|
||||
100, 120,
|
||||
])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampMicrosecondVector::from_values([
|
||||
100, 120,
|
||||
])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampNanosecondVector::from_values([100, 120])));
|
||||
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i64>>::new();
|
||||
arrow_array
|
||||
.try_extend(vec![Some(vec![Some(1), Some(2), Some(3)])])
|
||||
.unwrap();
|
||||
let arrow_array: ListArray<i32> = arrow_array.into();
|
||||
assert_vector_ref_eq(Arc::new(ListVector::from(arrow_array)));
|
||||
let list_vector = list::tests::new_list_vector(&[
|
||||
Some(vec![Some(1), Some(2)]),
|
||||
None,
|
||||
Some(vec![Some(3), Some(4)]),
|
||||
]);
|
||||
assert_vector_ref_eq(Arc::new(list_vector));
|
||||
|
||||
assert_vector_ref_eq(Arc::new(NullVector::new(4)));
|
||||
assert_vector_ref_eq(Arc::new(StringVector::from(vec![
|
||||
|
||||
@@ -17,19 +17,26 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::Array;
|
||||
use arrow::array::{Array, ArrayRef, StringArray};
|
||||
use arrow::compute;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use arrow::compute::kernels::comparison;
|
||||
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
|
||||
use datafusion_common::ScalarValue;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::arrow_array::StringArray;
|
||||
use crate::error::{ConversionSnafu, Result, UnknownVectorSnafu};
|
||||
use crate::scalars::*;
|
||||
use crate::vectors::date::DateVector;
|
||||
use crate::vectors::datetime::DateTimeVector;
|
||||
use crate::vectors::*;
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarVectorBuilder};
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Float32Vector,
|
||||
Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, ListVector,
|
||||
ListVectorBuilder, MutableVector, NullVector, StringVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
|
||||
UInt32Vector, UInt64Vector, UInt8Vector, Vector, VectorRef,
|
||||
};
|
||||
|
||||
/// Helper functions for `Vector`.
|
||||
pub struct Helper;
|
||||
|
||||
impl Helper {
|
||||
@@ -47,7 +54,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<<T as Scalar>::VectorType>()
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
@@ -61,7 +68,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<T>()
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
@@ -78,7 +85,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_mut_any()
|
||||
.downcast_mut()
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
ty,
|
||||
@@ -94,7 +101,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<<T as Scalar>::VectorType>()
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
@@ -105,11 +112,9 @@ impl Helper {
|
||||
}
|
||||
|
||||
/// Try to cast an arrow scalar value into vector
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if given scalar value is not supported.
|
||||
pub fn try_from_scalar_value(value: ScalarValue, length: usize) -> Result<VectorRef> {
|
||||
let vector = match value {
|
||||
ScalarValue::Null => ConstantVector::new(Arc::new(NullVector::new(1)), length),
|
||||
ScalarValue::Boolean(v) => {
|
||||
ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
|
||||
}
|
||||
@@ -143,17 +148,29 @@ impl Helper {
|
||||
ScalarValue::UInt64(v) => {
|
||||
ConstantVector::new(Arc::new(UInt64Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Utf8(v) => {
|
||||
ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => {
|
||||
ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::LargeUtf8(v) => {
|
||||
ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Binary(v) => {
|
||||
ScalarValue::Binary(v)
|
||||
| ScalarValue::LargeBinary(v)
|
||||
| ScalarValue::FixedSizeBinary(_, v) => {
|
||||
ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::LargeBinary(v) => {
|
||||
ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
|
||||
ScalarValue::List(v, field) => {
|
||||
let item_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
|
||||
if let Some(values) = v {
|
||||
let values = values
|
||||
.into_iter()
|
||||
.map(ScalarValue::try_into)
|
||||
.collect::<Result<_>>()?;
|
||||
let list_value = ListValue::new(Some(Box::new(values)), item_type);
|
||||
builder.push(Some(ListValueRef::Ref { val: &list_value }));
|
||||
} else {
|
||||
builder.push(None);
|
||||
}
|
||||
let list_vector = builder.to_vector();
|
||||
ConstantVector::new(list_vector, length)
|
||||
}
|
||||
ScalarValue::Date32(v) => {
|
||||
ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
|
||||
@@ -161,8 +178,30 @@ impl Helper {
|
||||
ScalarValue::Date64(v) => {
|
||||
ConstantVector::new(Arc::new(DateTimeVector::from(vec![v])), length)
|
||||
}
|
||||
_ => {
|
||||
return ConversionSnafu {
|
||||
ScalarValue::TimestampSecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampMillisecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampMillisecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampMicrosecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampMicrosecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampNanosecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampNanosecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Decimal128(_, _, _)
|
||||
| ScalarValue::Time64(_)
|
||||
| ScalarValue::IntervalYearMonth(_)
|
||||
| ScalarValue::IntervalDayTime(_)
|
||||
| ScalarValue::IntervalMonthDayNano(_)
|
||||
| ScalarValue::Struct(_, _)
|
||||
| ScalarValue::Dictionary(_, _) => {
|
||||
return error::ConversionSnafu {
|
||||
from: format!("Unsupported scalar value: {}", value),
|
||||
}
|
||||
.fail()
|
||||
@@ -180,9 +219,7 @@ impl Helper {
|
||||
Ok(match array.as_ref().data_type() {
|
||||
ArrowDataType::Null => Arc::new(NullVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Boolean => Arc::new(BooleanVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Binary | ArrowDataType::LargeBinary => {
|
||||
Arc::new(BinaryVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
ArrowDataType::LargeBinary => Arc::new(BinaryVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int8 => Arc::new(Int8Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int16 => Arc::new(Int16Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int32 => Arc::new(Int32Vector::try_from_arrow_array(array)?),
|
||||
@@ -193,48 +230,80 @@ impl Helper {
|
||||
ArrowDataType::UInt64 => Arc::new(UInt64Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
|
||||
Arc::new(StringVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Timestamp(_, _) => {
|
||||
Arc::new(TimestampVector::try_from_arrow_array(array)?)
|
||||
ArrowDataType::Timestamp(unit, _) => match unit {
|
||||
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
|
||||
TimeUnit::Millisecond => {
|
||||
Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
TimeUnit::Microsecond => {
|
||||
Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
TimeUnit::Nanosecond => {
|
||||
Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
},
|
||||
ArrowDataType::Float16
|
||||
| ArrowDataType::Time32(_)
|
||||
| ArrowDataType::Time64(_)
|
||||
| ArrowDataType::Duration(_)
|
||||
| ArrowDataType::Interval(_)
|
||||
| ArrowDataType::Binary
|
||||
| ArrowDataType::FixedSizeBinary(_)
|
||||
| ArrowDataType::LargeUtf8
|
||||
| ArrowDataType::LargeList(_)
|
||||
| ArrowDataType::FixedSizeList(_, _)
|
||||
| ArrowDataType::Struct(_)
|
||||
| ArrowDataType::Union(_, _, _)
|
||||
| ArrowDataType::Dictionary(_, _)
|
||||
| ArrowDataType::Decimal128(_, _)
|
||||
| ArrowDataType::Decimal256(_, _)
|
||||
| ArrowDataType::Map(_, _) => {
|
||||
unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type())
|
||||
}
|
||||
_ => unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Try to cast slice of `arrays` to vectors.
|
||||
pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
|
||||
arrays.iter().map(Self::try_into_vector).collect()
|
||||
}
|
||||
|
||||
/// Perform SQL like operation on `names` and a scalar `s`.
|
||||
pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
|
||||
let array = StringArray::from_slice(&names);
|
||||
let array = StringArray::from(names);
|
||||
|
||||
let filter =
|
||||
compute::like::like_utf8_scalar(&array, s).context(error::ArrowComputeSnafu)?;
|
||||
let filter = comparison::like_utf8_scalar(&array, s).context(error::ArrowComputeSnafu)?;
|
||||
|
||||
let result = compute::filter::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
|
||||
let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
|
||||
Helper::try_into_vector(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::Int32Array;
|
||||
use common_time::date::Date;
|
||||
use common_time::datetime::DateTime;
|
||||
use arrow::array::{
|
||||
ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
|
||||
Int32Array, Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray,
|
||||
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
|
||||
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
|
||||
};
|
||||
use arrow::datatypes::{Field, Int32Type};
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use super::*;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::ConcreteDataType;
|
||||
|
||||
#[test]
|
||||
fn test_try_into_vectors() {
|
||||
let arrays: Vec<ArrayRef> = vec![
|
||||
Arc::new(Int32Array::from_vec(vec![1])),
|
||||
Arc::new(Int32Array::from_vec(vec![2])),
|
||||
Arc::new(Int32Array::from_vec(vec![3])),
|
||||
Arc::new(Int32Array::from(vec![1])),
|
||||
Arc::new(Int32Array::from(vec![2])),
|
||||
Arc::new(Int32Array::from(vec![3])),
|
||||
];
|
||||
let vectors = Helper::try_into_vectors(&arrays);
|
||||
assert!(vectors.is_ok());
|
||||
@@ -246,10 +315,10 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_try_into_date_vector() {
|
||||
fn test_try_into_date_vector() {
|
||||
let vector = DateVector::from(vec![Some(1), Some(2), None]);
|
||||
let arrow_array = vector.to_arrow_array();
|
||||
assert_eq!(&arrow::datatypes::DataType::Date32, arrow_array.data_type());
|
||||
assert_eq!(&ArrowDataType::Date32, arrow_array.data_type());
|
||||
let vector_converted = Helper::try_into_vector(arrow_array).unwrap();
|
||||
assert_eq!(vector.len(), vector_converted.len());
|
||||
for i in 0..vector_converted.len() {
|
||||
@@ -258,7 +327,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_try_from_scalar_date_value() {
|
||||
fn test_try_from_scalar_date_value() {
|
||||
let vector = Helper::try_from_scalar_value(ScalarValue::Date32(Some(42)), 3).unwrap();
|
||||
assert_eq!(ConcreteDataType::date_datatype(), vector.data_type());
|
||||
assert_eq!(3, vector.len());
|
||||
@@ -268,7 +337,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_try_from_scalar_datetime_value() {
|
||||
fn test_try_from_scalar_datetime_value() {
|
||||
let vector = Helper::try_from_scalar_value(ScalarValue::Date64(Some(42)), 3).unwrap();
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), vector.data_type());
|
||||
assert_eq!(3, vector.len());
|
||||
@@ -277,6 +346,28 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
/// A list scalar becomes a constant vector whose every row holds the same list items.
#[test]
fn test_try_from_list_value() {
    let scalar_items = vec![ScalarValue::Int32(Some(1)), ScalarValue::Int32(Some(2))];
    let item_field = Field::new("item", ArrowDataType::Int32, true);
    let value = ScalarValue::List(Some(scalar_items), Box::new(item_field));

    let vector = Helper::try_from_scalar_value(value, 3).unwrap();

    let expected_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
    assert_eq!(expected_type, vector.data_type());
    assert_eq!(3, vector.len());

    for row in 0..vector.len() {
        let v = vector.get(row);
        let items = v.as_list().unwrap().unwrap().items().as_ref().unwrap();
        assert_eq!(vec![Value::Int32(1), Value::Int32(2)], **items);
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_like_utf8() {
|
||||
fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
|
||||
@@ -301,4 +392,40 @@ mod tests {
|
||||
let ret = Helper::like_utf8(names, "%").unwrap();
|
||||
assert_vector(vec!["greptime", "hello", "public", "world"], &ret);
|
||||
}
|
||||
|
||||
fn check_try_into_vector(array: impl Array + 'static) {
|
||||
let array: ArrayRef = Arc::new(array);
|
||||
let vector = Helper::try_into_vector(array.clone()).unwrap();
|
||||
assert_eq!(&array, &vector.to_arrow_array());
|
||||
}
|
||||
|
||||
/// Round-trips one array of each supported Arrow type through `Helper::try_into_vector`.
#[test]
fn test_try_into_vector() {
    // Null, boolean and binary.
    check_try_into_vector(NullArray::new(2));
    check_try_into_vector(BooleanArray::from(vec![true, false]));
    let binary_values = vec!["hello".as_bytes(), "world".as_bytes()];
    check_try_into_vector(LargeBinaryArray::from(binary_values));

    // Signed integers.
    check_try_into_vector(Int8Array::from(vec![1, 2, 3]));
    check_try_into_vector(Int16Array::from(vec![1, 2, 3]));
    check_try_into_vector(Int32Array::from(vec![1, 2, 3]));
    check_try_into_vector(Int64Array::from(vec![1, 2, 3]));

    // Unsigned integers.
    check_try_into_vector(UInt8Array::from(vec![1, 2, 3]));
    check_try_into_vector(UInt16Array::from(vec![1, 2, 3]));
    check_try_into_vector(UInt32Array::from(vec![1, 2, 3]));
    check_try_into_vector(UInt64Array::from(vec![1, 2, 3]));

    // Floats and strings.
    check_try_into_vector(Float32Array::from(vec![1.0, 2.0, 3.0]));
    check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
    check_try_into_vector(StringArray::from(vec!["hello", "world"]));

    // Dates.
    check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
    check_try_into_vector(Date64Array::from(vec![1, 2, 3]));

    // A list with a null entry followed by a populated entry.
    let list_data = vec![None, Some(vec![Some(6), Some(7)])];
    check_try_into_vector(ListArray::from_iter_primitive::<Int32Type, _, _>(list_data));

    // Timestamps in every time unit.
    check_try_into_vector(TimestampSecondArray::from(vec![1, 2, 3]));
    check_try_into_vector(TimestampMillisecondArray::from(vec![1, 2, 3]));
    check_try_into_vector(TimestampMicrosecondArray::from(vec![1, 2, 3]));
    check_try_into_vector(TimestampNanosecondArray::from(vec![1, 2, 3]));
}
|
||||
}
|
||||
|
||||
@@ -13,39 +13,48 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, ListArray};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use arrow::bitmap::MutableBitmap;
|
||||
use arrow::array::{
|
||||
Array, ArrayData, ArrayRef, BooleanBufferBuilder, Int32BufferBuilder, ListArray,
|
||||
};
|
||||
use arrow::buffer::Buffer;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::Result;
|
||||
use crate::prelude::*;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::ListType;
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::{impl_try_from_arrow_array_for_vector, impl_validity_for_vector};
|
||||
|
||||
type ArrowListArray = ListArray<i32>;
|
||||
use crate::value::{ListValue, ListValueRef, Value, ValueRef};
|
||||
use crate::vectors::{self, Helper, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of Lists, basically backed by Arrow's `ListArray`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct ListVector {
|
||||
array: ArrowListArray,
|
||||
inner_datatype: ConcreteDataType,
|
||||
array: ListArray,
|
||||
/// The datatype of the items in the list.
|
||||
item_type: ConcreteDataType,
|
||||
}
|
||||
|
||||
impl ListVector {
|
||||
/// Only iterate values in the [ListVector].
|
||||
///
|
||||
/// Be careful to use this method as it would ignore validity and replace null
|
||||
/// by empty vector.
|
||||
pub fn values_iter(&self) -> Box<dyn Iterator<Item = Result<VectorRef>> + '_> {
|
||||
Box::new(self.array.values_iter().map(VectorHelper::try_into_vector))
|
||||
/// Iterate elements as [VectorRef].
|
||||
pub fn values_iter(&self) -> impl Iterator<Item = Result<Option<VectorRef>>> + '_ {
|
||||
self.array
|
||||
.iter()
|
||||
.map(|value_opt| value_opt.map(Helper::try_into_vector).transpose())
|
||||
}
|
||||
|
||||
/// Returns a clone of the `ArrayData` behind the inner `ListArray`.
fn to_array_data(&self) -> ArrayData {
    let data = self.array.data();
    data.clone()
}
|
||||
|
||||
fn from_array_data_and_type(data: ArrayData, item_type: ConcreteDataType) -> Self {
|
||||
Self {
|
||||
array: ListArray::from(data),
|
||||
item_type,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
@@ -55,7 +64,7 @@ impl ListVector {
|
||||
|
||||
impl Vector for ListVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(self.inner_datatype.clone()))
|
||||
ConcreteDataType::List(ListType::new(self.item_type.clone()))
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
@@ -71,21 +80,25 @@ impl Vector for ListVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Arc::new(ListArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Box::new(ListArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
impl_validity_for_vector!(self.array)
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
let offsets_bytes = self.array.offsets().len() * std::mem::size_of::<i64>();
|
||||
let value_refs_bytes = self.array.values().len() * std::mem::size_of::<Arc<dyn Array>>();
|
||||
offsets_bytes + value_refs_bytes
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -93,7 +106,8 @@ impl Vector for ListVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(ListVector::from(self.array.slice(offset, length)))
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data_and_type(data, self.item_type.clone()))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -102,7 +116,7 @@ impl Vector for ListVector {
|
||||
}
|
||||
|
||||
let array = &self.array.value(index);
|
||||
let vector = VectorHelper::try_into_vector(array).unwrap_or_else(|_| {
|
||||
let vector = Helper::try_into_vector(array).unwrap_or_else(|_| {
|
||||
panic!(
|
||||
"arrow array with datatype {:?} cannot converted to our vector",
|
||||
array.data_type()
|
||||
@@ -113,7 +127,7 @@ impl Vector for ListVector {
|
||||
.collect::<Vec<Value>>();
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
self.inner_datatype.clone(),
|
||||
self.item_type.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
@@ -131,7 +145,7 @@ impl Serializable for ListVector {
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
None => Ok(JsonValue::Null),
|
||||
Some(v) => VectorHelper::try_into_vector(v)
|
||||
Some(v) => Helper::try_into_vector(v)
|
||||
.and_then(|v| v.serialize_to_json())
|
||||
.map(JsonValue::Array),
|
||||
})
|
||||
@@ -139,70 +153,64 @@ impl Serializable for ListVector {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ArrowListArray> for ListVector {
|
||||
fn from(array: ArrowListArray) -> Self {
|
||||
let inner_datatype = ConcreteDataType::from_arrow_type(match array.data_type() {
|
||||
ArrowDataType::List(field) => &field.data_type,
|
||||
_ => unreachable!(),
|
||||
impl From<ListArray> for ListVector {
|
||||
fn from(array: ListArray) -> Self {
|
||||
let item_type = ConcreteDataType::from_arrow_type(match array.data_type() {
|
||||
ArrowDataType::List(field) => field.data_type(),
|
||||
other => panic!(
|
||||
"Try to create ListVector from an arrow array with type {:?}",
|
||||
other
|
||||
),
|
||||
});
|
||||
Self {
|
||||
array,
|
||||
inner_datatype,
|
||||
}
|
||||
Self { array, item_type }
|
||||
}
|
||||
}
|
||||
|
||||
impl_try_from_arrow_array_for_vector!(ArrowListArray, ListVector);
|
||||
vectors::impl_try_from_arrow_array_for_vector!(ListArray, ListVector);
|
||||
|
||||
pub struct ListVectorIter<'a> {
|
||||
pub struct ListIter<'a> {
|
||||
vector: &'a ListVector,
|
||||
iter: ZipValidity<'a, usize, Range<usize>>,
|
||||
idx: usize,
|
||||
}
|
||||
|
||||
impl<'a> ListVectorIter<'a> {
|
||||
pub fn new(vector: &'a ListVector) -> ListVectorIter<'a> {
|
||||
let iter = ZipValidity::new(
|
||||
0..vector.len(),
|
||||
vector.array.validity().as_ref().map(|x| x.iter()),
|
||||
);
|
||||
|
||||
Self { vector, iter }
|
||||
impl<'a> ListIter<'a> {
|
||||
fn new(vector: &'a ListVector) -> ListIter {
|
||||
ListIter { vector, idx: 0 }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ListVectorIter<'a> {
|
||||
impl<'a> Iterator for ListIter<'a> {
|
||||
type Item = Option<ListValueRef<'a>>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|idx_opt| {
|
||||
idx_opt.map(|idx| ListValueRef::Indexed {
|
||||
vector: self.vector,
|
||||
idx,
|
||||
})
|
||||
})
|
||||
if self.idx >= self.vector.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let idx = self.idx;
|
||||
self.idx += 1;
|
||||
|
||||
if self.vector.is_null(idx) {
|
||||
return Some(None);
|
||||
}
|
||||
|
||||
Some(Some(ListValueRef::Indexed {
|
||||
vector: self.vector,
|
||||
idx,
|
||||
}))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.iter.size_hint()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
||||
self.iter.nth(n).map(|idx_opt| {
|
||||
idx_opt.map(|idx| ListValueRef::Indexed {
|
||||
vector: self.vector,
|
||||
idx,
|
||||
})
|
||||
})
|
||||
(self.vector.len(), Some(self.vector.len()))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for ListVector {
|
||||
type OwnedItem = ListValue;
|
||||
type RefItem<'a> = ListValueRef<'a>;
|
||||
type Iter<'a> = ListVectorIter<'a>;
|
||||
type Iter<'a> = ListIter<'a>;
|
||||
type Builder = ListVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
@@ -214,86 +222,68 @@ impl ScalarVector for ListVector {
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
ListVectorIter::new(self)
|
||||
ListIter::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Some codes are ported from arrow2's MutableListArray.
|
||||
// Ports from arrow's GenericListBuilder.
|
||||
// See https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/generic_list_builder.rs
|
||||
/// [ListVector] builder.
|
||||
pub struct ListVectorBuilder {
|
||||
inner_type: ConcreteDataType,
|
||||
offsets: Vec<i32>,
|
||||
values: Box<dyn MutableVector>,
|
||||
validity: Option<MutableBitmap>,
|
||||
item_type: ConcreteDataType,
|
||||
offsets_builder: Int32BufferBuilder,
|
||||
null_buffer_builder: NullBufferBuilder,
|
||||
values_builder: Box<dyn MutableVector>,
|
||||
}
|
||||
|
||||
impl ListVectorBuilder {
|
||||
pub fn with_type_capacity(inner_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
|
||||
let mut offsets = Vec::with_capacity(capacity + 1);
|
||||
offsets.push(0);
|
||||
// The actual required capacity might greater than the capacity of the `ListVector`
|
||||
// if there exists child vector that has more than one element.
|
||||
let values = inner_type.create_mutable_vector(capacity);
|
||||
/// Creates a new [`ListVectorBuilder`]. `item_type` is the data type of the list item, `capacity`
|
||||
/// is the number of items to pre-allocate space for in this builder.
|
||||
pub fn with_type_capacity(item_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
|
||||
let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
|
||||
offsets_builder.append(0);
|
||||
// The actual required capacity might be greater than the capacity of the `ListVector`
|
||||
// if the child vector has more than one element.
|
||||
let values_builder = item_type.create_mutable_vector(capacity);
|
||||
|
||||
ListVectorBuilder {
|
||||
inner_type,
|
||||
offsets,
|
||||
values,
|
||||
validity: None,
|
||||
item_type,
|
||||
offsets_builder,
|
||||
null_buffer_builder: NullBufferBuilder::new(capacity),
|
||||
values_builder,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn last_offset(&self) -> i32 {
|
||||
*self.offsets.last().unwrap()
|
||||
/// Finish the current variable-length list vector slot.
|
||||
fn finish_list(&mut self, is_valid: bool) {
|
||||
self.offsets_builder
|
||||
.append(i32::try_from(self.values_builder.len()).unwrap());
|
||||
self.null_buffer_builder.append(is_valid);
|
||||
}
|
||||
|
||||
fn push_null(&mut self) {
|
||||
self.offsets.push(self.last_offset());
|
||||
match &mut self.validity {
|
||||
Some(validity) => validity.push(false),
|
||||
None => self.init_validity(),
|
||||
}
|
||||
}
|
||||
|
||||
fn init_validity(&mut self) {
|
||||
let len = self.offsets.len() - 1;
|
||||
|
||||
let mut validity = MutableBitmap::with_capacity(self.offsets.capacity());
|
||||
validity.extend_constant(len, true);
|
||||
validity.set(len - 1, false);
|
||||
self.validity = Some(validity)
|
||||
self.finish_list(false);
|
||||
}
|
||||
|
||||
fn push_list_value(&mut self, list_value: &ListValue) -> Result<()> {
|
||||
if let Some(items) = list_value.items() {
|
||||
for item in &**items {
|
||||
self.values.push_value_ref(item.as_value_ref())?;
|
||||
self.values_builder.push_value_ref(item.as_value_ref())?;
|
||||
}
|
||||
}
|
||||
self.push_valid();
|
||||
|
||||
self.finish_list(true);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Needs to be called when a valid value was extended to this builder.
|
||||
fn push_valid(&mut self) {
|
||||
let size = self.values.len();
|
||||
let size = i32::try_from(size).unwrap();
|
||||
assert!(size >= *self.offsets.last().unwrap());
|
||||
|
||||
self.offsets.push(size);
|
||||
if let Some(validity) = &mut self.validity {
|
||||
validity.push(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MutableVector for ListVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::list_datatype(self.inner_type.clone())
|
||||
ConcreteDataType::list_datatype(self.item_type.clone())
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.offsets.len() - 1
|
||||
self.null_buffer_builder.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
@@ -348,51 +338,181 @@ impl ScalarVectorBuilder for ListVectorBuilder {
|
||||
self.push_value_ref(value.into()).unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"Failed to push value, expect value type {:?}, err:{}",
|
||||
self.inner_type, e
|
||||
self.item_type, e
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
let array = ArrowListArray::try_new(
|
||||
ConcreteDataType::list_datatype(self.inner_type.clone()).as_arrow_type(),
|
||||
std::mem::take(&mut self.offsets).into(),
|
||||
self.values.to_vector().to_arrow_array(),
|
||||
std::mem::take(&mut self.validity).map(|x| x.into()),
|
||||
)
|
||||
.unwrap(); // The `ListVectorBuilder` itself should ensure it always builds a valid array.
|
||||
let len = self.len();
|
||||
let values_vector = self.values_builder.to_vector();
|
||||
let values_arr = values_vector.to_arrow_array();
|
||||
let values_data = values_arr.data();
|
||||
|
||||
let offset_buffer = self.offsets_builder.finish();
|
||||
let null_bit_buffer = self.null_buffer_builder.finish();
|
||||
// Re-initialize the offsets_builder.
|
||||
self.offsets_builder.append(0);
|
||||
let data_type = ConcreteDataType::list_datatype(self.item_type.clone()).as_arrow_type();
|
||||
let array_data_builder = ArrayData::builder(data_type)
|
||||
.len(len)
|
||||
.add_buffer(offset_buffer)
|
||||
.add_child_data(values_data.clone())
|
||||
.null_bit_buffer(null_bit_buffer);
|
||||
|
||||
let array_data = unsafe { array_data_builder.build_unchecked() };
|
||||
let array = ListArray::from(array_data);
|
||||
|
||||
ListVector {
|
||||
array,
|
||||
inner_datatype: self.inner_type.clone(),
|
||||
item_type: self.item_type.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ports from https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/null_buffer_builder.rs
|
||||
/// Builder for creating the null bit buffer.
|
||||
/// This builder only materializes the buffer when we append `false`.
|
||||
/// If you only append `true`s to the builder, what you get will be
|
||||
/// `None` when calling [`finish`](#method.finish).
|
||||
/// This optimization is **very** important for the performance.
|
||||
#[derive(Debug)]
|
||||
struct NullBufferBuilder {
|
||||
bitmap_builder: Option<BooleanBufferBuilder>,
|
||||
/// Store the length of the buffer before materializing.
|
||||
len: usize,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl NullBufferBuilder {
|
||||
/// Creates a new empty builder.
|
||||
/// `capacity` is the number of bits in the null buffer.
|
||||
fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
bitmap_builder: None,
|
||||
len: 0,
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
if let Some(b) = &self.bitmap_builder {
|
||||
b.len()
|
||||
} else {
|
||||
self.len
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends a `true` into the builder
|
||||
/// to indicate that this item is not null.
|
||||
#[inline]
|
||||
fn append_non_null(&mut self) {
|
||||
if let Some(buf) = self.bitmap_builder.as_mut() {
|
||||
buf.append(true)
|
||||
} else {
|
||||
self.len += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends a `false` into the builder
|
||||
/// to indicate that this item is null.
|
||||
#[inline]
|
||||
fn append_null(&mut self) {
|
||||
self.materialize_if_needed();
|
||||
self.bitmap_builder.as_mut().unwrap().append(false);
|
||||
}
|
||||
|
||||
/// Appends a boolean value into the builder.
|
||||
#[inline]
|
||||
fn append(&mut self, not_null: bool) {
|
||||
if not_null {
|
||||
self.append_non_null()
|
||||
} else {
|
||||
self.append_null()
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the null buffer and resets the builder.
|
||||
/// Returns `None` if the builder only contains `true`s.
|
||||
fn finish(&mut self) -> Option<Buffer> {
|
||||
let buf = self.bitmap_builder.as_mut().map(|b| b.finish());
|
||||
self.bitmap_builder = None;
|
||||
self.len = 0;
|
||||
buf
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn materialize_if_needed(&mut self) {
|
||||
if self.bitmap_builder.is_none() {
|
||||
self.materialize()
|
||||
}
|
||||
}
|
||||
|
||||
#[cold]
|
||||
fn materialize(&mut self) {
|
||||
if self.bitmap_builder.is_none() {
|
||||
let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
|
||||
b.append_n(self.len, true);
|
||||
self.bitmap_builder = Some(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{MutableListArray, MutablePrimitiveArray, TryExtend};
|
||||
pub mod tests {
|
||||
use arrow::array::{Int32Array, Int32Builder, ListBuilder};
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
use crate::scalars::ScalarRef;
|
||||
use crate::types::ListType;
|
||||
use crate::vectors::Int32Vector;
|
||||
|
||||
pub fn new_list_vector(data: &[Option<Vec<Option<i32>>>]) -> ListVector {
|
||||
let mut builder =
|
||||
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
|
||||
for vec_opt in data {
|
||||
if let Some(vec) = vec_opt {
|
||||
let values = vec.iter().map(|v| Value::from(*v)).collect();
|
||||
let values = Some(Box::new(values));
|
||||
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());
|
||||
|
||||
builder.push(Some(ListValueRef::Ref { val: &list_value }));
|
||||
} else {
|
||||
builder.push(None);
|
||||
}
|
||||
}
|
||||
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
fn new_list_array(data: &[Option<Vec<Option<i32>>>]) -> ListArray {
|
||||
let mut builder = ListBuilder::new(Int32Builder::new());
|
||||
for vec_opt in data {
|
||||
if let Some(vec) = vec_opt {
|
||||
for value_opt in vec {
|
||||
builder.values().append_option(*value_opt);
|
||||
}
|
||||
|
||||
builder.append(true);
|
||||
} else {
|
||||
builder.append(false);
|
||||
}
|
||||
}
|
||||
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_vector() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1i32), Some(2), Some(3)]),
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
let list_vector = new_list_vector(&data);
|
||||
|
||||
let list_vector = ListVector {
|
||||
array: arrow_array.clone(),
|
||||
inner_datatype: ConcreteDataType::int32_datatype(),
|
||||
};
|
||||
assert_eq!(
|
||||
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
|
||||
list_vector.data_type()
|
||||
@@ -403,30 +523,34 @@ mod tests {
|
||||
assert!(list_vector.is_null(1));
|
||||
assert!(!list_vector.is_null(2));
|
||||
|
||||
let arrow_array = new_list_array(&data);
|
||||
assert_eq!(
|
||||
arrow_array,
|
||||
list_vector
|
||||
*list_vector
|
||||
.to_arrow_array()
|
||||
.as_any()
|
||||
.downcast_ref::<ArrowListArray>()
|
||||
.downcast_ref::<ListArray>()
|
||||
.unwrap()
|
||||
.clone()
|
||||
);
|
||||
assert_eq!(
|
||||
Validity::Slots(arrow_array.validity().unwrap()),
|
||||
list_vector.validity()
|
||||
);
|
||||
assert_eq!(
|
||||
arrow_array.offsets().len() * std::mem::size_of::<i64>()
|
||||
+ arrow_array.values().len() * std::mem::size_of::<Arc<dyn Array>>(),
|
||||
list_vector.memory_size()
|
||||
);
|
||||
let validity = list_vector.validity();
|
||||
assert!(!validity.is_all_null());
|
||||
assert!(!validity.is_all_valid());
|
||||
assert!(validity.is_set(0));
|
||||
assert!(!validity.is_set(1));
|
||||
assert!(validity.is_set(2));
|
||||
assert_eq!(256, list_vector.memory_size());
|
||||
|
||||
let slice = list_vector.slice(0, 2);
|
||||
let slice = list_vector.slice(0, 2).to_arrow_array();
|
||||
let sliced_array = slice.as_any().downcast_ref::<ListArray>().unwrap();
|
||||
assert_eq!(
|
||||
"ListArray[[1, 2, 3], None]",
|
||||
format!("{:?}", slice.to_arrow_array())
|
||||
Int32Array::from_iter_values([1, 2, 3]),
|
||||
*sliced_array
|
||||
.value(0)
|
||||
.as_any()
|
||||
.downcast_ref::<Int32Array>()
|
||||
.unwrap()
|
||||
);
|
||||
assert!(sliced_array.is_null(1));
|
||||
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
@@ -467,52 +591,48 @@ mod tests {
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1u32), Some(2), Some(3)]),
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<u32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
let arrow_array = new_list_array(&data);
|
||||
let array_ref: ArrayRef = Arc::new(arrow_array);
|
||||
let expect = new_list_vector(&data);
|
||||
|
||||
// Test try from ArrayRef
|
||||
let list_vector = ListVector::try_from_arrow_array(array_ref).unwrap();
|
||||
assert_eq!(
|
||||
"ListVector { array: ListArray[[1, 2, 3], None, [4, None, 6]], inner_datatype: UInt32(UInt32) }",
|
||||
format!("{:?}", list_vector)
|
||||
);
|
||||
assert_eq!(expect, list_vector);
|
||||
|
||||
// Test from
|
||||
let arrow_array = new_list_array(&data);
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
assert_eq!(expect, list_vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iter_list_vector_values() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1i64), Some(2), Some(3)]),
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i64>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
let list_vector = new_list_vector(&data);
|
||||
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
assert_eq!(
|
||||
ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())),
|
||||
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
|
||||
list_vector.data_type()
|
||||
);
|
||||
let mut iter = list_vector.values_iter();
|
||||
assert_eq!(
|
||||
"Int64[1, 2, 3]",
|
||||
format!("{:?}", iter.next().unwrap().unwrap().to_arrow_array())
|
||||
Arc::new(Int32Vector::from_slice(&[1, 2, 3])) as VectorRef,
|
||||
*iter.next().unwrap().unwrap().unwrap()
|
||||
);
|
||||
assert!(iter.next().unwrap().unwrap().is_none());
|
||||
assert_eq!(
|
||||
"Int64[]",
|
||||
format!("{:?}", iter.next().unwrap().unwrap().to_arrow_array())
|
||||
);
|
||||
assert_eq!(
|
||||
"Int64[4, None, 6]",
|
||||
format!("{:?}", iter.next().unwrap().unwrap().to_arrow_array())
|
||||
Arc::new(Int32Vector::from(vec![Some(4), None, Some(6)])) as VectorRef,
|
||||
*iter.next().unwrap().unwrap().unwrap(),
|
||||
);
|
||||
assert!(iter.next().is_none())
|
||||
}
|
||||
@@ -520,30 +640,18 @@ mod tests {
|
||||
#[test]
|
||||
fn test_serialize_to_json() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1i64), Some(2), Some(3)]),
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i64>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
let list_vector = new_list_vector(&data);
|
||||
assert_eq!(
|
||||
vec![json!([1, 2, 3]), json!(null), json!([4, null, 6]),],
|
||||
list_vector.serialize_to_json().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
fn new_list_vector(data: Vec<Option<Vec<Option<i32>>>>) -> ListVector {
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
ListVector::from(arrow_array)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_vector_builder() {
|
||||
let mut builder =
|
||||
@@ -567,14 +675,14 @@ mod tests {
|
||||
None,
|
||||
Some(vec![Some(7), Some(8), None]),
|
||||
];
|
||||
let input = new_list_vector(data);
|
||||
let input = new_list_vector(&data);
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(new_list_vector(vec![
|
||||
let expect: VectorRef = Arc::new(new_list_vector(&[
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
None,
|
||||
Some(vec![Some(7), Some(8), None]),
|
||||
@@ -599,7 +707,7 @@ mod tests {
|
||||
}));
|
||||
let vector = builder.finish();
|
||||
|
||||
let expect = new_list_vector(vec![None, Some(vec![Some(4), None, Some(6)])]);
|
||||
let expect = new_list_vector(&[None, Some(vec![Some(4), None, Some(6)])]);
|
||||
assert_eq!(expect, vector);
|
||||
|
||||
assert!(vector.get_data(0).is_none());
|
||||
|
||||
@@ -16,8 +16,7 @@ use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, NullArray};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use arrow::array::{Array, ArrayData, ArrayRef, NullArray};
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
@@ -27,21 +26,28 @@ use crate::types::NullType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// A vector where all elements are nulls.
|
||||
#[derive(PartialEq)]
|
||||
pub struct NullVector {
|
||||
array: NullArray,
|
||||
}
|
||||
|
||||
// TODO(yingwen): Support null vector with other logical types.
|
||||
impl NullVector {
|
||||
/// Create a new `NullVector` with `n` elements.
|
||||
pub fn new(n: usize) -> Self {
|
||||
Self {
|
||||
array: NullArray::new(ArrowDataType::Null, n),
|
||||
array: NullArray::new(n),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NullArray> for NullVector {
|
||||
@@ -68,21 +74,28 @@ impl Vector for NullVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
// TODO(yingwen): Replaced by clone after upgrading to arrow 28.0.
|
||||
let data = self.to_array_data();
|
||||
Arc::new(NullArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Box::new(NullArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
Validity::AllNull
|
||||
Validity::all_null(self.array.len())
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, _row: usize) -> bool {
|
||||
true
|
||||
}
|
||||
@@ -217,7 +230,7 @@ mod tests {
|
||||
|
||||
assert_eq!("NullVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert_eq!(Validity::AllNull, v.validity());
|
||||
assert!(v.validity().is_all_null());
|
||||
assert!(v.only_null());
|
||||
|
||||
for i in 0..32 {
|
||||
@@ -246,7 +259,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_null_vector_validity() {
|
||||
let vector = NullVector::new(5);
|
||||
assert_eq!(Validity::AllNull, vector.validity());
|
||||
assert!(vector.validity().is_all_null());
|
||||
assert_eq!(5, vector.null_count());
|
||||
}
|
||||
|
||||
|
||||
@@ -19,10 +19,11 @@ mod replicate;
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::types::PrimitiveElement;
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, ListVector,
|
||||
NullVector, PrimitiveVector, StringVector, TimestampVector, Vector, VectorRef,
|
||||
BinaryVector, BooleanVector, ListVector, NullVector, PrimitiveVector, StringVector, Vector,
|
||||
VectorRef,
|
||||
};
|
||||
|
||||
/// Vector compute operations.
|
||||
@@ -59,10 +60,10 @@ pub trait VectorOp {
|
||||
}
|
||||
|
||||
macro_rules! impl_scalar_vector_op {
|
||||
($( { $VectorType: ident, $replicate: ident } ),+) => {$(
|
||||
($($VectorType: ident),+) => {$(
|
||||
impl VectorOp for $VectorType {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
replicate::$replicate(self, offsets)
|
||||
replicate::replicate_scalar(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
@@ -77,28 +78,21 @@ macro_rules! impl_scalar_vector_op {
|
||||
)+};
|
||||
}
|
||||
|
||||
impl_scalar_vector_op!(
|
||||
{ BinaryVector, replicate_scalar },
|
||||
{ BooleanVector, replicate_scalar },
|
||||
{ ListVector, replicate_scalar },
|
||||
{ StringVector, replicate_scalar },
|
||||
{ DateVector, replicate_date },
|
||||
{ DateTimeVector, replicate_datetime },
|
||||
{ TimestampVector, replicate_timestamp }
|
||||
);
|
||||
impl_scalar_vector_op!(BinaryVector, BooleanVector, ListVector, StringVector);
|
||||
|
||||
impl VectorOp for ConstantVector {
|
||||
impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
replicate::replicate_constant(self, offsets)
|
||||
std::sync::Arc::new(replicate::replicate_primitive(self, offsets))
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<ConstantVector>());
|
||||
find_unique::find_unique_constant(self, selected, prev_vector);
|
||||
let prev_vector =
|
||||
prev_vector.and_then(|pv| pv.as_any().downcast_ref::<PrimitiveVector<T>>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_constant(self, filter)
|
||||
filter::filter_non_constant!(self, PrimitiveVector<T>, filter)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,21 +111,17 @@ impl VectorOp for NullVector {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> VectorOp for PrimitiveVector<T>
|
||||
where
|
||||
T: PrimitiveElement,
|
||||
{
|
||||
impl VectorOp for ConstantVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
replicate::replicate_primitive(self, offsets)
|
||||
self.replicate_vector(offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector =
|
||||
prev_vector.and_then(|pv| pv.as_any().downcast_ref::<PrimitiveVector<T>>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<ConstantVector>());
|
||||
find_unique::find_unique_constant(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, PrimitiveVector<T>, filter)
|
||||
self.filter_vector(filter)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,16 +12,15 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) use crate::vectors::constant::filter_constant;
|
||||
|
||||
macro_rules! filter_non_constant {
|
||||
($vector: expr, $VectorType: ty, $filter: ident) => {{
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::compute;
|
||||
use snafu::ResultExt;
|
||||
|
||||
let arrow_array = $vector.as_arrow();
|
||||
let filtered = arrow::compute::filter::filter(arrow_array, $filter.as_boolean_array())
|
||||
let filtered = compute::filter(arrow_array, $filter.as_boolean_array())
|
||||
.context(crate::error::ArrowComputeSnafu)?;
|
||||
Ok(Arc::new(<$VectorType>::try_from_arrow_array(filtered)?))
|
||||
}};
|
||||
@@ -33,9 +32,16 @@ pub(crate) use filter_non_constant;
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::timestamp::{
|
||||
TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
|
||||
};
|
||||
use crate::types::WrapperType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BooleanVector, ConstantVector, Int32Vector, NullVector, StringVector, VectorOp, VectorRef,
|
||||
BooleanVector, Int32Vector, NullVector, StringVector, VectorOp, VectorRef,
|
||||
};
|
||||
|
||||
fn check_filter_primitive(expect: &[i32], input: &[i32], filter: &[bool]) {
|
||||
@@ -105,7 +111,6 @@ mod tests {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::$ValueType;
|
||||
use $crate::vectors::{$VectorType, VectorRef};
|
||||
|
||||
let v = $VectorType::from_iterator((0..5).map($ValueType::$method));
|
||||
@@ -123,6 +128,18 @@ mod tests {
|
||||
fn test_filter_date_like() {
|
||||
impl_filter_date_like_test!(DateVector, Date, new);
|
||||
impl_filter_date_like_test!(DateTimeVector, DateTime, new);
|
||||
impl_filter_date_like_test!(TimestampVector, Timestamp, from_millis);
|
||||
|
||||
impl_filter_date_like_test!(TimestampSecondVector, TimestampSecond, from_native);
|
||||
impl_filter_date_like_test!(
|
||||
TimestampMillisecondVector,
|
||||
TimestampMillisecond,
|
||||
from_native
|
||||
);
|
||||
impl_filter_date_like_test!(
|
||||
TimestampMicrosecondVector,
|
||||
TimestampMicrosecond,
|
||||
from_native
|
||||
);
|
||||
impl_filter_date_like_test!(TimestampNanosecondVector, TimestampNanosecond, from_native);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,8 @@
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::vectors::{ConstantVector, NullVector, Vector};
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{NullVector, Vector};
|
||||
|
||||
// To implement `find_unique()` correctly, we need to keep in mind that always marks an element as
|
||||
// selected when it is different from the previous one, and leaves the `selected` unchanged
|
||||
@@ -70,7 +71,7 @@ pub(crate) fn find_unique_null(
|
||||
return;
|
||||
}
|
||||
|
||||
let is_first_not_duplicate = prev_vector.map(|pv| pv.is_empty()).unwrap_or(true);
|
||||
let is_first_not_duplicate = prev_vector.map(NullVector::is_empty).unwrap_or(true);
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
@@ -104,8 +105,11 @@ pub(crate) fn find_unique_constant(
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::{Int32Vector, StringVector, VectorOp};
|
||||
use crate::timestamp::*;
|
||||
use crate::vectors::{Int32Vector, StringVector, Vector, VectorOp};
|
||||
|
||||
fn check_bitmap(expect: &[bool], selected: &BitVec) {
|
||||
let actual = selected.iter().collect::<Vec<_>>();
|
||||
@@ -121,7 +125,7 @@ mod tests {
|
||||
input: impl Iterator<Item = Option<i32>>,
|
||||
prev: Option<&[i32]>,
|
||||
) {
|
||||
let input = Int32Vector::from_iter(input);
|
||||
let input = Int32Vector::from(input.collect::<Vec<_>>());
|
||||
let prev = prev.map(Int32Vector::from_slice);
|
||||
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
@@ -341,7 +345,6 @@ mod tests {
|
||||
|
||||
macro_rules! impl_find_unique_date_like_test {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use common_time::$ValueType;
|
||||
use $crate::vectors::$VectorType;
|
||||
|
||||
let v = $VectorType::from_iterator([8, 8, 9, 10].into_iter().map($ValueType::$method));
|
||||
@@ -356,6 +359,9 @@ mod tests {
|
||||
fn test_find_unique_date_like() {
|
||||
impl_find_unique_date_like_test!(DateVector, Date, new);
|
||||
impl_find_unique_date_like_test!(DateTimeVector, DateTime, new);
|
||||
impl_find_unique_date_like_test!(TimestampVector, Timestamp, from_millis);
|
||||
impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampNanosecondVector, TimestampNanosecond, from);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,12 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
use crate::prelude::*;
|
||||
pub(crate) use crate::vectors::constant::replicate_constant;
|
||||
pub(crate) use crate::vectors::date::replicate_date;
|
||||
pub(crate) use crate::vectors::datetime::replicate_datetime;
|
||||
pub(crate) use crate::vectors::null::replicate_null;
|
||||
pub(crate) use crate::vectors::primitive::replicate_primitive;
|
||||
pub(crate) use crate::vectors::timestamp::replicate_timestamp;
|
||||
|
||||
pub(crate) fn replicate_scalar<C: ScalarVector>(c: &C, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), c.len());
|
||||
@@ -43,8 +39,13 @@ pub(crate) fn replicate_scalar<C: ScalarVector>(c: &C, offsets: &[usize]) -> Vec
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use paste::paste;
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::{ConstantVector, Int32Vector, NullVector, StringVector, VectorOp};
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{Int32Vector, NullVector, StringVector, VectorOp};
|
||||
|
||||
#[test]
|
||||
fn test_replicate_primitive() {
|
||||
@@ -120,7 +121,6 @@ mod tests {
|
||||
|
||||
macro_rules! impl_replicate_date_like_test {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use common_time::$ValueType;
|
||||
use $crate::vectors::$VectorType;
|
||||
|
||||
let v = $VectorType::from_iterator((0..5).map($ValueType::$method));
|
||||
@@ -138,10 +138,33 @@ mod tests {
|
||||
}};
|
||||
}
|
||||
|
||||
macro_rules! impl_replicate_timestamp_test {
|
||||
($unit: ident) => {{
|
||||
paste!{
|
||||
use $crate::vectors::[<Timestamp $unit Vector>];
|
||||
use $crate::timestamp::[<Timestamp $unit>];
|
||||
let v = [<Timestamp $unit Vector>]::from_iterator((0..5).map([<Timestamp $unit>]::from));
|
||||
let offsets = [0, 1, 2, 3, 4];
|
||||
let v = v.replicate(&offsets);
|
||||
assert_eq!(4, v.len());
|
||||
for i in 0..4 {
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(i as i64 + 1, TimeUnit::$unit)),
|
||||
v.get(i)
|
||||
);
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replicate_date_like() {
|
||||
impl_replicate_date_like_test!(DateVector, Date, new);
|
||||
impl_replicate_date_like_test!(DateTimeVector, DateTime, new);
|
||||
impl_replicate_date_like_test!(TimestampVector, Timestamp, from_millis);
|
||||
|
||||
impl_replicate_timestamp_test!(Second);
|
||||
impl_replicate_timestamp_test!(Millisecond);
|
||||
impl_replicate_timestamp_test!(Microsecond);
|
||||
impl_replicate_timestamp_test!(Nanosecond);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,75 +13,111 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::iter::FromIterator;
|
||||
use std::slice::Iter;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, MutableArray, MutablePrimitiveArray, PrimitiveArray};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use arrow::array::{
|
||||
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, PrimitiveArray, PrimitiveBuilder,
|
||||
};
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{ConversionSnafu, Result, SerializeSnafu};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::{Primitive, PrimitiveElement};
|
||||
use crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
|
||||
};
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
pub type UInt8Vector = PrimitiveVector<UInt8Type>;
|
||||
pub type UInt16Vector = PrimitiveVector<UInt16Type>;
|
||||
pub type UInt32Vector = PrimitiveVector<UInt32Type>;
|
||||
pub type UInt64Vector = PrimitiveVector<UInt64Type>;
|
||||
|
||||
pub type Int8Vector = PrimitiveVector<Int8Type>;
|
||||
pub type Int16Vector = PrimitiveVector<Int16Type>;
|
||||
pub type Int32Vector = PrimitiveVector<Int32Type>;
|
||||
pub type Int64Vector = PrimitiveVector<Int64Type>;
|
||||
|
||||
pub type Float32Vector = PrimitiveVector<Float32Type>;
|
||||
pub type Float64Vector = PrimitiveVector<Float64Type>;
|
||||
|
||||
/// Vector for primitive data types.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct PrimitiveVector<T: Primitive> {
|
||||
pub(crate) array: PrimitiveArray<T>,
|
||||
pub struct PrimitiveVector<T: LogicalPrimitiveType> {
|
||||
array: PrimitiveArray<T::ArrowPrimitive>,
|
||||
}
|
||||
|
||||
impl<T: Primitive> PrimitiveVector<T> {
|
||||
pub fn new(array: PrimitiveArray<T>) -> Self {
|
||||
impl<T: LogicalPrimitiveType> PrimitiveVector<T> {
|
||||
pub fn new(array: PrimitiveArray<T::ArrowPrimitive>) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<T>>()
|
||||
.with_context(|| ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
let data = array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<T::ArrowPrimitive>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.data()
|
||||
.clone();
|
||||
let concrete_array = PrimitiveArray::<T::ArrowPrimitive>::from(data);
|
||||
Ok(Self::new(concrete_array))
|
||||
}
|
||||
|
||||
pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
|
||||
pub fn from_slice<P: AsRef<[T::Native]>>(slice: P) -> Self {
|
||||
let iter = slice.as_ref().iter().copied();
|
||||
Self {
|
||||
array: PrimitiveArray::from_slice(slice),
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_vec(array: Vec<T>) -> Self {
|
||||
pub fn from_wrapper_slice<P: AsRef<[T::Wrapper]>>(slice: P) -> Self {
|
||||
let iter = slice.as_ref().iter().copied().map(WrapperType::into_native);
|
||||
Self {
|
||||
array: PrimitiveArray::from_vec(array),
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
|
||||
pub fn from_vec(array: Vec<T::Native>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_values(iter),
|
||||
array: PrimitiveArray::from_iter_values(array),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
pub fn from_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &PrimitiveArray<T::ArrowPrimitive> {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> Self {
|
||||
Self::from(self.array.slice(offset, length))
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from(data),
|
||||
}
|
||||
}
|
||||
|
||||
// To distinguish from `Vector::slice()`.
|
||||
fn get_slice(&self, offset: usize, length: usize) -> Self {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Self::from_array_data(data)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PrimitiveElement> Vector for PrimitiveVector<T> {
|
||||
impl<T: LogicalPrimitiveType> Vector for PrimitiveVector<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
@@ -99,11 +135,13 @@ impl<T: PrimitiveElement> Vector for PrimitiveVector<T> {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Arc::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Box::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -111,7 +149,11 @@ impl<T: PrimitiveElement> Vector for PrimitiveVector<T> {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.values().len() * std::mem::size_of::<T>()
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -119,57 +161,80 @@ impl<T: PrimitiveElement> Vector for PrimitiveVector<T> {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(self.slice(offset, length))
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
vectors::impl_get_for_vector!(self.array, index)
|
||||
if self.array.is_valid(index) {
|
||||
// Safety: The index has been checked by `is_valid()`.
|
||||
let wrapper = unsafe { T::Wrapper::from_native(self.array.value_unchecked(index)) };
|
||||
wrapper.into()
|
||||
} else {
|
||||
Value::Null
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
if self.array.is_valid(index) {
|
||||
// Safety: The index has been checked by `is_valid()`.
|
||||
unsafe { self.array.value_unchecked(index).into_value_ref() }
|
||||
let wrapper = unsafe { T::Wrapper::from_native(self.array.value_unchecked(index)) };
|
||||
wrapper.into()
|
||||
} else {
|
||||
ValueRef::Null
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive> From<PrimitiveArray<T>> for PrimitiveVector<T> {
|
||||
fn from(array: PrimitiveArray<T>) -> Self {
|
||||
impl<T: LogicalPrimitiveType> fmt::Debug for PrimitiveVector<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_struct("PrimitiveVector")
|
||||
.field("array", &self.array)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> From<PrimitiveArray<T::ArrowPrimitive>> for PrimitiveVector<T> {
|
||||
fn from(array: PrimitiveArray<T::ArrowPrimitive>) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive> From<Vec<Option<T>>> for PrimitiveVector<T> {
|
||||
fn from(v: Vec<Option<T>>) -> Self {
|
||||
impl<T: LogicalPrimitiveType> From<Vec<Option<T::Native>>> for PrimitiveVector<T> {
|
||||
fn from(v: Vec<Option<T::Native>>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::<T>::from(v),
|
||||
array: PrimitiveArray::from_iter(v),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr> for PrimitiveVector<T> {
|
||||
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: MutablePrimitiveArray::<T>::from_iter(iter).into(),
|
||||
}
|
||||
pub struct PrimitiveIter<'a, T: LogicalPrimitiveType> {
|
||||
iter: ArrayIter<&'a PrimitiveArray<T::ArrowPrimitive>>,
|
||||
}
|
||||
|
||||
impl<'a, T: LogicalPrimitiveType> Iterator for PrimitiveIter<'a, T> {
|
||||
type Item = Option<T::Wrapper>;
|
||||
|
||||
fn next(&mut self) -> Option<Option<T::Wrapper>> {
|
||||
self.iter
|
||||
.next()
|
||||
.map(|item| item.map(T::Wrapper::from_native))
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.iter.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ScalarVector for PrimitiveVector<T>
|
||||
where
|
||||
T: PrimitiveElement,
|
||||
{
|
||||
type OwnedItem = T;
|
||||
type RefItem<'a> = T;
|
||||
impl<T: LogicalPrimitiveType> ScalarVector for PrimitiveVector<T> {
|
||||
type OwnedItem = T::Wrapper;
|
||||
type RefItem<'a> = T::Wrapper;
|
||||
type Iter<'a> = PrimitiveIter<'a, T>;
|
||||
type Builder = PrimitiveVectorBuilder<T>;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
Some(T::Wrapper::from_native(self.array.value(idx)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -182,59 +247,47 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
pub type UInt8Vector = PrimitiveVector<u8>;
|
||||
pub type UInt16Vector = PrimitiveVector<u16>;
|
||||
pub type UInt32Vector = PrimitiveVector<u32>;
|
||||
pub type UInt64Vector = PrimitiveVector<u64>;
|
||||
|
||||
pub type Int8Vector = PrimitiveVector<i8>;
|
||||
pub type Int16Vector = PrimitiveVector<i16>;
|
||||
pub type Int32Vector = PrimitiveVector<i32>;
|
||||
pub type Int64Vector = PrimitiveVector<i64>;
|
||||
|
||||
pub type Float32Vector = PrimitiveVector<f32>;
|
||||
pub type Float64Vector = PrimitiveVector<f64>;
|
||||
|
||||
pub struct PrimitiveIter<'a, T> {
|
||||
iter: ZipValidity<'a, &'a T, Iter<'a, T>>,
|
||||
}
|
||||
|
||||
impl<'a, T: Copy> Iterator for PrimitiveIter<'a, T> {
|
||||
type Item = Option<T>;
|
||||
|
||||
fn next(&mut self) -> Option<Option<T>> {
|
||||
self.iter.next().map(|v| v.copied())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PrimitiveElement> Serializable for PrimitiveVector<T> {
|
||||
impl<T: LogicalPrimitiveType> Serializable for PrimitiveVector<T> {
|
||||
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
|
||||
self.array
|
||||
.iter()
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
let res = self
|
||||
.iter_data()
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
// use WrapperType's Into<serde_json::Value> bound instead of
|
||||
// serde_json::to_value to facilitate customized serialization
|
||||
// for WrapperType
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PrimitiveVectorBuilder<T: PrimitiveElement> {
|
||||
pub(crate) mutable_array: MutablePrimitiveArray<T>,
|
||||
impl<T: LogicalPrimitiveType> PartialEq for PrimitiveVector<T> {
|
||||
fn eq(&self, other: &PrimitiveVector<T>) -> bool {
|
||||
self.array == other.array
|
||||
}
|
||||
}
|
||||
|
||||
pub type UInt8VectorBuilder = PrimitiveVectorBuilder<u8>;
|
||||
pub type UInt16VectorBuilder = PrimitiveVectorBuilder<u16>;
|
||||
pub type UInt32VectorBuilder = PrimitiveVectorBuilder<u32>;
|
||||
pub type UInt64VectorBuilder = PrimitiveVectorBuilder<u64>;
|
||||
pub type UInt8VectorBuilder = PrimitiveVectorBuilder<UInt8Type>;
|
||||
pub type UInt16VectorBuilder = PrimitiveVectorBuilder<UInt16Type>;
|
||||
pub type UInt32VectorBuilder = PrimitiveVectorBuilder<UInt32Type>;
|
||||
pub type UInt64VectorBuilder = PrimitiveVectorBuilder<UInt64Type>;
|
||||
|
||||
pub type Int8VectorBuilder = PrimitiveVectorBuilder<i8>;
|
||||
pub type Int16VectorBuilder = PrimitiveVectorBuilder<i16>;
|
||||
pub type Int32VectorBuilder = PrimitiveVectorBuilder<i32>;
|
||||
pub type Int64VectorBuilder = PrimitiveVectorBuilder<i64>;
|
||||
pub type Int8VectorBuilder = PrimitiveVectorBuilder<Int8Type>;
|
||||
pub type Int16VectorBuilder = PrimitiveVectorBuilder<Int16Type>;
|
||||
pub type Int32VectorBuilder = PrimitiveVectorBuilder<Int32Type>;
|
||||
pub type Int64VectorBuilder = PrimitiveVectorBuilder<Int64Type>;
|
||||
|
||||
pub type Float32VectorBuilder = PrimitiveVectorBuilder<f32>;
|
||||
pub type Float64VectorBuilder = PrimitiveVectorBuilder<f64>;
|
||||
pub type Float32VectorBuilder = PrimitiveVectorBuilder<Float32Type>;
|
||||
pub type Float64VectorBuilder = PrimitiveVectorBuilder<Float64Type>;
|
||||
|
||||
impl<T: PrimitiveElement> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
/// Builder to build a primitive vector.
|
||||
pub struct PrimitiveVectorBuilder<T: LogicalPrimitiveType> {
|
||||
mutable_array: PrimitiveBuilder<T::ArrowPrimitive>,
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
@@ -257,81 +310,62 @@ impl<T: PrimitiveElement> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
let primitive = T::cast_value_ref(value)?;
|
||||
self.mutable_array.push(primitive);
|
||||
match primitive {
|
||||
Some(v) => self.mutable_array.append_value(v.into_native()),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let primitive = T::cast_vector(vector)?;
|
||||
// Slice the underlying array to avoid creating a new Arc.
|
||||
let slice = primitive.slice(offset, length);
|
||||
self.mutable_array.extend_trusted_len(slice.iter());
|
||||
let slice = primitive.get_slice(offset, length);
|
||||
for v in slice.iter_data() {
|
||||
self.push(v);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ScalarVectorBuilder for PrimitiveVectorBuilder<T>
|
||||
where
|
||||
T: Scalar<VectorType = PrimitiveVector<T>> + PrimitiveElement,
|
||||
for<'a> T: ScalarRef<'a, ScalarType = T, VectorType = PrimitiveVector<T>>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
T: LogicalPrimitiveType,
|
||||
T::Wrapper: Scalar<VectorType = PrimitiveVector<T>>,
|
||||
for<'a> T::Wrapper: ScalarRef<'a, ScalarType = T::Wrapper>,
|
||||
for<'a> T::Wrapper: Scalar<RefType<'a> = T::Wrapper>,
|
||||
{
|
||||
type VectorType = PrimitiveVector<T>;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutablePrimitiveArray::with_capacity(capacity),
|
||||
mutable_array: PrimitiveBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
self.mutable_array
|
||||
.append_option(value.map(|v| v.into_native()));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
PrimitiveVector {
|
||||
array: std::mem::take(&mut self.mutable_array).into(),
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PrimitiveElement> PrimitiveVectorBuilder<T> {
|
||||
fn with_type_capacity(data_type: ConcreteDataType, capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutablePrimitiveArray::with_capacity_from(
|
||||
capacity,
|
||||
data_type.as_arrow_type(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_primitive<T: PrimitiveElement>(
|
||||
pub(crate) fn replicate_primitive<T: LogicalPrimitiveType>(
|
||||
vector: &PrimitiveVector<T>,
|
||||
offsets: &[usize],
|
||||
) -> VectorRef {
|
||||
Arc::new(replicate_primitive_with_type(
|
||||
vector,
|
||||
offsets,
|
||||
T::build_data_type(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_primitive_with_type<T: PrimitiveElement>(
|
||||
vector: &PrimitiveVector<T>,
|
||||
offsets: &[usize],
|
||||
data_type: ConcreteDataType,
|
||||
) -> PrimitiveVector<T> {
|
||||
assert_eq!(offsets.len(), vector.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return vector.slice(0, 0);
|
||||
return vector.get_slice(0, 0);
|
||||
}
|
||||
|
||||
let mut builder = PrimitiveVectorBuilder::<T>::with_type_capacity(
|
||||
data_type,
|
||||
*offsets.last().unwrap() as usize,
|
||||
);
|
||||
let mut builder = PrimitiveVectorBuilder::<T>::with_capacity(*offsets.last().unwrap() as usize);
|
||||
|
||||
let mut previous_offset = 0;
|
||||
|
||||
@@ -339,14 +373,15 @@ pub(crate) fn replicate_primitive_with_type<T: PrimitiveElement>(
|
||||
let repeat_times = *offset - previous_offset;
|
||||
match value {
|
||||
Some(data) => {
|
||||
builder.mutable_array.extend_trusted_len(
|
||||
std::iter::repeat(*data)
|
||||
.take(repeat_times)
|
||||
.map(Option::Some),
|
||||
);
|
||||
unsafe {
|
||||
// Safety: std::iter::Repeat and std::iter::Take implement TrustedLen.
|
||||
builder
|
||||
.mutable_array
|
||||
.append_trusted_len_iter(std::iter::repeat(data).take(repeat_times));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
builder.mutable_array.extend_constant(repeat_times, None);
|
||||
builder.mutable_array.append_nulls(repeat_times);
|
||||
}
|
||||
}
|
||||
previous_offset = *offset;
|
||||
@@ -356,6 +391,7 @@ pub(crate) fn replicate_primitive_with_type<T: PrimitiveElement>(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::Int32Array;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json;
|
||||
|
||||
@@ -364,11 +400,11 @@ mod tests {
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::Int64Type;
|
||||
|
||||
fn check_vec(v: PrimitiveVector<i32>) {
|
||||
fn check_vec(v: Int32Vector) {
|
||||
assert_eq!(4, v.len());
|
||||
assert_eq!("Int32Vector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert!(!v.only_null());
|
||||
|
||||
for i in 0..4 {
|
||||
@@ -387,26 +423,26 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_values() {
|
||||
let v = PrimitiveVector::<i32>::from_values(vec![1, 2, 3, 4]);
|
||||
let v = Int32Vector::from_values(vec![1, 2, 3, 4]);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_vec() {
|
||||
let v = PrimitiveVector::<i32>::from_vec(vec![1, 2, 3, 4]);
|
||||
let v = Int32Vector::from_vec(vec![1, 2, 3, 4]);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_slice() {
|
||||
let v = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
|
||||
let v = Int32Vector::from_slice(vec![1, 2, 3, 4]);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_primitive_vector_with_null_to_json() {
|
||||
let input = [Some(1i32), Some(2i32), None, Some(4i32), None];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
let mut builder = Int32VectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
@@ -421,15 +457,15 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let arrow_array = PrimitiveArray::from_slice(vec![1, 2, 3, 4]);
|
||||
let v = PrimitiveVector::from(arrow_array);
|
||||
let arrow_array = Int32Array::from(vec![1, 2, 3, 4]);
|
||||
let v = Int32Vector::from(arrow_array);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_primitive_vector_build_get() {
|
||||
let input = [Some(1i32), Some(2i32), None, Some(4i32), None];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
let mut builder = Int32VectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
@@ -448,29 +484,28 @@ mod tests {
|
||||
#[test]
|
||||
fn test_primitive_vector_validity() {
|
||||
let input = [Some(1i32), Some(2i32), None, None];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
let mut builder = Int32VectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let vector = builder.finish();
|
||||
assert_eq!(2, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(2, slots.null_count());
|
||||
assert!(!slots.get_bit(2));
|
||||
assert!(!slots.get_bit(3));
|
||||
assert_eq!(2, validity.null_count());
|
||||
assert!(!validity.is_set(2));
|
||||
assert!(!validity.is_set(3));
|
||||
|
||||
let vector = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
|
||||
let vector = Int32Vector::from_slice(vec![1, 2, 3, 4]);
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
assert!(vector.validity().is_all_valid());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_memory_size() {
|
||||
let v = PrimitiveVector::<i32>::from_slice((0..5).collect::<Vec<i32>>());
|
||||
assert_eq!(20, v.memory_size());
|
||||
let v = PrimitiveVector::<i64>::from(vec![Some(0i64), Some(1i64), Some(2i64), None, None]);
|
||||
assert_eq!(40, v.memory_size());
|
||||
let v = Int32Vector::from_slice((0..5).collect::<Vec<i32>>());
|
||||
assert_eq!(64, v.memory_size());
|
||||
let v = Int64Vector::from(vec![Some(0i64), Some(1i64), Some(2i64), None, None]);
|
||||
assert_eq!(128, v.memory_size());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -489,4 +524,29 @@ mod tests {
|
||||
let expect: VectorRef = Arc::new(Int64Vector::from_slice(&[123, 8, 9]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_wrapper_slice() {
|
||||
macro_rules! test_from_wrapper_slice {
|
||||
($vec: ident, $ty: ident) => {
|
||||
let from_wrapper_slice = $vec::from_wrapper_slice(&[
|
||||
$ty::from_native($ty::MAX),
|
||||
$ty::from_native($ty::MIN),
|
||||
]);
|
||||
let from_slice = $vec::from_slice(&[$ty::MAX, $ty::MIN]);
|
||||
assert_eq!(from_wrapper_slice, from_slice);
|
||||
};
|
||||
}
|
||||
|
||||
test_from_wrapper_slice!(UInt8Vector, u8);
|
||||
test_from_wrapper_slice!(Int8Vector, i8);
|
||||
test_from_wrapper_slice!(UInt16Vector, u16);
|
||||
test_from_wrapper_slice!(Int16Vector, i16);
|
||||
test_from_wrapper_slice!(UInt32Vector, u32);
|
||||
test_from_wrapper_slice!(Int32Vector, i32);
|
||||
test_from_wrapper_slice!(UInt64Vector, u64);
|
||||
test_from_wrapper_slice!(Int64Vector, i64);
|
||||
test_from_wrapper_slice!(Float32Vector, f32);
|
||||
test_from_wrapper_slice!(Float64Vector, f64);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,22 +15,19 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, MutableArray, Utf8ValuesIter};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::arrow_array::{MutableStringArray, StringArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, SerializeSnafu};
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::StringType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// String array wrapper
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
/// Vector of strings.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct StringVector {
|
||||
array: StringArray,
|
||||
}
|
||||
@@ -39,6 +36,16 @@ impl StringVector {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringArray> for StringVector {
|
||||
@@ -50,19 +57,7 @@ impl From<StringArray> for StringVector {
|
||||
impl From<Vec<Option<String>>> for StringVector {
|
||||
fn from(data: Vec<Option<String>>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<String>> for StringVector {
|
||||
fn from(data: Vec<String>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(
|
||||
data.into_iter()
|
||||
.map(Option::Some)
|
||||
.collect::<Vec<Option<String>>>(),
|
||||
),
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -70,7 +65,31 @@ impl From<Vec<String>> for StringVector {
|
||||
impl From<Vec<Option<&str>>> for StringVector {
|
||||
fn from(data: Vec<Option<&str>>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(data),
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[Option<String>]> for StringVector {
|
||||
fn from(data: &[Option<String>]) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[Option<&str>]> for StringVector {
|
||||
fn from(data: &[Option<&str>]) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<String>> for StringVector {
|
||||
fn from(data: Vec<String>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data.into_iter().map(Some)),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -78,18 +97,14 @@ impl From<Vec<Option<&str>>> for StringVector {
|
||||
impl From<Vec<&str>> for StringVector {
|
||||
fn from(data: Vec<&str>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(
|
||||
data.into_iter()
|
||||
.map(Option::Some)
|
||||
.collect::<Vec<Option<&str>>>(),
|
||||
),
|
||||
array: StringArray::from_iter(data.into_iter().map(Some)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for StringVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::String(StringType::default())
|
||||
ConcreteDataType::string_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
@@ -105,11 +120,13 @@ impl Vector for StringVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Arc::new(StringArray::from(data))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
let data = self.to_array_data();
|
||||
Box::new(StringArray::from(data))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -117,7 +134,11 @@ impl Vector for StringVector {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.len() * std::mem::size_of::<i64>() + self.array.values().len()
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -125,7 +146,8 @@ impl Vector for StringVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self::from(self.array.slice(offset, length)))
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -140,7 +162,7 @@ impl Vector for StringVector {
|
||||
impl ScalarVector for StringVector {
|
||||
type OwnedItem = String;
|
||||
type RefItem<'a> = &'a str;
|
||||
type Iter<'a> = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, i32>>;
|
||||
type Iter<'a> = ArrayIter<&'a StringArray>;
|
||||
type Builder = StringVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
@@ -157,7 +179,7 @@ impl ScalarVector for StringVector {
|
||||
}
|
||||
|
||||
pub struct StringVectorBuilder {
|
||||
buffer: MutableStringArray,
|
||||
mutable_array: MutableStringArray,
|
||||
}
|
||||
|
||||
impl MutableVector for StringVectorBuilder {
|
||||
@@ -166,7 +188,7 @@ impl MutableVector for StringVectorBuilder {
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
self.mutable_array.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
@@ -182,12 +204,15 @@ impl MutableVector for StringVectorBuilder {
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_string()?);
|
||||
match value.as_string()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self.buffer, vector, StringVector, offset, length)
|
||||
vectors::impl_extend_for_builder!(self, vector, StringVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,30 +221,30 @@ impl ScalarVectorBuilder for StringVectorBuilder {
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: MutableStringArray::with_capacity(capacity),
|
||||
mutable_array: MutableStringArray::with_capacity(capacity, 0),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer.push(value)
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: std::mem::take(&mut self.buffer).into(),
|
||||
StringVector {
|
||||
array: self.mutable_array.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for StringVector {
|
||||
fn serialize_to_json(&self) -> crate::error::Result<Vec<JsonValue>> {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
self.iter_data()
|
||||
.map(|v| match v {
|
||||
None => Ok(serde_json::Value::Null),
|
||||
Some(s) => serde_json::to_value(s),
|
||||
})
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
.context(error::SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,60 +252,9 @@ vectors::impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json;
|
||||
use arrow::datatypes::DataType;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_misc() {
|
||||
let strs = vec!["hello", "greptime", "rust"];
|
||||
let v = StringVector::from(strs.clone());
|
||||
assert_eq!(3, v.len());
|
||||
assert_eq!("StringVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(41, v.memory_size());
|
||||
|
||||
for (i, s) in strs.iter().enumerate() {
|
||||
assert_eq!(Value::from(*s), v.get(i));
|
||||
assert_eq!(ValueRef::from(*s), v.get_ref(i));
|
||||
assert_eq!(Value::from(*s), v.try_get(i).unwrap());
|
||||
}
|
||||
|
||||
let arrow_arr = v.to_arrow_array();
|
||||
assert_eq!(3, arrow_arr.len());
|
||||
assert_eq!(&ArrowDataType::Utf8, arrow_arr.data_type());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_string_vector() {
|
||||
let mut builder = StringVectorBuilder::with_capacity(3);
|
||||
builder.push(Some("hello"));
|
||||
builder.push(None);
|
||||
builder.push(Some("world"));
|
||||
let string_vector = builder.finish();
|
||||
let serialized =
|
||||
serde_json::to_string(&string_vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["hello",null,"world"]"#, serialized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let mut builder = MutableStringArray::new();
|
||||
builder.push(Some("A"));
|
||||
builder.push(Some("B"));
|
||||
builder.push::<&str>(None);
|
||||
builder.push(Some("D"));
|
||||
let string_array: StringArray = builder.into();
|
||||
let vector = StringVector::from(string_array);
|
||||
assert_eq!(
|
||||
r#"["A","B",null,"D"]"#,
|
||||
serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_build_get() {
|
||||
@@ -310,7 +284,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_builder() {
|
||||
let mut builder = StringType::default().create_mutable_vector(3);
|
||||
let mut builder = StringVectorBuilder::with_capacity(3);
|
||||
builder.push_value_ref(ValueRef::String("hello")).unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
|
||||
@@ -324,4 +298,73 @@ mod tests {
|
||||
let expect: VectorRef = Arc::new(StringVector::from_slice(&["hello", "one", "two"]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_misc() {
|
||||
let strs = vec!["hello", "greptime", "rust"];
|
||||
let v = StringVector::from(strs.clone());
|
||||
assert_eq!(3, v.len());
|
||||
assert_eq!("StringVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(128, v.memory_size());
|
||||
|
||||
for (i, s) in strs.iter().enumerate() {
|
||||
assert_eq!(Value::from(*s), v.get(i));
|
||||
assert_eq!(ValueRef::from(*s), v.get_ref(i));
|
||||
assert_eq!(Value::from(*s), v.try_get(i).unwrap());
|
||||
}
|
||||
|
||||
let arrow_arr = v.to_arrow_array();
|
||||
assert_eq!(3, arrow_arr.len());
|
||||
assert_eq!(&DataType::Utf8, arrow_arr.data_type());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_string_vector() {
|
||||
let mut builder = StringVectorBuilder::with_capacity(3);
|
||||
builder.push(Some("hello"));
|
||||
builder.push(None);
|
||||
builder.push(Some("world"));
|
||||
let string_vector = builder.finish();
|
||||
let serialized =
|
||||
serde_json::to_string(&string_vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["hello",null,"world"]"#, serialized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let mut builder = MutableStringArray::new();
|
||||
builder.append_option(Some("A"));
|
||||
builder.append_option(Some("B"));
|
||||
builder.append_null();
|
||||
builder.append_option(Some("D"));
|
||||
let string_array: StringArray = builder.finish();
|
||||
let vector = StringVector::from(string_array);
|
||||
assert_eq!(
|
||||
r#"["A","B",null,"D"]"#,
|
||||
serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_non_option_string() {
|
||||
let nul = String::from_utf8(vec![0]).unwrap();
|
||||
let corpus = vec!["😅😅😅", "😍😍😍😍", "🥵🥵", nul.as_str()];
|
||||
let vector = StringVector::from(corpus);
|
||||
let serialized = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["😅😅😅","😍😍😍😍","🥵🥵","\u0000"]"#, serialized);
|
||||
|
||||
let corpus = vec![
|
||||
"🀀🀀🀀".to_string(),
|
||||
"🀁🀁🀁".to_string(),
|
||||
"🀂🀂🀂".to_string(),
|
||||
"🀃🀃🀃".to_string(),
|
||||
"🀆🀆".to_string(),
|
||||
];
|
||||
let vector = StringVector::from(corpus);
|
||||
let serialized = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["🀀🀀🀀","🀁🀁🀁","🀂🀂🀂","🀃🀃🀃","🀆🀆"]"#, serialized);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,308 +12,20 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, PrimitiveArray};
|
||||
use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::prelude::{
|
||||
MutableVector, ScalarVector, ScalarVectorBuilder, Validity, Value, ValueRef, Vector, VectorRef,
|
||||
use crate::types::{
|
||||
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
|
||||
TimestampSecondType,
|
||||
};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::TimestampType;
|
||||
use crate::vectors::{PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
/// `TimestampVector` stores timestamp in millisecond since UNIX Epoch.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct TimestampVector {
|
||||
array: PrimitiveVector<i64>,
|
||||
}
|
||||
pub type TimestampSecondVector = PrimitiveVector<TimestampSecondType>;
|
||||
pub type TimestampSecondVectorBuilder = PrimitiveVectorBuilder<TimestampSecondType>;
|
||||
|
||||
impl TimestampVector {
|
||||
pub fn new(array: PrimitiveArray<i64>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector { array },
|
||||
}
|
||||
}
|
||||
pub type TimestampMillisecondVector = PrimitiveVector<TimestampMillisecondType>;
|
||||
pub type TimestampMillisecondVectorBuilder = PrimitiveVectorBuilder<TimestampMillisecondType>;
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i64>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
pub type TimestampMicrosecondVector = PrimitiveVector<TimestampMicrosecondType>;
|
||||
pub type TimestampMicrosecondVectorBuilder = PrimitiveVectorBuilder<TimestampMicrosecondType>;
|
||||
|
||||
pub fn from_values<I: IntoIterator<Item = i64>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector {
|
||||
array: PrimitiveArray::from_values(iter),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
self.array.as_arrow()
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for TimestampVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::timestamp_millis_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"TimestampVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Arc::new(PrimitiveArray::new(
|
||||
TimestampType::new(TimeUnit::Millisecond).as_arrow_type(),
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let values = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, None),
|
||||
values,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.memory_size()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self {
|
||||
array: PrimitiveVector {
|
||||
array: self.array.array.slice(offset, length),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
match self.array.get(index) {
|
||||
Value::Null => Value::Null,
|
||||
Value::Int64(v) => Value::Timestamp(Timestamp::from_millis(v)),
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int64(v) => ValueRef::Timestamp(Timestamp::from_millis(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for TimestampVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
Ok(self
|
||||
.array
|
||||
.iter_data()
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for TimestampVector {
|
||||
type OwnedItem = Timestamp;
|
||||
type RefItem<'a> = Timestamp;
|
||||
type Iter<'a> = TimestampDataIter<'a>;
|
||||
type Builder = TimestampVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
self.array.get_data(idx).map(Timestamp::from_millis)
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
TimestampDataIter {
|
||||
iter: self.array.iter_data(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TimestampDataIter<'a> {
|
||||
iter: PrimitiveIter<'a, i64>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TimestampDataIter<'a> {
|
||||
type Item = Option<Timestamp>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|v| v.map(Timestamp::from_millis))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TimestampVectorBuilder {
|
||||
buffer: PrimitiveVectorBuilder<i64>,
|
||||
}
|
||||
|
||||
impl MutableVector for TimestampVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::timestamp_millis_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
// TODO(hl): vector and vector builder should also support customized time unit.
|
||||
self.buffer.push(
|
||||
value
|
||||
.as_timestamp()?
|
||||
.map(|t| t.convert_to(TimeUnit::Millisecond)),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for TimestampVectorBuilder {
|
||||
type VectorType = TimestampVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: PrimitiveVectorBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes a Timestamp value into vector builder. The timestamp must be with time unit
|
||||
/// `Second`/`MilliSecond`/`Microsecond`.
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer
|
||||
.push(value.map(|v| v.convert_to(TimeUnit::Millisecond)));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: self.buffer.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_timestamp(vector: &TimestampVector, offsets: &[usize]) -> VectorRef {
|
||||
let array = crate::vectors::primitive::replicate_primitive_with_type(
|
||||
&vector.array,
|
||||
offsets,
|
||||
vector.data_type(),
|
||||
);
|
||||
Arc::new(TimestampVector { array })
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
pub fn test_build_timestamp_vector() {
|
||||
let mut builder = TimestampVectorBuilder::with_capacity(3);
|
||||
builder.push(Some(Timestamp::new(1, TimeUnit::Second)));
|
||||
builder.push(None);
|
||||
builder.push(Some(Timestamp::new(2, TimeUnit::Millisecond)));
|
||||
|
||||
let vector = builder.finish();
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
vector.data_type()
|
||||
);
|
||||
assert_eq!(3, vector.len());
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(1000, TimeUnit::Millisecond)),
|
||||
vector.get(0)
|
||||
);
|
||||
|
||||
assert_eq!(Value::Null, vector.get(1));
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::Millisecond)),
|
||||
vector.get(2)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
vec![
|
||||
Some(Timestamp::new(1000, TimeUnit::Millisecond)),
|
||||
None,
|
||||
Some(Timestamp::new(2, TimeUnit::Millisecond)),
|
||||
],
|
||||
vector.iter_data().collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_from_arrow() {
|
||||
let vector =
|
||||
TimestampVector::from_slice(&[Timestamp::from_millis(1), Timestamp::from_millis(2)]);
|
||||
let arrow = vector.as_arrow().slice(0, vector.len());
|
||||
let vector2 = TimestampVector::try_from_arrow_array(&arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
}
|
||||
}
|
||||
pub type TimestampNanosecondVector = PrimitiveVector<TimestampNanosecondType>;
|
||||
pub type TimestampNanosecondVectorBuilder = PrimitiveVectorBuilder<TimestampNanosecondType>;
|
||||
|
||||
@@ -9,10 +9,11 @@ default = []
|
||||
test = []
|
||||
|
||||
[dependencies]
|
||||
arrow = "26.0.0"
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion-common = "14.0"
|
||||
datafusion-common = "14.0.0"
|
||||
enum_dispatch = "0.3"
|
||||
num = "0.4"
|
||||
num-traits = "0.2"
|
||||
@@ -21,4 +22,3 @@ paste = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
arrow = "26.0"
|
||||
|
||||
@@ -12,18 +12,13 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use arrow::array::{
|
||||
Array, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
|
||||
Int32Array, Int64Array, Int8Array, ListArray, UInt16Array, UInt32Array, UInt64Array,
|
||||
UInt8Array,
|
||||
};
|
||||
use arrow::array::{self, Array, ListArray, PrimitiveArray};
|
||||
use arrow::datatypes::DataType;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::Timestamp;
|
||||
use common_time::timestamp::Timestamp;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{ConversionSnafu, Result};
|
||||
use crate::prelude::ConcreteDataType;
|
||||
use crate::value::{ListValue, Value};
|
||||
|
||||
pub type BinaryArray = arrow::array::LargeBinaryArray;
|
||||
@@ -41,7 +36,6 @@ macro_rules! cast_array {
|
||||
};
|
||||
}
|
||||
|
||||
// TODO(yingwen): Remove this function.
|
||||
pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
|
||||
if array.is_null(idx) {
|
||||
return Ok(Value::Null);
|
||||
@@ -49,46 +43,42 @@ pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
|
||||
|
||||
let result = match array.data_type() {
|
||||
DataType::Null => Value::Null,
|
||||
DataType::Boolean => Value::Boolean(cast_array!(array, BooleanArray).value(idx)),
|
||||
DataType::Binary => Value::Binary(cast_array!(array, BinaryArray).value(idx).into()),
|
||||
DataType::Int8 => Value::Int8(cast_array!(array, Int8Array).value(idx)),
|
||||
DataType::Int16 => Value::Int16(cast_array!(array, Int16Array).value(idx)),
|
||||
DataType::Int32 => Value::Int32(cast_array!(array, Int32Array).value(idx)),
|
||||
DataType::Int64 => Value::Int64(cast_array!(array, Int64Array).value(idx)),
|
||||
DataType::UInt8 => Value::UInt8(cast_array!(array, UInt8Array).value(idx)),
|
||||
DataType::UInt16 => Value::UInt16(cast_array!(array, UInt16Array).value(idx)),
|
||||
DataType::UInt32 => Value::UInt32(cast_array!(array, UInt32Array).value(idx)),
|
||||
DataType::UInt64 => Value::UInt64(cast_array!(array, UInt64Array).value(idx)),
|
||||
DataType::Float32 => Value::Float32(cast_array!(array, Float32Array).value(idx).into()),
|
||||
DataType::Float64 => Value::Float64(cast_array!(array, Float64Array).value(idx).into()),
|
||||
DataType::Utf8 => Value::String(cast_array!(array, StringArray).value(idx).into()),
|
||||
DataType::Date32 => Value::Date(cast_array!(array, Date32Array).value(idx).into()),
|
||||
DataType::Date64 => Value::DateTime(cast_array!(array, Date64Array).value(idx).into()),
|
||||
DataType::Timestamp(t, _) => match t {
|
||||
arrow::datatypes::TimeUnit::Second => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampSecondArray).value(idx),
|
||||
TimeUnit::Second,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Millisecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampMillisecondArray).value(idx),
|
||||
TimeUnit::Millisecond,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Microsecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampMicrosecondArray).value(idx),
|
||||
TimeUnit::Microsecond,
|
||||
)),
|
||||
arrow::datatypes::TimeUnit::Nanosecond => Value::Timestamp(Timestamp::new(
|
||||
cast_array!(array, arrow::array::TimestampNanosecondArray).value(idx),
|
||||
TimeUnit::Nanosecond,
|
||||
)),
|
||||
},
|
||||
DataType::Boolean => Value::Boolean(cast_array!(array, array::BooleanArray).value(idx)),
|
||||
DataType::Binary | DataType::LargeBinary => {
|
||||
Value::Binary(cast_array!(array, BinaryArray).value(idx).into())
|
||||
}
|
||||
DataType::Int8 => Value::Int8(cast_array!(array, PrimitiveArray::<i8>).value(idx)),
|
||||
DataType::Int16 => Value::Int16(cast_array!(array, PrimitiveArray::<i16>).value(idx)),
|
||||
DataType::Int32 => Value::Int32(cast_array!(array, PrimitiveArray::<i32>).value(idx)),
|
||||
DataType::Int64 => Value::Int64(cast_array!(array, PrimitiveArray::<i64>).value(idx)),
|
||||
DataType::UInt8 => Value::UInt8(cast_array!(array, PrimitiveArray::<u8>).value(idx)),
|
||||
DataType::UInt16 => Value::UInt16(cast_array!(array, PrimitiveArray::<u16>).value(idx)),
|
||||
DataType::UInt32 => Value::UInt32(cast_array!(array, PrimitiveArray::<u32>).value(idx)),
|
||||
DataType::UInt64 => Value::UInt64(cast_array!(array, PrimitiveArray::<u64>).value(idx)),
|
||||
DataType::Float32 => {
|
||||
Value::Float32(cast_array!(array, PrimitiveArray::<f32>).value(idx).into())
|
||||
}
|
||||
DataType::Float64 => {
|
||||
Value::Float64(cast_array!(array, PrimitiveArray::<f64>).value(idx).into())
|
||||
}
|
||||
DataType::Utf8 | DataType::LargeUtf8 => {
|
||||
Value::String(cast_array!(array, StringArray).value(idx).into())
|
||||
}
|
||||
DataType::Timestamp(t, _) => {
|
||||
let value = cast_array!(array, PrimitiveArray::<i64>).value(idx);
|
||||
let unit = match ConcreteDataType::from_arrow_time_unit(t) {
|
||||
ConcreteDataType::Timestamp(t) => t.unit,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
Value::Timestamp(Timestamp::new(value, unit))
|
||||
}
|
||||
DataType::List(_) => {
|
||||
let array = cast_array!(array, ListArray).value(idx);
|
||||
let item_type = ConcreteDataType::try_from(array.data_type())?;
|
||||
let array = cast_array!(array, ListArray::<i32>).value(idx);
|
||||
let inner_datatype = ConcreteDataType::try_from(array.data_type())?;
|
||||
let values = (0..array.len())
|
||||
.map(|i| arrow_array_get(&*array, i))
|
||||
.collect::<Result<Vec<Value>>>()?;
|
||||
Value::List(ListValue::new(Some(Box::new(values)), item_type))
|
||||
Value::List(ListValue::new(Some(Box::new(values)), inner_datatype))
|
||||
}
|
||||
_ => unimplemented!("Arrow array datatype: {:?}", array.data_type()),
|
||||
};
|
||||
@@ -98,74 +88,45 @@ pub fn arrow_array_get(array: &dyn Array, idx: usize) -> Result<Value> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
BooleanArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
|
||||
LargeBinaryArray, TimestampMicrosecondArray, TimestampMillisecondArray,
|
||||
TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array,
|
||||
MutableListArray, MutablePrimitiveArray, TryExtend, UInt16Array, UInt32Array, UInt64Array,
|
||||
UInt8Array,
|
||||
};
|
||||
use arrow::datatypes::Int32Type;
|
||||
use arrow::buffer::Buffer;
|
||||
use arrow::datatypes::{DataType, TimeUnit as ArrowTimeUnit};
|
||||
use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
use paste::paste;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::types::TimestampType;
|
||||
|
||||
macro_rules! test_arrow_array_get_for_timestamps {
|
||||
( $($unit: ident), *) => {
|
||||
$(
|
||||
paste! {
|
||||
let mut builder = arrow::array::[<Timestamp $unit Array>]::builder(3);
|
||||
builder.append_value(1);
|
||||
builder.append_value(0);
|
||||
builder.append_value(-1);
|
||||
let ts_array = Arc::new(builder.finish()) as Arc<dyn Array>;
|
||||
let v = arrow_array_get(&ts_array, 1).unwrap();
|
||||
assert_eq!(
|
||||
ConcreteDataType::Timestamp(TimestampType::$unit(
|
||||
$crate::types::[<Timestamp $unit Type>]::default(),
|
||||
)),
|
||||
v.data_type()
|
||||
);
|
||||
}
|
||||
)*
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_timestamp_array() {
|
||||
test_arrow_array_get_for_timestamps![Second, Millisecond, Microsecond, Nanosecond];
|
||||
}
|
||||
use crate::prelude::Vector;
|
||||
use crate::vectors::TimestampVector;
|
||||
|
||||
#[test]
|
||||
fn test_arrow_array_access() {
|
||||
let array1 = BooleanArray::from(vec![true, true, false, false]);
|
||||
let array1 = BooleanArray::from_slice(vec![true, true, false, false]);
|
||||
assert_eq!(Value::Boolean(true), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Int8Array::from(vec![1, 2, 3, 4]);
|
||||
let array1 = Int8Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int8(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = UInt8Array::from(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt8Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt8(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Int16Array::from(vec![1, 2, 3, 4]);
|
||||
let array1 = Int16Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int16(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = UInt16Array::from(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt16Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt16(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Int32Array::from(vec![1, 2, 3, 4]);
|
||||
let array1 = Int32Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int32(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = UInt32Array::from(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt32Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt32(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array = Int64Array::from(vec![1, 2, 3, 4]);
|
||||
let array = Int64Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::Int64(2), arrow_array_get(&array, 1).unwrap());
|
||||
let array1 = UInt64Array::from(vec![1, 2, 3, 4]);
|
||||
let array1 = UInt64Array::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_eq!(Value::UInt64(2), arrow_array_get(&array1, 1).unwrap());
|
||||
let array1 = Float32Array::from(vec![1f32, 2f32, 3f32, 4f32]);
|
||||
let array1 = Float32Array::from_vec(vec![1f32, 2f32, 3f32, 4f32]);
|
||||
assert_eq!(
|
||||
Value::Float32(2f32.into()),
|
||||
arrow_array_get(&array1, 1).unwrap()
|
||||
);
|
||||
let array1 = Float64Array::from(vec![1f64, 2f64, 3f64, 4f64]);
|
||||
let array1 = Float64Array::from_vec(vec![1f64, 2f64, 3f64, 4f64]);
|
||||
assert_eq!(
|
||||
Value::Float64(2f64.into()),
|
||||
arrow_array_get(&array1, 1).unwrap()
|
||||
@@ -178,42 +139,55 @@ mod test {
|
||||
);
|
||||
assert_eq!(Value::Null, arrow_array_get(&array2, 1).unwrap());
|
||||
|
||||
let array3 = LargeBinaryArray::from(vec![
|
||||
let array3 = super::BinaryArray::from(vec![
|
||||
Some("hello".as_bytes()),
|
||||
None,
|
||||
Some("world".as_bytes()),
|
||||
]);
|
||||
assert_eq!(
|
||||
Value::Binary("hello".as_bytes().into()),
|
||||
arrow_array_get(&array3, 0).unwrap()
|
||||
);
|
||||
assert_eq!(Value::Null, arrow_array_get(&array3, 1).unwrap());
|
||||
|
||||
let array = TimestampSecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
assert_eq!(value, Value::Timestamp(Timestamp::new(2, TimeUnit::Second)));
|
||||
let array = TimestampMillisecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
let vector = TimestampVector::new(Int64Array::from_vec(vec![1, 2, 3, 4]));
|
||||
let array = vector.to_boxed_arrow_array();
|
||||
let value = arrow_array_get(&*array, 1).unwrap();
|
||||
assert_eq!(
|
||||
value,
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::Millisecond))
|
||||
);
|
||||
let array = TimestampMicrosecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
assert_eq!(
|
||||
value,
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::Microsecond))
|
||||
|
||||
let array4 = PrimitiveArray::<i64>::from_data(
|
||||
DataType::Timestamp(ArrowTimeUnit::Millisecond, None),
|
||||
Buffer::from_slice(&vec![1, 2, 3, 4]),
|
||||
None,
|
||||
);
|
||||
let array = TimestampNanosecondArray::from(vec![1, 2, 3]);
|
||||
let value = arrow_array_get(&array, 1).unwrap();
|
||||
assert_eq!(
|
||||
value,
|
||||
Value::Timestamp(Timestamp::new(2, TimeUnit::Nanosecond))
|
||||
Value::Timestamp(Timestamp::new(1, TimeUnit::Millisecond)),
|
||||
arrow_array_get(&array4, 0).unwrap()
|
||||
);
|
||||
|
||||
let array4 = PrimitiveArray::<i64>::from_data(
|
||||
DataType::Timestamp(ArrowTimeUnit::Nanosecond, None),
|
||||
Buffer::from_slice(&vec![1, 2, 3, 4]),
|
||||
None,
|
||||
);
|
||||
assert_eq!(
|
||||
Value::Timestamp(Timestamp::new(1, TimeUnit::Nanosecond)),
|
||||
arrow_array_get(&array4, 0).unwrap()
|
||||
);
|
||||
|
||||
// test list array
|
||||
let data = vec![
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
Some(vec![Some(1i32), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
let arrow_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
|
||||
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ListArray<i32> = arrow_array.into();
|
||||
|
||||
let v0 = arrow_array_get(&arrow_array, 0).unwrap();
|
||||
match v0 {
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -23,14 +23,13 @@ use crate::error::{self, Error, Result};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{
|
||||
BinaryType, BooleanType, DateTimeType, DateType, Float32Type, Float64Type, Int16Type,
|
||||
Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampMicrosecondType,
|
||||
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
Int32Type, Int64Type, Int8Type, ListType, NullType, StringType, TimestampType, UInt16Type,
|
||||
UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::MutableVector;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[enum_dispatch::enum_dispatch(DataType)]
|
||||
pub enum ConcreteDataType {
|
||||
Null(NullType),
|
||||
@@ -48,21 +47,17 @@ pub enum ConcreteDataType {
|
||||
Float32(Float32Type),
|
||||
Float64(Float64Type),
|
||||
|
||||
// String types:
|
||||
// String types
|
||||
Binary(BinaryType),
|
||||
String(StringType),
|
||||
|
||||
// Date types:
|
||||
Date(DateType),
|
||||
DateTime(DateTimeType),
|
||||
Timestamp(TimestampType),
|
||||
|
||||
// Compound types:
|
||||
List(ListType),
|
||||
}
|
||||
|
||||
// TODO(yingwen): Refactor these `is_xxx()` methods, such as adding a `properties()` method
|
||||
// returning all these properties to the `DataType` trait
|
||||
impl ConcreteDataType {
|
||||
pub fn is_float(&self) -> bool {
|
||||
matches!(
|
||||
@@ -75,7 +70,7 @@ impl ConcreteDataType {
|
||||
matches!(self, ConcreteDataType::Boolean(_))
|
||||
}
|
||||
|
||||
pub fn is_stringifiable(&self) -> bool {
|
||||
pub fn stringifiable(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::String(_)
|
||||
@@ -108,6 +103,13 @@ impl ConcreteDataType {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn is_timestamp(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
ConcreteDataType::Timestamp(_) | ConcreteDataType::Int64(_)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn numerics() -> Vec<ConcreteDataType> {
|
||||
vec![
|
||||
ConcreteDataType::int8_datatype(),
|
||||
@@ -159,7 +161,7 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
|
||||
ArrowDataType::Binary | ArrowDataType::LargeBinary => Self::binary_datatype(),
|
||||
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => Self::string_datatype(),
|
||||
ArrowDataType::List(field) => Self::List(ListType::new(
|
||||
ConcreteDataType::from_arrow_type(field.data_type()),
|
||||
ConcreteDataType::from_arrow_type(&field.data_type),
|
||||
)),
|
||||
_ => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
@@ -189,52 +191,38 @@ macro_rules! impl_new_concrete_type_functions {
|
||||
|
||||
impl_new_concrete_type_functions!(
|
||||
Null, Boolean, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64,
|
||||
Binary, Date, DateTime, String
|
||||
Binary, String, Date, DateTime
|
||||
);
|
||||
|
||||
impl ConcreteDataType {
|
||||
pub fn timestamp_second_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType::default()))
|
||||
}
|
||||
|
||||
pub fn timestamp_millisecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Millisecond(
|
||||
TimestampMillisecondType::default(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn timestamp_microsecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Microsecond(
|
||||
TimestampMicrosecondType::default(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn timestamp_nanosecond_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::Nanosecond(TimestampNanosecondType::default()))
|
||||
pub fn list_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(inner_type))
|
||||
}
|
||||
|
||||
pub fn timestamp_datatype(unit: TimeUnit) -> Self {
|
||||
match unit {
|
||||
TimeUnit::Second => Self::timestamp_second_datatype(),
|
||||
TimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
|
||||
TimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
|
||||
TimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
|
||||
}
|
||||
ConcreteDataType::Timestamp(TimestampType::new(unit))
|
||||
}
|
||||
|
||||
pub fn timestamp_millis_datatype() -> Self {
|
||||
ConcreteDataType::Timestamp(TimestampType::new(TimeUnit::Millisecond))
|
||||
}
|
||||
|
||||
/// Converts from arrow timestamp unit to the corresponding timestamp data type.
|
||||
pub fn from_arrow_time_unit(t: &ArrowTimeUnit) -> Self {
|
||||
// TODO(hl): maybe impl From<ArrowTimestamp> for our timestamp ?
|
||||
pub fn from_arrow_time_unit(t: &arrow::datatypes::TimeUnit) -> Self {
|
||||
match t {
|
||||
ArrowTimeUnit::Second => Self::timestamp_second_datatype(),
|
||||
ArrowTimeUnit::Millisecond => Self::timestamp_millisecond_datatype(),
|
||||
ArrowTimeUnit::Microsecond => Self::timestamp_microsecond_datatype(),
|
||||
ArrowTimeUnit::Nanosecond => Self::timestamp_nanosecond_datatype(),
|
||||
arrow::datatypes::TimeUnit::Second => Self::timestamp_datatype(TimeUnit::Second),
|
||||
arrow::datatypes::TimeUnit::Millisecond => {
|
||||
Self::timestamp_datatype(TimeUnit::Millisecond)
|
||||
}
|
||||
arrow::datatypes::TimeUnit::Microsecond => {
|
||||
Self::timestamp_datatype(TimeUnit::Microsecond)
|
||||
}
|
||||
arrow::datatypes::TimeUnit::Nanosecond => {
|
||||
Self::timestamp_datatype(TimeUnit::Nanosecond)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(item_type))
|
||||
}
|
||||
}
|
||||
|
||||
/// Data type abstraction.
|
||||
@@ -249,15 +237,11 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
/// Returns the default value of this type.
|
||||
fn default_value(&self) -> Value;
|
||||
|
||||
/// Convert this type as [arrow::datatypes::DataType].
|
||||
/// Convert this type as [arrow2::datatypes::DataType].
|
||||
fn as_arrow_type(&self) -> ArrowDataType;
|
||||
|
||||
/// Creates a mutable vector with given `capacity` of this type.
|
||||
/// Create a mutable vector with given `capacity` of this type.
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
|
||||
|
||||
/// Returns true if the data type is compatible with timestamp type so we can
|
||||
/// use it as a timestamp.
|
||||
fn is_timestamp_compatible(&self) -> bool;
|
||||
}
|
||||
|
||||
pub type DataTypeRef = Arc<dyn DataType>;
|
||||
@@ -340,6 +324,10 @@ mod tests {
|
||||
ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
|
||||
ConcreteDataType::String(_)
|
||||
));
|
||||
assert!(matches!(
|
||||
ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
|
||||
ConcreteDataType::String(_)
|
||||
));
|
||||
assert_eq!(
|
||||
ConcreteDataType::from_arrow_type(&ArrowDataType::List(Box::new(Field::new(
|
||||
"item",
|
||||
@@ -357,48 +345,31 @@ mod tests {
|
||||
#[test]
|
||||
fn test_from_arrow_timestamp() {
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Millisecond)
|
||||
ConcreteDataType::timestamp_millis_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Millisecond)
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Microsecond)
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Microsecond)
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Nanosecond)
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Nanosecond)
|
||||
);
|
||||
assert_eq!(
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::from_arrow_time_unit(&ArrowTimeUnit::Second)
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Second),
|
||||
ConcreteDataType::from_arrow_time_unit(&arrow::datatypes::TimeUnit::Second)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_timestamp_compatible() {
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp_compatible());
|
||||
assert!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp_compatible()
|
||||
);
|
||||
assert!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp_compatible()
|
||||
);
|
||||
assert!(
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp_compatible()
|
||||
);
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_timestamp_compatible());
|
||||
assert!(ConcreteDataType::int64_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::null_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::binary_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::date_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::datetime_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::string_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::int32_datatype().is_timestamp_compatible());
|
||||
assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible());
|
||||
fn test_is_timestamp() {
|
||||
assert!(ConcreteDataType::timestamp_millis_datatype().is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp());
|
||||
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp());
|
||||
assert!(ConcreteDataType::int64_datatype().is_timestamp());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -406,81 +377,4 @@ mod tests {
|
||||
assert!(ConcreteDataType::null_datatype().is_null());
|
||||
assert!(!ConcreteDataType::int32_datatype().is_null());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_float() {
|
||||
assert!(!ConcreteDataType::int32_datatype().is_float());
|
||||
assert!(ConcreteDataType::float32_datatype().is_float());
|
||||
assert!(ConcreteDataType::float64_datatype().is_float());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_boolean() {
|
||||
assert!(!ConcreteDataType::int32_datatype().is_boolean());
|
||||
assert!(!ConcreteDataType::float32_datatype().is_boolean());
|
||||
assert!(ConcreteDataType::boolean_datatype().is_boolean());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_stringifiable() {
|
||||
assert!(!ConcreteDataType::int32_datatype().is_stringifiable());
|
||||
assert!(!ConcreteDataType::float32_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::string_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::date_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::datetime_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_stringifiable());
|
||||
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_stringifiable());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_signed() {
|
||||
assert!(ConcreteDataType::int8_datatype().is_signed());
|
||||
assert!(ConcreteDataType::int16_datatype().is_signed());
|
||||
assert!(ConcreteDataType::int32_datatype().is_signed());
|
||||
assert!(ConcreteDataType::int64_datatype().is_signed());
|
||||
assert!(ConcreteDataType::date_datatype().is_signed());
|
||||
assert!(ConcreteDataType::datetime_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_second_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_signed());
|
||||
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_signed());
|
||||
|
||||
assert!(!ConcreteDataType::uint8_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::uint16_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::uint32_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::uint64_datatype().is_signed());
|
||||
|
||||
assert!(!ConcreteDataType::float32_datatype().is_signed());
|
||||
assert!(!ConcreteDataType::float64_datatype().is_signed());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_unsigned() {
|
||||
assert!(!ConcreteDataType::int8_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::int16_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::int32_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::int64_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::date_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::datetime_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_second_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_millisecond_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_microsecond_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::timestamp_nanosecond_datatype().is_unsigned());
|
||||
|
||||
assert!(ConcreteDataType::uint8_datatype().is_unsigned());
|
||||
assert!(ConcreteDataType::uint16_datatype().is_unsigned());
|
||||
assert!(ConcreteDataType::uint32_datatype().is_unsigned());
|
||||
assert!(ConcreteDataType::uint64_datatype().is_unsigned());
|
||||
|
||||
assert!(!ConcreteDataType::float32_datatype().is_unsigned());
|
||||
assert!(!ConcreteDataType::float64_datatype().is_unsigned());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_numerics() {
|
||||
let nums = ConcreteDataType::numerics();
|
||||
assert_eq!(10, nums.len());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,6 @@ pub mod prelude;
|
||||
mod scalars;
|
||||
pub mod schema;
|
||||
pub mod serialize;
|
||||
mod timestamp;
|
||||
pub mod type_id;
|
||||
pub mod types;
|
||||
pub mod value;
|
||||
|
||||
@@ -12,9 +12,27 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//! Some helper macros for datatypes, copied from databend.
|
||||
///! Some helper macros for datatypes, copied from databend.
|
||||
#[macro_export]
|
||||
macro_rules! for_all_scalar_types {
|
||||
($macro:tt $(, $x:tt)*) => {
|
||||
$macro! {
|
||||
[$($x),*],
|
||||
{ i8 },
|
||||
{ i16 },
|
||||
{ i32 },
|
||||
{ i64 },
|
||||
{ u8 },
|
||||
{ u16 },
|
||||
{ u32 },
|
||||
{ u64 },
|
||||
{ f32 },
|
||||
{ f64 },
|
||||
{ bool },
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Apply the macro rules to all primitive types.
|
||||
#[macro_export]
|
||||
macro_rules! for_all_primitive_types {
|
||||
($macro:tt $(, $x:tt)*) => {
|
||||
@@ -34,8 +52,6 @@ macro_rules! for_all_primitive_types {
|
||||
};
|
||||
}
|
||||
|
||||
/// Match the logical type and apply `$body` to all primitive types and
|
||||
/// `nbody` to other types.
|
||||
#[macro_export]
|
||||
macro_rules! with_match_primitive_type_id {
|
||||
($key_type:expr, | $_:tt $T:ident | $body:tt, $nbody:tt) => {{
|
||||
@@ -46,21 +62,17 @@ macro_rules! with_match_primitive_type_id {
|
||||
}
|
||||
|
||||
use $crate::type_id::LogicalTypeId;
|
||||
use $crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
|
||||
UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
match $key_type {
|
||||
LogicalTypeId::Int8 => __with_ty__! { Int8Type },
|
||||
LogicalTypeId::Int16 => __with_ty__! { Int16Type },
|
||||
LogicalTypeId::Int32 => __with_ty__! { Int32Type },
|
||||
LogicalTypeId::Int64 => __with_ty__! { Int64Type },
|
||||
LogicalTypeId::UInt8 => __with_ty__! { UInt8Type },
|
||||
LogicalTypeId::UInt16 => __with_ty__! { UInt16Type },
|
||||
LogicalTypeId::UInt32 => __with_ty__! { UInt32Type },
|
||||
LogicalTypeId::UInt64 => __with_ty__! { UInt64Type },
|
||||
LogicalTypeId::Float32 => __with_ty__! { Float32Type },
|
||||
LogicalTypeId::Float64 => __with_ty__! { Float64Type },
|
||||
LogicalTypeId::Int8 => __with_ty__! { i8 },
|
||||
LogicalTypeId::Int16 => __with_ty__! { i16 },
|
||||
LogicalTypeId::Int32 => __with_ty__! { i32 },
|
||||
LogicalTypeId::Int64 => __with_ty__! { i64 },
|
||||
LogicalTypeId::UInt8 => __with_ty__! { u8 },
|
||||
LogicalTypeId::UInt16 => __with_ty__! { u16 },
|
||||
LogicalTypeId::UInt32 => __with_ty__! { u32 },
|
||||
LogicalTypeId::UInt64 => __with_ty__! { u64 },
|
||||
LogicalTypeId::Float32 => __with_ty__! { f32 },
|
||||
LogicalTypeId::Float64 => __with_ty__! { f64 },
|
||||
|
||||
_ => $nbody,
|
||||
}
|
||||
|
||||
@@ -16,5 +16,8 @@ pub use crate::data_type::{ConcreteDataType, DataType, DataTypeRef};
|
||||
pub use crate::macros::*;
|
||||
pub use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
|
||||
pub use crate::type_id::LogicalTypeId;
|
||||
pub use crate::types::Primitive;
|
||||
pub use crate::value::{Value, ValueRef};
|
||||
pub use crate::vectors::{MutableVector, Validity, Vector, VectorRef};
|
||||
pub use crate::vectors::{
|
||||
Helper as VectorHelper, MutableVector, Validity, Vector, VectorBuilder, VectorRef,
|
||||
};
|
||||
|
||||
@@ -14,17 +14,11 @@
|
||||
|
||||
use std::any::Any;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
|
||||
use crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
|
||||
UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::{ListValue, ListValueRef, Value};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, MutableVector,
|
||||
PrimitiveVector, StringVector, Vector,
|
||||
};
|
||||
use crate::prelude::*;
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::*;
|
||||
|
||||
fn get_iter_capacity<T, I: Iterator<Item = T>>(iter: &I) -> usize {
|
||||
match iter.size_hint() {
|
||||
@@ -41,7 +35,7 @@ where
|
||||
for<'a> Self::VectorType: ScalarVector<RefItem<'a> = Self::RefType<'a>>,
|
||||
{
|
||||
type VectorType: ScalarVector<OwnedItem = Self>;
|
||||
type RefType<'a>: ScalarRef<'a, ScalarType = Self>
|
||||
type RefType<'a>: ScalarRef<'a, ScalarType = Self, VectorType = Self::VectorType>
|
||||
where
|
||||
Self: 'a;
|
||||
/// Get a reference of the current value.
|
||||
@@ -52,6 +46,7 @@ where
|
||||
}
|
||||
|
||||
pub trait ScalarRef<'a>: std::fmt::Debug + Clone + Copy + Send + 'a {
|
||||
type VectorType: ScalarVector<RefItem<'a> = Self>;
|
||||
/// The corresponding [`Scalar`] type.
|
||||
type ScalarType: Scalar<RefType<'a> = Self>;
|
||||
|
||||
@@ -68,7 +63,7 @@ where
|
||||
{
|
||||
type OwnedItem: Scalar<VectorType = Self>;
|
||||
/// The reference item of this vector.
|
||||
type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem>
|
||||
type RefItem<'a>: ScalarRef<'a, ScalarType = Self::OwnedItem, VectorType = Self>
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
@@ -142,46 +137,47 @@ pub trait ScalarVectorBuilder: MutableVector {
|
||||
fn finish(&mut self) -> Self::VectorType;
|
||||
}
|
||||
|
||||
macro_rules! impl_scalar_for_native {
|
||||
($Native: ident, $DataType: ident) => {
|
||||
impl Scalar for $Native {
|
||||
type VectorType = PrimitiveVector<$DataType>;
|
||||
type RefType<'a> = $Native;
|
||||
macro_rules! impl_primitive_scalar_type {
|
||||
($native:ident) => {
|
||||
impl Scalar for $native {
|
||||
type VectorType = PrimitiveVector<$native>;
|
||||
type RefType<'a> = $native;
|
||||
|
||||
#[inline]
|
||||
fn as_scalar_ref(&self) -> $Native {
|
||||
fn as_scalar_ref(&self) -> $native {
|
||||
*self
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_lifetimes)]
|
||||
#[inline]
|
||||
fn upcast_gat<'short, 'long: 'short>(long: $Native) -> $Native {
|
||||
fn upcast_gat<'short, 'long: 'short>(long: $native) -> $native {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement [`ScalarRef`] for primitive types. Note that primitive types are both [`Scalar`] and [`ScalarRef`].
|
||||
impl<'a> ScalarRef<'a> for $Native {
|
||||
type ScalarType = $Native;
|
||||
impl<'a> ScalarRef<'a> for $native {
|
||||
type VectorType = PrimitiveVector<$native>;
|
||||
type ScalarType = $native;
|
||||
|
||||
#[inline]
|
||||
fn to_owned_scalar(&self) -> $Native {
|
||||
fn to_owned_scalar(&self) -> $native {
|
||||
*self
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_scalar_for_native!(u8, UInt8Type);
|
||||
impl_scalar_for_native!(u16, UInt16Type);
|
||||
impl_scalar_for_native!(u32, UInt32Type);
|
||||
impl_scalar_for_native!(u64, UInt64Type);
|
||||
impl_scalar_for_native!(i8, Int8Type);
|
||||
impl_scalar_for_native!(i16, Int16Type);
|
||||
impl_scalar_for_native!(i32, Int32Type);
|
||||
impl_scalar_for_native!(i64, Int64Type);
|
||||
impl_scalar_for_native!(f32, Float32Type);
|
||||
impl_scalar_for_native!(f64, Float64Type);
|
||||
impl_primitive_scalar_type!(u8);
|
||||
impl_primitive_scalar_type!(u16);
|
||||
impl_primitive_scalar_type!(u32);
|
||||
impl_primitive_scalar_type!(u64);
|
||||
impl_primitive_scalar_type!(i8);
|
||||
impl_primitive_scalar_type!(i16);
|
||||
impl_primitive_scalar_type!(i32);
|
||||
impl_primitive_scalar_type!(i64);
|
||||
impl_primitive_scalar_type!(f32);
|
||||
impl_primitive_scalar_type!(f64);
|
||||
|
||||
impl Scalar for bool {
|
||||
type VectorType = BooleanVector;
|
||||
@@ -200,6 +196,7 @@ impl Scalar for bool {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for bool {
|
||||
type VectorType = BooleanVector;
|
||||
type ScalarType = bool;
|
||||
|
||||
#[inline]
|
||||
@@ -224,6 +221,7 @@ impl Scalar for String {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for &'a str {
|
||||
type VectorType = StringVector;
|
||||
type ScalarType = String;
|
||||
|
||||
#[inline]
|
||||
@@ -248,6 +246,7 @@ impl Scalar for Vec<u8> {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for &'a [u8] {
|
||||
type VectorType = BinaryVector;
|
||||
type ScalarType = Vec<u8>;
|
||||
|
||||
#[inline]
|
||||
@@ -270,6 +269,7 @@ impl Scalar for Date {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for Date {
|
||||
type VectorType = DateVector;
|
||||
type ScalarType = Date;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
@@ -291,6 +291,7 @@ impl Scalar for DateTime {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for DateTime {
|
||||
type VectorType = DateTimeVector;
|
||||
type ScalarType = DateTime;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
@@ -298,7 +299,27 @@ impl<'a> ScalarRef<'a> for DateTime {
|
||||
}
|
||||
}
|
||||
|
||||
// Timestamp types implement Scalar and ScalarRef in `src/timestamp.rs`.
|
||||
impl Scalar for Timestamp {
|
||||
type VectorType = TimestampVector;
|
||||
type RefType<'a> = Timestamp;
|
||||
|
||||
fn as_scalar_ref(&self) -> Self::RefType<'_> {
|
||||
*self
|
||||
}
|
||||
|
||||
fn upcast_gat<'short, 'long: 'short>(long: Self::RefType<'long>) -> Self::RefType<'short> {
|
||||
long
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for Timestamp {
|
||||
type VectorType = TimestampVector;
|
||||
type ScalarType = Timestamp;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl Scalar for ListValue {
|
||||
type VectorType = ListVector;
|
||||
@@ -314,6 +335,7 @@ impl Scalar for ListValue {
|
||||
}
|
||||
|
||||
impl<'a> ScalarRef<'a> for ListValueRef<'a> {
|
||||
type VectorType = ListVector;
|
||||
type ScalarType = ListValue;
|
||||
|
||||
fn to_owned_scalar(&self) -> Self::ScalarType {
|
||||
@@ -335,9 +357,8 @@ impl<'a> ScalarRef<'a> for ListValueRef<'a> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::timestamp::TimestampSecond;
|
||||
use crate::vectors::{BinaryVector, Int32Vector, ListVectorBuilder, TimestampSecondVector};
|
||||
use crate::vectors::binary::BinaryVector;
|
||||
use crate::vectors::primitive::Int32Vector;
|
||||
|
||||
fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
|
||||
let mut builder = T::Builder::with_capacity(items.len());
|
||||
@@ -433,11 +454,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_build_timestamp_vector() {
|
||||
let expect: Vec<Option<TimestampSecond>> = vec![Some(10.into()), None, Some(42.into())];
|
||||
let vector: TimestampSecondVector = build_vector_from_slice(&expect);
|
||||
let expect: Vec<Option<Timestamp>> = vec![Some(10.into()), None, Some(42.into())];
|
||||
let vector: TimestampVector = build_vector_from_slice(&expect);
|
||||
assert_vector_eq(&expect, &vector);
|
||||
let val = vector.get_data(0).unwrap();
|
||||
assert_eq!(val, val.as_scalar_ref());
|
||||
assert_eq!(TimestampSecond::from(10), val.to_owned_scalar());
|
||||
assert_eq!(10, val.to_owned_scalar().value());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,27 +12,128 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
mod column_schema;
|
||||
mod constraint;
|
||||
mod raw;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub use arrow::datatypes::Metadata;
|
||||
use arrow::datatypes::{Field, Schema as ArrowSchema};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::{ensure, ResultExt};
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::error::{self, Error, Result};
|
||||
pub use crate::schema::column_schema::{ColumnSchema, Metadata};
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, DeserializeSnafu, Error, Result, SerializeSnafu};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
use crate::vectors::VectorRef;
|
||||
|
||||
/// Key used to store whether the column is time index in arrow field's metadata.
|
||||
const TIME_INDEX_KEY: &str = "greptime:time_index";
|
||||
/// Key used to store version number of the schema in metadata.
|
||||
const VERSION_KEY: &str = "greptime:version";
|
||||
/// Key used to store default constraint in arrow field's metadata.
|
||||
const ARROW_FIELD_DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
|
||||
|
||||
/// Schema of a column, used as an immutable struct.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ColumnSchema {
|
||||
pub name: String,
|
||||
pub data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
is_time_index: bool,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
metadata: Metadata,
|
||||
}
|
||||
|
||||
impl ColumnSchema {
|
||||
pub fn new<T: Into<String>>(
|
||||
name: T,
|
||||
data_type: ConcreteDataType,
|
||||
is_nullable: bool,
|
||||
) -> ColumnSchema {
|
||||
ColumnSchema {
|
||||
name: name.into(),
|
||||
data_type,
|
||||
is_nullable,
|
||||
is_time_index: false,
|
||||
default_constraint: None,
|
||||
metadata: Metadata::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_time_index(&self) -> bool {
|
||||
self.is_time_index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_nullable(&self) -> bool {
|
||||
self.is_nullable
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn default_constraint(&self) -> Option<&ColumnDefaultConstraint> {
|
||||
self.default_constraint.as_ref()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &Metadata {
|
||||
&self.metadata
|
||||
}
|
||||
|
||||
pub fn with_time_index(mut self, is_time_index: bool) -> Self {
|
||||
self.is_time_index = is_time_index;
|
||||
if is_time_index {
|
||||
self.metadata
|
||||
.insert(TIME_INDEX_KEY.to_string(), "true".to_string());
|
||||
} else {
|
||||
self.metadata.remove(TIME_INDEX_KEY);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_default_constraint(
|
||||
mut self,
|
||||
default_constraint: Option<ColumnDefaultConstraint>,
|
||||
) -> Result<Self> {
|
||||
if let Some(constraint) = &default_constraint {
|
||||
constraint.validate(&self.data_type, self.is_nullable)?;
|
||||
}
|
||||
|
||||
self.default_constraint = default_constraint;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Creates a new [`ColumnSchema`] with given metadata.
|
||||
pub fn with_metadata(mut self, metadata: Metadata) -> Self {
|
||||
self.metadata = metadata;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn create_default_vector(&self, num_rows: usize) -> Result<Option<VectorRef>> {
|
||||
match &self.default_constraint {
|
||||
Some(c) => c
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some),
|
||||
None => {
|
||||
if self.is_nullable {
|
||||
// No default constraint, use null as default value.
|
||||
// TODO(yingwen): Use NullVector once it supports setting logical type.
|
||||
ColumnDefaultConstraint::null_value()
|
||||
.create_default_vector(&self.data_type, self.is_nullable, num_rows)
|
||||
.map(Some)
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A common schema, should be immutable.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Schema {
|
||||
column_schemas: Vec<ColumnSchema>,
|
||||
name_to_index: HashMap<String, usize>,
|
||||
@@ -130,7 +231,7 @@ impl Schema {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn metadata(&self) -> &HashMap<String, String> {
|
||||
pub fn metadata(&self) -> &Metadata {
|
||||
&self.arrow_schema.metadata
|
||||
}
|
||||
}
|
||||
@@ -142,7 +243,7 @@ pub struct SchemaBuilder {
|
||||
fields: Vec<Field>,
|
||||
timestamp_index: Option<usize>,
|
||||
version: u32,
|
||||
metadata: HashMap<String, String>,
|
||||
metadata: Metadata,
|
||||
}
|
||||
|
||||
impl TryFrom<Vec<ColumnSchema>> for SchemaBuilder {
|
||||
@@ -191,7 +292,7 @@ impl SchemaBuilder {
|
||||
self.metadata
|
||||
.insert(VERSION_KEY.to_string(), self.version.to_string());
|
||||
|
||||
let arrow_schema = ArrowSchema::new(self.fields).with_metadata(self.metadata);
|
||||
let arrow_schema = ArrowSchema::from(self.fields).with_metadata(self.metadata);
|
||||
|
||||
Ok(Schema {
|
||||
column_schemas: self.column_schemas,
|
||||
@@ -246,7 +347,7 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us
|
||||
|
||||
let column_schema = &column_schemas[timestamp_index];
|
||||
ensure!(
|
||||
column_schema.data_type.is_timestamp_compatible(),
|
||||
column_schema.data_type.is_timestamp(),
|
||||
error::InvalidTimestampIndexSnafu {
|
||||
index: timestamp_index,
|
||||
}
|
||||
@@ -263,6 +364,58 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us
|
||||
|
||||
pub type SchemaRef = Arc<Schema>;
|
||||
|
||||
impl TryFrom<&Field> for ColumnSchema {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(field: &Field) -> Result<ColumnSchema> {
|
||||
let data_type = ConcreteDataType::try_from(&field.data_type)?;
|
||||
let mut metadata = field.metadata.clone();
|
||||
let default_constraint = match metadata.remove(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY) {
|
||||
Some(json) => Some(serde_json::from_str(&json).context(DeserializeSnafu { json })?),
|
||||
None => None,
|
||||
};
|
||||
let is_time_index = metadata.contains_key(TIME_INDEX_KEY);
|
||||
|
||||
Ok(ColumnSchema {
|
||||
name: field.name.clone(),
|
||||
data_type,
|
||||
is_nullable: field.is_nullable,
|
||||
is_time_index,
|
||||
default_constraint,
|
||||
metadata,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&ColumnSchema> for Field {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(column_schema: &ColumnSchema) -> Result<Field> {
|
||||
let mut metadata = column_schema.metadata.clone();
|
||||
if let Some(value) = &column_schema.default_constraint {
|
||||
// Adds an additional metadata entry to store the default constraint.
|
||||
let old = metadata.insert(
|
||||
ARROW_FIELD_DEFAULT_CONSTRAINT_KEY.to_string(),
|
||||
serde_json::to_string(&value).context(SerializeSnafu)?,
|
||||
);
|
||||
|
||||
ensure!(
|
||||
old.is_none(),
|
||||
error::DuplicateMetaSnafu {
|
||||
key: ARROW_FIELD_DEFAULT_CONSTRAINT_KEY,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
Ok(Field::new(
|
||||
column_schema.name.clone(),
|
||||
column_schema.data_type.as_arrow_type(),
|
||||
column_schema.is_nullable(),
|
||||
)
|
||||
.with_metadata(metadata))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Arc<ArrowSchema>> for Schema {
|
||||
type Error = Error;
|
||||
|
||||
@@ -271,7 +424,7 @@ impl TryFrom<Arc<ArrowSchema>> for Schema {
|
||||
let mut name_to_index = HashMap::with_capacity(arrow_schema.fields.len());
|
||||
for field in &arrow_schema.fields {
|
||||
let column_schema = ColumnSchema::try_from(field)?;
|
||||
name_to_index.insert(field.name().to_string(), column_schemas.len());
|
||||
name_to_index.insert(field.name.clone(), column_schemas.len());
|
||||
column_schemas.push(column_schema);
|
||||
}
|
||||
|
||||
@@ -312,7 +465,7 @@ impl TryFrom<ArrowSchema> for Schema {
|
||||
}
|
||||
}
|
||||
|
||||
fn try_parse_version(metadata: &HashMap<String, String>, key: &str) -> Result<u32> {
|
||||
fn try_parse_version(metadata: &Metadata, key: &str) -> Result<u32> {
|
||||
if let Some(value) = metadata.get(key) {
|
||||
let version = value
|
||||
.parse()
|
||||
@@ -326,8 +479,127 @@ fn try_parse_version(metadata: &HashMap<String, String>, key: &str) -> Result<u3
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::value::Value;
|
||||
|
||||
/// A plain nullable column survives a `ColumnSchema -> Field -> ColumnSchema` round trip.
#[test]
fn test_column_schema() {
    let original = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);

    // ColumnSchema -> Field
    let arrow_field = Field::try_from(&original).unwrap();
    assert_eq!("test", arrow_field.name);
    assert_eq!(ArrowDataType::Int32, arrow_field.data_type);
    assert!(arrow_field.is_nullable);

    // Field -> ColumnSchema
    let round_tripped = ColumnSchema::try_from(&arrow_field).unwrap();
    assert_eq!(original, round_tripped);
}
|
||||
|
||||
/// The default constraint is kept out of the column metadata, appears as JSON in
/// the field metadata, and is restored when converting back.
#[test]
fn test_column_schema_with_default_constraint() {
    let constraint = ColumnDefaultConstraint::Value(Value::from(99));
    let original = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
        .with_default_constraint(Some(constraint))
        .unwrap();
    let stored_in_column = original.metadata().get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY);
    assert!(stored_in_column.is_none());

    let arrow_field = Field::try_from(&original).unwrap();
    assert_eq!("test", arrow_field.name);
    assert_eq!(ArrowDataType::Int32, arrow_field.data_type);
    assert!(arrow_field.is_nullable);

    let encoded = arrow_field
        .metadata
        .get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY)
        .unwrap();
    assert_eq!("{\"Value\":{\"Int32\":99}}", encoded);

    let round_tripped = ColumnSchema::try_from(&arrow_field).unwrap();
    assert_eq!(original, round_tripped);
}
|
||||
|
||||
/// User metadata is carried over to the field next to the default constraint
/// entry, and the round trip yields an equal column schema.
#[test]
fn test_column_schema_with_metadata() {
    let mut user_metadata = Metadata::new();
    user_metadata.insert("k1".to_string(), "v1".to_string());

    let original = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
        .with_metadata(user_metadata)
        .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
        .unwrap();
    assert_eq!("v1", original.metadata().get("k1").unwrap());
    let stored_in_column = original.metadata().get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY);
    assert!(stored_in_column.is_none());

    let arrow_field = Field::try_from(&original).unwrap();
    assert_eq!("v1", arrow_field.metadata.get("k1").unwrap());
    let stored_in_field = arrow_field.metadata.get(ARROW_FIELD_DEFAULT_CONSTRAINT_KEY);
    assert!(stored_in_field.is_some());

    let round_tripped = ColumnSchema::try_from(&arrow_field).unwrap();
    assert_eq!(original, round_tripped);
}
|
||||
|
||||
/// Converting to a field fails when user metadata already occupies the key
/// reserved for the default constraint.
#[test]
fn test_column_schema_with_duplicate_metadata() {
    let reserved_key = ARROW_FIELD_DEFAULT_CONSTRAINT_KEY.to_string();
    let mut clashing_metadata = Metadata::new();
    clashing_metadata.insert(reserved_key, "v1".to_string());

    let schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
        .with_metadata(clashing_metadata)
        .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
        .unwrap();

    let conversion = Field::try_from(&schema);
    conversion.unwrap_err();
}
|
||||
|
||||
/// A null default is rejected on a column declared as not nullable.
#[test]
fn test_column_schema_invalid_default_constraint() {
    let non_nullable = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);
    let null_default = Some(ColumnDefaultConstraint::null_value());

    non_nullable
        .with_default_constraint(null_default)
        .unwrap_err();
}
|
||||
|
||||
/// A default constraint encoded to bytes decodes back to an equal value.
#[test]
fn test_column_default_constraint_try_into_from() {
    let original = ColumnDefaultConstraint::Value(Value::from(42i64));

    let encoded: Vec<u8> = original.clone().try_into().unwrap();
    let decoded = ColumnDefaultConstraint::try_from(encoded.as_slice()).unwrap();

    assert_eq!(original, decoded);
}
|
||||
|
||||
/// Nullable columns produce an all-null default vector, whether the null default
/// is implicit or declared explicitly.
#[test]
fn test_column_schema_create_default_null() {
    // Implicit default null.
    let implicit = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true);
    let implicit_vector = implicit.create_default_vector(5).unwrap().unwrap();
    assert_eq!(5, implicit_vector.len());
    assert!(implicit_vector.only_null());

    // Explicit default null.
    let explicit = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), true)
        .with_default_constraint(Some(ColumnDefaultConstraint::null_value()))
        .unwrap();
    let explicit_vector = explicit.create_default_vector(5).unwrap().unwrap();
    assert_eq!(5, explicit_vector.len());
    assert!(explicit_vector.only_null());
}
|
||||
|
||||
/// A non-nullable column without a default constraint yields no default vector.
#[test]
fn test_column_schema_no_default() {
    let schema = ColumnSchema::new("test", ConcreteDataType::int32_datatype(), false);

    let default_vector = schema.create_default_vector(5).unwrap();
    assert!(default_vector.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn test_build_empty_schema() {
|
||||
@@ -382,12 +654,8 @@ mod tests {
|
||||
fn test_schema_with_timestamp() {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.with_time_index(true),
|
||||
];
|
||||
let schema = SchemaBuilder::try_from(column_schemas.clone())
|
||||
.unwrap()
|
||||
|
||||
@@ -22,7 +22,7 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{Int64Vector, TimestampMillisecondVector, VectorRef};
|
||||
use crate::vectors::{Int64Vector, TimestampVector, VectorRef};
|
||||
|
||||
const CURRENT_TIMESTAMP: &str = "current_timestamp()";
|
||||
|
||||
@@ -81,7 +81,7 @@ impl ColumnDefaultConstraint {
|
||||
error::UnsupportedDefaultExprSnafu { expr }
|
||||
);
|
||||
ensure!(
|
||||
data_type.is_timestamp_compatible(),
|
||||
data_type.is_timestamp(),
|
||||
error::DefaultValueTypeSnafu {
|
||||
reason: "return value of the function must has timestamp type",
|
||||
}
|
||||
@@ -162,10 +162,8 @@ fn create_current_timestamp_vector(
|
||||
data_type: &ConcreteDataType,
|
||||
num_rows: usize,
|
||||
) -> Result<VectorRef> {
|
||||
// FIXME(yingwen): We should implements cast in VectorOp so we could cast the millisecond vector
|
||||
// to other data type and avoid this match.
|
||||
match data_type {
|
||||
ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampMillisecondVector::from_values(
|
||||
ConcreteDataType::Timestamp(_) => Ok(Arc::new(TimestampVector::from_values(
|
||||
std::iter::repeat(util::current_time_millis()).take(num_rows),
|
||||
))),
|
||||
ConcreteDataType::Int64(_) => Ok(Arc::new(Int64Vector::from_values(
|
||||
@@ -219,7 +217,7 @@ mod tests {
|
||||
fn test_validate_function_constraint() {
|
||||
let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
|
||||
constraint
|
||||
.validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
|
||||
.validate(&ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.unwrap();
|
||||
constraint
|
||||
.validate(&ConcreteDataType::boolean_datatype(), false)
|
||||
@@ -227,7 +225,7 @@ mod tests {
|
||||
|
||||
let constraint = ColumnDefaultConstraint::Function("hello()".to_string());
|
||||
constraint
|
||||
.validate(&ConcreteDataType::timestamp_millisecond_datatype(), false)
|
||||
.validate(&ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.unwrap_err();
|
||||
}
|
||||
|
||||
@@ -264,7 +262,7 @@ mod tests {
|
||||
fn test_create_default_vector_by_func() {
|
||||
let constraint = ColumnDefaultConstraint::Function(CURRENT_TIMESTAMP.to_string());
|
||||
// Timestamp type.
|
||||
let data_type = ConcreteDataType::timestamp_millisecond_datatype();
|
||||
let data_type = ConcreteDataType::timestamp_millis_datatype();
|
||||
let v = constraint
|
||||
.create_default_vector(&data_type, false, 4)
|
||||
.unwrap();
|
||||
@@ -288,7 +286,7 @@ mod tests {
|
||||
);
|
||||
|
||||
let constraint = ColumnDefaultConstraint::Function("no".to_string());
|
||||
let data_type = ConcreteDataType::timestamp_millisecond_datatype();
|
||||
let data_type = ConcreteDataType::timestamp_millis_datatype();
|
||||
constraint
|
||||
.create_default_vector(&data_type, false, 4)
|
||||
.unwrap_err();
|
||||
|
||||
@@ -20,7 +20,7 @@ use crate::schema::{ColumnSchema, Schema, SchemaBuilder};
|
||||
/// Struct used to serialize and deserialize [`Schema`](crate::schema::Schema).
|
||||
///
|
||||
/// This struct only contains necessary data to recover the Schema.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RawSchema {
|
||||
pub column_schemas: Vec<ColumnSchema>,
|
||||
pub timestamp_index: Option<usize>,
|
||||
@@ -56,12 +56,8 @@ mod tests {
|
||||
fn test_raw_convert() {
|
||||
let column_schemas = vec![
|
||||
ColumnSchema::new("col1", ConcreteDataType::int32_datatype(), true),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_time_index(true),
|
||||
ColumnSchema::new("ts", ConcreteDataType::timestamp_millis_datatype(), false)
|
||||
.with_time_index(true),
|
||||
];
|
||||
let schema = SchemaBuilder::try_from(column_schemas)
|
||||
.unwrap()
|
||||
|
||||
@@ -42,10 +42,7 @@ pub enum LogicalTypeId {
|
||||
/// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
|
||||
DateTime,
|
||||
|
||||
TimestampSecond,
|
||||
TimestampMillisecond,
|
||||
TimestampMicrosecond,
|
||||
TimestampNanosecond,
|
||||
Timestamp,
|
||||
|
||||
List,
|
||||
}
|
||||
@@ -77,14 +74,7 @@ impl LogicalTypeId {
|
||||
LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
|
||||
LogicalTypeId::Date => ConcreteDataType::date_datatype(),
|
||||
LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
|
||||
LogicalTypeId::TimestampSecond => ConcreteDataType::timestamp_second_datatype(),
|
||||
LogicalTypeId::TimestampMillisecond => {
|
||||
ConcreteDataType::timestamp_millisecond_datatype()
|
||||
}
|
||||
LogicalTypeId::TimestampMicrosecond => {
|
||||
ConcreteDataType::timestamp_microsecond_datatype()
|
||||
}
|
||||
LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
LogicalTypeId::Timestamp => ConcreteDataType::timestamp_millis_datatype(), // to timestamp type with default time unit
|
||||
LogicalTypeId::List => {
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
|
||||
}
|
||||
|
||||
@@ -14,24 +14,25 @@
|
||||
|
||||
mod binary_type;
|
||||
mod boolean_type;
|
||||
mod date_type;
|
||||
mod datetime_type;
|
||||
mod date;
|
||||
mod datetime;
|
||||
mod list_type;
|
||||
mod null_type;
|
||||
mod primitive_traits;
|
||||
mod primitive_type;
|
||||
mod string_type;
|
||||
|
||||
mod timestamp_type;
|
||||
mod timestamp;
|
||||
|
||||
pub use binary_type::BinaryType;
|
||||
pub use boolean_type::BooleanType;
|
||||
pub use date_type::DateType;
|
||||
pub use datetime_type::DateTimeType;
|
||||
pub use date::DateType;
|
||||
pub use datetime::DateTimeType;
|
||||
pub use list_type::ListType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_traits::{OrdPrimitive, Primitive};
|
||||
pub use primitive_type::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
|
||||
NativeType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, PrimitiveElement,
|
||||
PrimitiveType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
pub use string_type::StringType;
|
||||
pub use timestamp_type::*;
|
||||
pub use timestamp::TimestampType;
|
||||
|
||||
@@ -53,8 +53,4 @@ impl DataType for BinaryType {
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,8 +52,4 @@ impl DataType for BooleanType {
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BooleanVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,17 +15,15 @@
|
||||
use arrow::datatypes::{DataType as ArrowDataType, Field};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::{ListValue, Value};
|
||||
use crate::prelude::*;
|
||||
use crate::value::ListValue;
|
||||
use crate::vectors::{ListVectorBuilder, MutableVector};
|
||||
|
||||
/// Used to represent the List datatype.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ListType {
|
||||
/// The type of List's item.
|
||||
// Use Box to avoid recursive dependency, as enum ConcreteDataType depends on ListType.
|
||||
item_type: Box<ConcreteDataType>,
|
||||
/// The type of List's inner data.
|
||||
inner: Box<ConcreteDataType>,
|
||||
}
|
||||
|
||||
impl Default for ListType {
|
||||
@@ -35,10 +33,9 @@ impl Default for ListType {
|
||||
}
|
||||
|
||||
impl ListType {
|
||||
/// Create a new `ListType` whose item's data type is `item_type`.
|
||||
pub fn new(item_type: ConcreteDataType) -> Self {
|
||||
pub fn new(datatype: ConcreteDataType) -> Self {
|
||||
ListType {
|
||||
item_type: Box::new(item_type),
|
||||
inner: Box::new(datatype),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -53,24 +50,20 @@ impl DataType for ListType {
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::List(ListValue::new(None, *self.item_type.clone()))
|
||||
Value::List(ListValue::new(None, *self.inner.clone()))
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
let field = Box::new(Field::new("item", self.item_type.as_arrow_type(), true));
|
||||
let field = Box::new(Field::new("item", self.inner.as_arrow_type(), true));
|
||||
ArrowDataType::List(field)
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(ListVectorBuilder::with_type_capacity(
|
||||
*self.item_type.clone(),
|
||||
*self.inner.clone(),
|
||||
capacity,
|
||||
))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -27,7 +27,7 @@ pub struct NullType;
|
||||
|
||||
impl NullType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(NullType)
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,8 +51,4 @@ impl DataType for NullType {
|
||||
fn create_mutable_vector(&self, _capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(NullVectorBuilder::default())
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,11 +12,12 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
use std::any::TypeId;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType};
|
||||
use common_time::{Date, DateTime};
|
||||
use num::NumCast;
|
||||
use arrow::array::PrimitiveArray;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
@@ -24,226 +25,92 @@ use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{DateTimeType, DateType};
|
||||
use crate::types::primitive_traits::Primitive;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector};
|
||||
|
||||
/// Data types that can be used as arrow's native type.
|
||||
pub trait NativeType: ArrowNativeType + NumCast {
|
||||
/// Largest numeric type this primitive type can be cast to.
|
||||
type LargestType: NativeType;
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct PrimitiveType<T: Primitive> {
|
||||
#[serde(skip)]
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
macro_rules! impl_native_type {
|
||||
($Type: ident, $LargestType: ident) => {
|
||||
impl NativeType for $Type {
|
||||
type LargestType = $LargestType;
|
||||
}
|
||||
};
|
||||
impl<T: Primitive, U: Primitive> PartialEq<PrimitiveType<U>> for PrimitiveType<T> {
|
||||
fn eq(&self, _other: &PrimitiveType<U>) -> bool {
|
||||
TypeId::of::<T>() == TypeId::of::<U>()
|
||||
}
|
||||
}
|
||||
|
||||
impl_native_type!(u8, u64);
|
||||
impl_native_type!(u16, u64);
|
||||
impl_native_type!(u32, u64);
|
||||
impl_native_type!(u64, u64);
|
||||
impl_native_type!(i8, i64);
|
||||
impl_native_type!(i16, i64);
|
||||
impl_native_type!(i32, i64);
|
||||
impl_native_type!(i64, i64);
|
||||
impl_native_type!(f32, f64);
|
||||
impl_native_type!(f64, f64);
|
||||
impl<T: Primitive> Eq for PrimitiveType<T> {}
|
||||
|
||||
/// Represents the wrapper type that wraps a native type using the `newtype pattern`,
|
||||
/// such as [Date](`common_time::Date`) is a wrapper type for the underlying native
|
||||
/// type `i32`.
|
||||
pub trait WrapperType:
|
||||
Copy
|
||||
+ Scalar
|
||||
+ PartialEq
|
||||
+ Into<Value>
|
||||
+ Into<ValueRef<'static>>
|
||||
+ Serialize
|
||||
+ Into<serde_json::Value>
|
||||
/// A trait that provides helper methods for a primitive type to implement the [PrimitiveVector].
|
||||
pub trait PrimitiveElement
|
||||
where
|
||||
for<'a> Self: Primitive
|
||||
+ Scalar<VectorType = PrimitiveVector<Self>>
|
||||
+ ScalarRef<'a, ScalarType = Self, VectorType = PrimitiveVector<Self>>
|
||||
+ Scalar<RefType<'a> = Self>,
|
||||
{
|
||||
/// Logical primitive type that this wrapper type belongs to.
|
||||
type LogicalType: LogicalPrimitiveType<Wrapper = Self, Native = Self::Native>;
|
||||
/// The underlying native type.
|
||||
type Native: NativeType;
|
||||
|
||||
/// Convert native type into this wrapper type.
|
||||
fn from_native(value: Self::Native) -> Self;
|
||||
|
||||
/// Convert this wrapper type into native type.
|
||||
fn into_native(self) -> Self::Native;
|
||||
}
|
||||
|
||||
/// Trait bridging the logical primitive type with [ArrowPrimitiveType].
|
||||
pub trait LogicalPrimitiveType: 'static + Sized {
|
||||
/// Arrow primitive type of this logical type.
|
||||
type ArrowPrimitive: ArrowPrimitiveType<Native = Self::Native>;
|
||||
/// Native (physical) type of this logical type.
|
||||
type Native: NativeType;
|
||||
/// Wrapper type that the vector returns.
|
||||
type Wrapper: WrapperType<LogicalType = Self, Native = Self::Native>
|
||||
+ for<'a> Scalar<VectorType = PrimitiveVector<Self>, RefType<'a> = Self::Wrapper>
|
||||
+ for<'a> ScalarRef<'a, ScalarType = Self::Wrapper>;
|
||||
|
||||
/// Construct the data type struct.
|
||||
fn build_data_type() -> ConcreteDataType;
|
||||
|
||||
/// Return the name of the type.
|
||||
fn type_name() -> &'static str;
|
||||
/// Returns the name of the type id.
|
||||
fn type_name() -> String;
|
||||
|
||||
/// Dynamic cast the vector to the concrete vector type.
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<Self>>;
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveArray<Self>>;
|
||||
|
||||
/// Cast value ref to the primitive type.
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self::Wrapper>>;
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self>>;
|
||||
}
|
||||
|
||||
/// A new type for [WrapperType], complement the `Ord` feature for it. Wrapping non ordered
|
||||
/// primitive types like `f32` and `f64` in `OrdPrimitive` can make them be used in places that
|
||||
/// require `Ord`. For example, in `Median` or `Percentile` UDAFs.
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct OrdPrimitive<T: WrapperType>(pub T);
|
||||
macro_rules! impl_primitive_element {
|
||||
($Type:ident, $TypeId:ident) => {
|
||||
paste::paste! {
|
||||
impl PrimitiveElement for $Type {
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::$TypeId(PrimitiveType::<$Type>::default())
|
||||
}
|
||||
|
||||
impl<T: WrapperType> OrdPrimitive<T> {
|
||||
pub fn as_primitive(&self) -> T {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
fn type_name() -> String {
|
||||
stringify!($TypeId).to_string()
|
||||
}
|
||||
|
||||
impl<T: WrapperType> Eq for OrdPrimitive<T> {}
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveArray<$Type>> {
|
||||
let primitive_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveVector<$Type>>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to vector of primitive type {}",
|
||||
vector.vector_type_name(),
|
||||
stringify!($TypeId)
|
||||
),
|
||||
})?;
|
||||
Ok(&primitive_vector.array)
|
||||
}
|
||||
|
||||
impl<T: WrapperType> PartialOrd for OrdPrimitive<T> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: WrapperType> Ord for OrdPrimitive<T> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
Into::<Value>::into(self.0).cmp(&Into::<Value>::into(other.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: WrapperType> From<OrdPrimitive<T>> for Value {
|
||||
fn from(p: OrdPrimitive<T>) -> Self {
|
||||
p.0.into()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_wrapper {
|
||||
($Type: ident, $LogicalType: ident) => {
|
||||
impl WrapperType for $Type {
|
||||
type LogicalType = $LogicalType;
|
||||
type Native = $Type;
|
||||
|
||||
fn from_native(value: Self::Native) -> Self {
|
||||
value
|
||||
}
|
||||
|
||||
fn into_native(self) -> Self::Native {
|
||||
self
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_wrapper!(u8, UInt8Type);
|
||||
impl_wrapper!(u16, UInt16Type);
|
||||
impl_wrapper!(u32, UInt32Type);
|
||||
impl_wrapper!(u64, UInt64Type);
|
||||
impl_wrapper!(i8, Int8Type);
|
||||
impl_wrapper!(i16, Int16Type);
|
||||
impl_wrapper!(i32, Int32Type);
|
||||
impl_wrapper!(i64, Int64Type);
|
||||
impl_wrapper!(f32, Float32Type);
|
||||
impl_wrapper!(f64, Float64Type);
|
||||
|
||||
impl WrapperType for Date {
|
||||
type LogicalType = DateType;
|
||||
type Native = i32;
|
||||
|
||||
fn from_native(value: i32) -> Self {
|
||||
Date::new(value)
|
||||
}
|
||||
|
||||
fn into_native(self) -> i32 {
|
||||
self.val()
|
||||
}
|
||||
}
|
||||
|
||||
impl WrapperType for DateTime {
|
||||
type LogicalType = DateTimeType;
|
||||
type Native = i64;
|
||||
|
||||
fn from_native(value: Self::Native) -> Self {
|
||||
DateTime::new(value)
|
||||
}
|
||||
|
||||
fn into_native(self) -> Self::Native {
|
||||
self.val()
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! define_logical_primitive_type {
|
||||
($Native: ident, $TypeId: ident, $DataType: ident) => {
|
||||
// We need to define it as an empty struct `struct DataType {}` instead of a struct-unit
|
||||
// `struct DataType;` to ensure the serialized JSON string is compatible with previous
|
||||
// implementation.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct $DataType {}
|
||||
|
||||
impl LogicalPrimitiveType for $DataType {
|
||||
type ArrowPrimitive = arrow::datatypes::$DataType;
|
||||
type Native = $Native;
|
||||
type Wrapper = $Native;
|
||||
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::$TypeId($DataType::default())
|
||||
}
|
||||
|
||||
fn type_name() -> &'static str {
|
||||
stringify!($TypeId)
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveVector<$DataType>> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveVector<$DataType>>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to vector of primitive type {}",
|
||||
vector.vector_type_name(),
|
||||
stringify!($TypeId)
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<$Native>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::$TypeId(v) => Ok(Some(v.into())),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast value {:?} to primitive type {}",
|
||||
other,
|
||||
stringify!($TypeId),
|
||||
),
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::$TypeId(v) => Ok(Some(v.into())),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast value {:?} to primitive type {}",
|
||||
other,
|
||||
stringify!($TypeId),
|
||||
),
|
||||
}.fail(),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! define_non_timestamp_primitive {
|
||||
($Native: ident, $TypeId: ident, $DataType: ident) => {
|
||||
define_logical_primitive_type!($Native, $TypeId, $DataType);
|
||||
|
||||
impl DataType for $DataType {
|
||||
macro_rules! impl_numeric {
|
||||
($Type:ident, $TypeId:ident) => {
|
||||
impl DataType for PrimitiveType<$Type> {
|
||||
fn name(&self) -> &str {
|
||||
stringify!($TypeId)
|
||||
}
|
||||
@@ -253,7 +120,7 @@ macro_rules! define_non_timestamp_primitive {
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
$Native::default().into()
|
||||
$Type::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
@@ -261,98 +128,61 @@ macro_rules! define_non_timestamp_primitive {
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(PrimitiveVectorBuilder::<$DataType>::with_capacity(capacity))
|
||||
Box::new(PrimitiveVectorBuilder::<$Type>::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
impl std::fmt::Debug for PrimitiveType<$Type> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PrimitiveType<$Type> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_primitive_element!($Type, $TypeId);
|
||||
|
||||
paste! {
|
||||
pub type [<$TypeId Type>]=PrimitiveType<$Type>;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
define_non_timestamp_primitive!(u8, UInt8, UInt8Type);
|
||||
define_non_timestamp_primitive!(u16, UInt16, UInt16Type);
|
||||
define_non_timestamp_primitive!(u32, UInt32, UInt32Type);
|
||||
define_non_timestamp_primitive!(u64, UInt64, UInt64Type);
|
||||
define_non_timestamp_primitive!(i8, Int8, Int8Type);
|
||||
define_non_timestamp_primitive!(i16, Int16, Int16Type);
|
||||
define_non_timestamp_primitive!(i32, Int32, Int32Type);
|
||||
define_non_timestamp_primitive!(f32, Float32, Float32Type);
|
||||
define_non_timestamp_primitive!(f64, Float64, Float64Type);
|
||||
|
||||
// Timestamp primitive:
|
||||
define_logical_primitive_type!(i64, Int64, Int64Type);
|
||||
|
||||
impl DataType for Int64Type {
|
||||
fn name(&self) -> &str {
|
||||
"Int64"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Int64
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::Int64(0)
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Int64
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(PrimitiveVectorBuilder::<Int64Type>::with_capacity(capacity))
|
||||
}
|
||||
|
||||
fn is_timestamp_compatible(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
impl_numeric!(u8, UInt8);
|
||||
impl_numeric!(u16, UInt16);
|
||||
impl_numeric!(u32, UInt32);
|
||||
impl_numeric!(u64, UInt64);
|
||||
impl_numeric!(i8, Int8);
|
||||
impl_numeric!(i16, Int16);
|
||||
impl_numeric!(i32, Int32);
|
||||
impl_numeric!(i64, Int64);
|
||||
impl_numeric!(f32, Float32);
|
||||
impl_numeric!(f64, Float64);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_ord_primitive() {
|
||||
struct Foo<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
heap: BinaryHeap<OrdPrimitive<T>>,
|
||||
}
|
||||
fn test_eq() {
|
||||
assert_eq!(UInt8Type::default(), UInt8Type::default());
|
||||
assert_eq!(UInt16Type::default(), UInt16Type::default());
|
||||
assert_eq!(UInt32Type::default(), UInt32Type::default());
|
||||
assert_eq!(UInt64Type::default(), UInt64Type::default());
|
||||
assert_eq!(Int8Type::default(), Int8Type::default());
|
||||
assert_eq!(Int16Type::default(), Int16Type::default());
|
||||
assert_eq!(Int32Type::default(), Int32Type::default());
|
||||
assert_eq!(Int64Type::default(), Int64Type::default());
|
||||
assert_eq!(Float32Type::default(), Float32Type::default());
|
||||
assert_eq!(Float64Type::default(), Float64Type::default());
|
||||
|
||||
impl<T> Foo<T>
|
||||
where
|
||||
T: WrapperType,
|
||||
{
|
||||
fn push(&mut self, value: T) {
|
||||
let value = OrdPrimitive::<T>(value);
|
||||
self.heap.push(value);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! test {
|
||||
($Type:ident) => {
|
||||
let mut foo = Foo::<$Type> {
|
||||
heap: BinaryHeap::new(),
|
||||
};
|
||||
foo.push($Type::default());
|
||||
};
|
||||
}
|
||||
|
||||
test!(u8);
|
||||
test!(u16);
|
||||
test!(u32);
|
||||
test!(u64);
|
||||
test!(i8);
|
||||
test!(i16);
|
||||
test!(i32);
|
||||
test!(i64);
|
||||
test!(f32);
|
||||
test!(f64);
|
||||
assert_ne!(Float32Type::default(), Float64Type::default());
|
||||
assert_ne!(Float32Type::default(), Int32Type::default());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,10 +18,9 @@ use arrow::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::StringBytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::prelude::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::vectors::{MutableVector, StringVectorBuilder};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
@@ -53,8 +52,4 @@ impl DataType for StringType {
|
||||
/// Creates a boxed `StringVectorBuilder` constructed with the given `capacity`.
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(StringVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
|
||||
/// Reports whether this type is timestamp compatible; this implementation always returns `false`.
fn is_timestamp_compatible(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,7 +110,6 @@ impl Value {
|
||||
/// # Panics
|
||||
/// Panics if the data type is not supported.
|
||||
pub fn data_type(&self) -> ConcreteDataType {
|
||||
// TODO(yingwen): Implement this once all data types are implemented.
|
||||
match self {
|
||||
Value::Null => ConcreteDataType::null_datatype(),
|
||||
Value::Boolean(_) => ConcreteDataType::boolean_datatype(),
|
||||
@@ -126,10 +125,10 @@ impl Value {
|
||||
Value::Float64(_) => ConcreteDataType::float64_datatype(),
|
||||
Value::String(_) => ConcreteDataType::string_datatype(),
|
||||
Value::Binary(_) => ConcreteDataType::binary_datatype(),
|
||||
Value::List(list) => ConcreteDataType::list_datatype(list.datatype().clone()),
|
||||
Value::Date(_) => ConcreteDataType::date_datatype(),
|
||||
Value::DateTime(_) => ConcreteDataType::datetime_datatype(),
|
||||
Value::Timestamp(v) => ConcreteDataType::timestamp_datatype(v.unit()),
|
||||
Value::List(list) => ConcreteDataType::list_datatype(list.datatype().clone()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,12 +193,7 @@ impl Value {
|
||||
Value::List(_) => LogicalTypeId::List,
|
||||
Value::Date(_) => LogicalTypeId::Date,
|
||||
Value::DateTime(_) => LogicalTypeId::DateTime,
|
||||
Value::Timestamp(t) => match t.unit() {
|
||||
TimeUnit::Second => LogicalTypeId::TimestampSecond,
|
||||
TimeUnit::Millisecond => LogicalTypeId::TimestampMillisecond,
|
||||
TimeUnit::Microsecond => LogicalTypeId::TimestampMicrosecond,
|
||||
TimeUnit::Nanosecond => LogicalTypeId::TimestampNanosecond,
|
||||
},
|
||||
Value::Timestamp(_) => LogicalTypeId::Timestamp,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -283,9 +277,6 @@ impl_value_from!(Float32, f32);
|
||||
impl_value_from!(Float64, f64);
|
||||
impl_value_from!(String, StringBytes);
|
||||
impl_value_from!(Binary, Bytes);
|
||||
impl_value_from!(Date, Date);
|
||||
impl_value_from!(DateTime, DateTime);
|
||||
impl_value_from!(Timestamp, Timestamp);
|
||||
|
||||
impl From<String> for Value {
|
||||
fn from(string: String) -> Value {
|
||||
@@ -305,6 +296,12 @@ impl From<Vec<u8>> for Value {
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps a `Timestamp` in the `Value::Timestamp` variant.
impl From<Timestamp> for Value {
|
||||
fn from(v: Timestamp) -> Self {
|
||||
Value::Timestamp(v)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[u8]> for Value {
|
||||
fn from(bytes: &[u8]) -> Value {
|
||||
Value::Binary(bytes.into())
|
||||
@@ -340,7 +337,6 @@ impl TryFrom<Value> for serde_json::Value {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(yingwen): Consider removing the `datatype` field from `ListValue`.
|
||||
/// List value.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ListValue {
|
||||
@@ -395,7 +391,6 @@ impl TryFrom<ScalarValue> for Value {
|
||||
|
||||
fn try_from(v: ScalarValue) -> Result<Self> {
|
||||
let v = match v {
|
||||
ScalarValue::Null => Value::Null,
|
||||
ScalarValue::Boolean(b) => Value::from(b),
|
||||
ScalarValue::Float32(f) => Value::from(f),
|
||||
ScalarValue::Float64(f) => Value::from(f),
|
||||
@@ -410,10 +405,8 @@ impl TryFrom<ScalarValue> for Value {
|
||||
ScalarValue::Utf8(s) | ScalarValue::LargeUtf8(s) => {
|
||||
Value::from(s.map(StringBytes::from))
|
||||
}
|
||||
ScalarValue::Binary(b)
|
||||
| ScalarValue::LargeBinary(b)
|
||||
| ScalarValue::FixedSizeBinary(_, b) => Value::from(b.map(Bytes::from)),
|
||||
ScalarValue::List(vs, field) => {
|
||||
ScalarValue::Binary(b) | ScalarValue::LargeBinary(b) => Value::from(b.map(Bytes::from)),
|
||||
ScalarValue::List(vs, t) => {
|
||||
let items = if let Some(vs) = vs {
|
||||
let vs = vs
|
||||
.into_iter()
|
||||
@@ -423,7 +416,7 @@ impl TryFrom<ScalarValue> for Value {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let datatype = ConcreteDataType::try_from(field.data_type())?;
|
||||
let datatype = t.as_ref().try_into()?;
|
||||
Value::List(ListValue::new(items, datatype))
|
||||
}
|
||||
ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null),
|
||||
@@ -442,13 +435,7 @@ impl TryFrom<ScalarValue> for Value {
|
||||
ScalarValue::TimestampNanosecond(t, _) => t
|
||||
.map(|x| Value::Timestamp(Timestamp::new(x, TimeUnit::Nanosecond)))
|
||||
.unwrap_or(Value::Null),
|
||||
ScalarValue::Decimal128(_, _, _)
|
||||
| ScalarValue::Time64(_)
|
||||
| ScalarValue::IntervalYearMonth(_)
|
||||
| ScalarValue::IntervalDayTime(_)
|
||||
| ScalarValue::IntervalMonthDayNano(_)
|
||||
| ScalarValue::Struct(_, _)
|
||||
| ScalarValue::Dictionary(_, _) => {
|
||||
_ => {
|
||||
return error::UnsupportedArrowTypeSnafu {
|
||||
arrow_type: v.get_datatype(),
|
||||
}
|
||||
@@ -558,6 +545,15 @@ impl<'a> Ord for ValueRef<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper trait to convert copyable types to `ValueRef`.
|
||||
///
|
||||
/// It can be used in place of `Into<ValueRef<'a>>`, thus avoiding confusion between `Into<Value>`
|
||||
/// and `Into<ValueRef<'a>>` in generic code. One typical usage is the [`Primitive`](crate::primitive_traits::Primitive) trait.
|
||||
pub trait IntoValueRef<'a> {
|
||||
/// Converts `self` into a [ValueRef].
|
||||
fn into_value_ref(self) -> ValueRef<'a>;
|
||||
}
|
||||
|
||||
macro_rules! impl_value_ref_from {
|
||||
($Variant:ident, $Type:ident) => {
|
||||
impl From<$Type> for ValueRef<'_> {
|
||||
@@ -566,6 +562,12 @@ macro_rules! impl_value_ref_from {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoValueRef<'a> for $Type {
|
||||
fn into_value_ref(self) -> ValueRef<'a> {
|
||||
ValueRef::$Variant(self.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Option<$Type>> for ValueRef<'_> {
|
||||
fn from(value: Option<$Type>) -> Self {
|
||||
match value {
|
||||
@@ -574,6 +576,15 @@ macro_rules! impl_value_ref_from {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoValueRef<'a> for Option<$Type> {
|
||||
fn into_value_ref(self) -> ValueRef<'a> {
|
||||
match self {
|
||||
Some(v) => ValueRef::$Variant(v.into()),
|
||||
None => ValueRef::Null,
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -588,9 +599,6 @@ impl_value_ref_from!(Int32, i32);
|
||||
impl_value_ref_from!(Int64, i64);
|
||||
impl_value_ref_from!(Float32, f32);
|
||||
impl_value_ref_from!(Float64, f64);
|
||||
impl_value_ref_from!(Date, Date);
|
||||
impl_value_ref_from!(DateTime, DateTime);
|
||||
impl_value_ref_from!(Timestamp, Timestamp);
|
||||
|
||||
impl<'a> From<&'a str> for ValueRef<'a> {
|
||||
fn from(string: &'a str) -> ValueRef<'a> {
|
||||
@@ -620,7 +628,6 @@ impl<'a> From<Option<ListValueRef<'a>>> for ValueRef<'a> {
|
||||
/// if it becomes bottleneck.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum ListValueRef<'a> {
|
||||
// TODO(yingwen): Consider replacing this with VectorRef.
// Holds a borrowed `ListVector` plus an index `idx`
// (presumably the position of the list inside that vector -- confirm against callers).
|
||||
Indexed { vector: &'a ListVector, idx: usize },
|
||||
// Holds a borrowed `ListValue`.
Ref { val: &'a ListValue },
|
||||
}
|
||||
@@ -778,16 +785,19 @@ mod tests {
|
||||
Some(Box::new(vec![Value::Int32(1), Value::Null])),
|
||||
ConcreteDataType::int32_datatype()
|
||||
)),
|
||||
ScalarValue::new_list(
|
||||
Some(vec![ScalarValue::Int32(Some(1)), ScalarValue::Int32(None)]),
|
||||
ArrowDataType::Int32,
|
||||
ScalarValue::List(
|
||||
Some(Box::new(vec![
|
||||
ScalarValue::Int32(Some(1)),
|
||||
ScalarValue::Int32(None)
|
||||
])),
|
||||
Box::new(ArrowDataType::Int32)
|
||||
)
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(None, ConcreteDataType::uint32_datatype())),
|
||||
ScalarValue::new_list(None, ArrowDataType::UInt32)
|
||||
ScalarValue::List(None, Box::new(ArrowDataType::UInt32))
|
||||
.try_into()
|
||||
.unwrap()
|
||||
);
|
||||
@@ -970,10 +980,6 @@ mod tests {
|
||||
ConcreteDataType::int32_datatype(),
|
||||
)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::list_datatype(ConcreteDataType::null_datatype()),
|
||||
&Value::List(ListValue::default()),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::date_datatype(),
|
||||
&Value::Date(Date::new(1)),
|
||||
@@ -983,7 +989,7 @@ mod tests {
|
||||
&Value::DateTime(DateTime::new(1)),
|
||||
);
|
||||
check_type_and_value(
|
||||
&ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
&ConcreteDataType::timestamp_millis_datatype(),
|
||||
&Value::Timestamp(Timestamp::from_millis(1)),
|
||||
);
|
||||
}
|
||||
@@ -1202,6 +1208,59 @@ mod tests {
|
||||
assert!(wrong_value.as_list().is_err());
|
||||
}
|
||||
|
||||
// Checks that `into_value_ref()` and `ValueRef::from` produce the same `ValueRef`
// for plain values and for `Option`-wrapped values.
#[test]
|
||||
fn test_into_value_ref() {
|
||||
// Asserts that `$data` (typed as `$PrimitiveType`) converts to
// `ValueRef::$Variant($Wrapper::from(data))` through `into_value_ref()` and
// `ValueRef::from`, both bare and wrapped in `Some`, and that a `None` of the
// same type converts to `ValueRef::Null`.
macro_rules! check_into_value_ref {
|
||||
($Variant: ident, $data: expr, $PrimitiveType: ident, $Wrapper: ident) => {
|
||||
let data: $PrimitiveType = $data;
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
data.into_value_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
ValueRef::from(data)
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
Some(data).into_value_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
ValueRef::from(Some(data))
|
||||
);
|
||||
let x: Option<$PrimitiveType> = None;
|
||||
assert_eq!(ValueRef::Null, x.into_value_ref());
|
||||
assert_eq!(ValueRef::Null, x.into());
|
||||
};
|
||||
}
|
||||
|
||||
// Shorthand for the case where the wrapper type is the primitive type itself.
macro_rules! check_primitive_into_value_ref {
|
||||
($Variant: ident, $data: expr, $PrimitiveType: ident) => {
|
||||
check_into_value_ref!($Variant, $data, $PrimitiveType, $PrimitiveType)
|
||||
};
|
||||
}
|
||||
|
||||
check_primitive_into_value_ref!(Boolean, true, bool);
|
||||
check_primitive_into_value_ref!(UInt8, 10, u8);
|
||||
check_primitive_into_value_ref!(UInt16, 20, u16);
|
||||
check_primitive_into_value_ref!(UInt32, 30, u32);
|
||||
check_primitive_into_value_ref!(UInt64, 40, u64);
|
||||
check_primitive_into_value_ref!(Int8, -10, i8);
|
||||
check_primitive_into_value_ref!(Int16, -20, i16);
|
||||
check_primitive_into_value_ref!(Int32, -30, i32);
|
||||
check_primitive_into_value_ref!(Int64, -40, i64);
|
||||
// Floats are compared through the `OrderedF32` / `OrderedF64` wrappers.
check_into_value_ref!(Float32, 10.0, f32, OrderedF32);
|
||||
check_into_value_ref!(Float64, 10.0, f64, OrderedF64);
|
||||
|
||||
// Byte slices and string slices convert through `ValueRef::from` to the
// `Binary` and `String` variants respectively.
let hello = "hello";
|
||||
assert_eq!(
|
||||
ValueRef::Binary(hello.as_bytes()),
|
||||
ValueRef::from(hello.as_bytes())
|
||||
);
|
||||
assert_eq!(ValueRef::String(hello), ValueRef::from(hello));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
assert_eq!(Value::Null.to_string(), "Null");
|
||||
@@ -1242,34 +1301,10 @@ mod tests {
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_second_datatype(),
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond),
|
||||
))
|
||||
.to_string(),
|
||||
"TimestampSecondType[]"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_millisecond_datatype(),
|
||||
))
|
||||
.to_string(),
|
||||
"TimestampMillisecondType[]"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_microsecond_datatype(),
|
||||
))
|
||||
.to_string(),
|
||||
"TimestampMicrosecondType[]"
|
||||
);
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(vec![])),
|
||||
ConcreteDataType::timestamp_nanosecond_datatype(),
|
||||
))
|
||||
.to_string(),
|
||||
"TimestampNanosecondType[]"
|
||||
"Timestamp[]"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,59 +12,68 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub mod binary;
|
||||
pub mod boolean;
|
||||
mod builder;
|
||||
pub mod constant;
|
||||
pub mod date;
|
||||
pub mod datetime;
|
||||
mod eq;
|
||||
mod helper;
|
||||
mod list;
|
||||
pub mod mutable;
|
||||
pub mod null;
|
||||
mod operations;
|
||||
pub mod primitive;
|
||||
mod string;
|
||||
mod timestamp;
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::bitmap::Bitmap;
|
||||
pub use binary::*;
|
||||
pub use boolean::*;
|
||||
pub use builder::VectorBuilder;
|
||||
pub use constant::*;
|
||||
pub use date::*;
|
||||
pub use datetime::*;
|
||||
pub use helper::Helper;
|
||||
pub use list::*;
|
||||
pub use mutable::MutableVector;
|
||||
pub use null::*;
|
||||
pub use operations::VectorOp;
|
||||
pub use primitive::*;
|
||||
use snafu::ensure;
|
||||
pub use string::*;
|
||||
pub use timestamp::*;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::operations::VectorOp;
|
||||
|
||||
mod binary;
|
||||
mod boolean;
|
||||
mod constant;
|
||||
mod date;
|
||||
mod datetime;
|
||||
mod eq;
|
||||
mod helper;
|
||||
mod list;
|
||||
mod null;
|
||||
mod operations;
|
||||
mod primitive;
|
||||
mod string;
|
||||
mod timestamp;
|
||||
mod validity;
|
||||
/// Describes which slots of a vector are valid (non-null).
#[derive(Debug, PartialEq)]
|
||||
pub enum Validity<'a> {
|
||||
/// A borrowed bitmap recording whether each array slot is valid or not (null).
|
||||
Slots(&'a Bitmap),
|
||||
/// All slots are valid.
|
||||
AllValid,
|
||||
/// All slots are null.
|
||||
AllNull,
|
||||
}
|
||||
|
||||
pub use binary::{BinaryVector, BinaryVectorBuilder};
|
||||
pub use boolean::{BooleanVector, BooleanVectorBuilder};
|
||||
pub use constant::ConstantVector;
|
||||
pub use date::{DateVector, DateVectorBuilder};
|
||||
pub use datetime::{DateTimeVector, DateTimeVectorBuilder};
|
||||
pub use helper::Helper;
|
||||
pub use list::{ListIter, ListVector, ListVectorBuilder};
|
||||
pub use null::{NullVector, NullVectorBuilder};
|
||||
pub use primitive::{
|
||||
Float32Vector, Float32VectorBuilder, Float64Vector, Float64VectorBuilder, Int16Vector,
|
||||
Int16VectorBuilder, Int32Vector, Int32VectorBuilder, Int64Vector, Int64VectorBuilder,
|
||||
Int8Vector, Int8VectorBuilder, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder,
|
||||
UInt16Vector, UInt16VectorBuilder, UInt32Vector, UInt32VectorBuilder, UInt64Vector,
|
||||
UInt64VectorBuilder, UInt8Vector, UInt8VectorBuilder,
|
||||
};
|
||||
pub use string::{StringVector, StringVectorBuilder};
|
||||
pub use timestamp::{
|
||||
TimestampMicrosecondVector, TimestampMicrosecondVectorBuilder, TimestampMillisecondVector,
|
||||
TimestampMillisecondVectorBuilder, TimestampNanosecondVector, TimestampNanosecondVectorBuilder,
|
||||
TimestampSecondVector, TimestampSecondVectorBuilder,
|
||||
};
|
||||
pub use validity::Validity;
|
||||
impl<'a> Validity<'a> {
|
||||
/// Returns the bitmap when `self` is `Validity::Slots`, and `None` for every other variant.
pub fn slots(&self) -> Option<&Bitmap> {
|
||||
match self {
|
||||
Validity::Slots(bitmap) => Some(bitmap),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(yingwen): arrow 28.0 implements Clone for all arrays; we could upgrade to it and simplify
|
||||
// some code in methods such as `to_arrow_array()` and `to_boxed_arrow_array()`.
|
||||
/// Vector of data values.
|
||||
pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
|
||||
/// Returns the data type of the vector.
|
||||
@@ -101,7 +110,13 @@ pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
|
||||
/// The number of null slots on this [`Vector`].
|
||||
/// # Implementation
|
||||
/// This is `O(1)`.
|
||||
fn null_count(&self) -> usize;
|
||||
fn null_count(&self) -> usize {
|
||||
match self.validity() {
|
||||
Validity::Slots(bitmap) => bitmap.null_count(),
|
||||
Validity::AllValid => 0,
|
||||
Validity::AllNull => self.len(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when it's a ConstantColumn
|
||||
fn is_const(&self) -> bool {
|
||||
@@ -150,42 +165,6 @@ pub trait Vector: Send + Sync + Serializable + Debug + VectorOp {
|
||||
|
||||
pub type VectorRef = Arc<dyn Vector>;
|
||||
|
||||
/// Mutable vector that can be used to build an immutable vector.
|
||||
pub trait MutableVector: Send + Sync {
|
||||
/// Returns the data type of the vector.
|
||||
fn data_type(&self) -> ConcreteDataType;
|
||||
|
||||
/// Returns the length of the vector.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Returns whether the vector is empty, i.e. whether `len()` is zero.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Converts to `Any`, to enable dynamic casting.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
/// Converts to mutable `Any`, to enable dynamic casting.
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any;
|
||||
|
||||
/// Converts `self` to an (immutable) [VectorRef] and resets `self`.
|
||||
fn to_vector(&mut self) -> VectorRef;
|
||||
|
||||
/// Pushes a value ref to this mutable vector.
|
||||
///
|
||||
/// Returns an error if the data type does not match.
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()>;
|
||||
|
||||
/// Extends this mutable vector with a slice of `vector`.
|
||||
///
|
||||
/// Returns an error if the data type does not match.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `offset + length > vector.len()`.
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
|
||||
}
|
||||
|
||||
/// Helper to define `try_from_arrow_array(array: arrow::array::ArrayRef)` function.
|
||||
macro_rules! impl_try_from_arrow_array_for_vector {
|
||||
($Array: ident, $Vector: ident) => {
|
||||
@@ -193,20 +172,16 @@ macro_rules! impl_try_from_arrow_array_for_vector {
|
||||
pub fn try_from_arrow_array(
|
||||
array: impl AsRef<dyn arrow::array::Array>,
|
||||
) -> crate::error::Result<$Vector> {
|
||||
use snafu::OptionExt;
|
||||
|
||||
let data = array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<$Array>()
|
||||
.with_context(|| crate::error::ConversionSnafu {
|
||||
from: std::format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.data()
|
||||
.clone();
|
||||
|
||||
let concrete_array = $Array::from(data);
|
||||
Ok($Vector::from(concrete_array))
|
||||
Ok($Vector::from(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<$Array>()
|
||||
.with_context(|| crate::error::ConversionSnafu {
|
||||
from: std::format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -214,7 +189,10 @@ macro_rules! impl_try_from_arrow_array_for_vector {
|
||||
|
||||
macro_rules! impl_validity_for_vector {
|
||||
($array: expr) => {
|
||||
Validity::from_array_data($array.data())
|
||||
match $array.validity() {
|
||||
Some(bitmap) => Validity::Slots(bitmap),
|
||||
None => Validity::AllValid,
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -241,11 +219,10 @@ macro_rules! impl_get_ref_for_vector {
|
||||
}
|
||||
|
||||
macro_rules! impl_extend_for_builder {
|
||||
($mutable_vector: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
|
||||
($mutable_array: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
|
||||
use snafu::OptionExt;
|
||||
|
||||
let sliced_vector = $vector.slice($offset, $length);
|
||||
let concrete_vector = sliced_vector
|
||||
let concrete_vector = $vector
|
||||
.as_any()
|
||||
.downcast_ref::<$VectorType>()
|
||||
.with_context(|| crate::error::CastTypeSnafu {
|
||||
@@ -255,9 +232,8 @@ macro_rules! impl_extend_for_builder {
|
||||
stringify!($VectorType)
|
||||
),
|
||||
})?;
|
||||
for value in concrete_vector.iter_data() {
|
||||
$mutable_vector.push(value);
|
||||
}
|
||||
let slice = concrete_vector.array.slice($offset, $length);
|
||||
$mutable_array.extend_trusted_len(slice.iter());
|
||||
Ok(())
|
||||
}};
|
||||
}
|
||||
@@ -269,27 +245,27 @@ pub(crate) use {
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use arrow::array::{Array, Int32Array, UInt8Array};
|
||||
use arrow::array::{Array, PrimitiveArray};
|
||||
use serde_json;
|
||||
|
||||
use super::helper::Helper;
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::{Int32Type, LogicalPrimitiveType};
|
||||
use crate::vectors::helper::Helper;
|
||||
use crate::types::PrimitiveElement;
|
||||
|
||||
#[test]
|
||||
fn test_df_columns_to_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1, 2, 3]));
|
||||
let vector = Helper::try_into_vector(df_column).unwrap();
|
||||
assert_eq!(
|
||||
Int32Type::build_data_type().as_arrow_type(),
|
||||
i32::build_data_type().as_arrow_type(),
|
||||
vector.data_type().as_arrow_type()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_i32_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![1, 2, 3]));
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::<i32>::from_slice(vec![1, 2, 3]));
|
||||
let json_value = Helper::try_into_vector(df_column)
|
||||
.unwrap()
|
||||
.serialize_to_json()
|
||||
@@ -299,7 +275,7 @@ pub mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_i8_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(UInt8Array::from(vec![1, 2, 3]));
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1u8, 2u8, 3u8]));
|
||||
let json_value = Helper::try_into_vector(df_column)
|
||||
.unwrap()
|
||||
.serialize_to_json()
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::array::{ArrayIter, GenericByteArray};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::arrow_array::{BinaryArray, MutableBinaryArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
@@ -36,16 +37,6 @@ impl BinaryVector {
|
||||
/// Borrows the wrapped array as a `&dyn Array`.
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
/// Returns a clone of the wrapped array's `ArrayData`.
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
/// Builds a `BinaryVector` whose array is `BinaryArray::from(data)`.
fn from_array_data(data: ArrayData) -> BinaryVector {
|
||||
BinaryVector {
|
||||
array: BinaryArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BinaryArray> for BinaryVector {
|
||||
@@ -57,7 +48,7 @@ impl From<BinaryArray> for BinaryVector {
|
||||
impl From<Vec<Option<Vec<u8>>>> for BinaryVector {
|
||||
fn from(data: Vec<Option<Vec<u8>>>) -> Self {
|
||||
Self {
|
||||
array: BinaryArray::from_iter(data),
|
||||
array: BinaryArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -80,13 +71,11 @@ impl Vector for BinaryVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(BinaryArray::from(data))
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(BinaryArray::from(data))
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -94,11 +83,7 @@ impl Vector for BinaryVector {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
self.array.values().len() + self.array.offsets().len() * std::mem::size_of::<i64>()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -106,8 +91,7 @@ impl Vector for BinaryVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
Arc::new(Self::from(self.array.slice(offset, length)))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -164,15 +148,12 @@ impl MutableVector for BinaryVectorBuilder {
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
match value.as_binary()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
self.mutable_array.push(value.as_binary()?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self, vector, BinaryVector, offset, length)
|
||||
vectors::impl_extend_for_builder!(self.mutable_array, vector, BinaryVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,20 +162,17 @@ impl ScalarVectorBuilder for BinaryVectorBuilder {
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableBinaryArray::with_capacity(capacity, 0),
|
||||
mutable_array: MutableBinaryArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
BinaryVector {
|
||||
array: self.mutable_array.finish(),
|
||||
array: std::mem::take(&mut self.mutable_array).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -227,17 +205,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_binary_vector_misc() {
|
||||
let v = BinaryVector::from(BinaryArray::from_iter_values(&[
|
||||
vec![1, 2, 3],
|
||||
vec![1, 2, 3],
|
||||
]));
|
||||
let v = BinaryVector::from(BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
|
||||
assert_eq!(2, v.len());
|
||||
assert_eq!("BinaryVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(128, v.memory_size());
|
||||
assert_eq!(30, v.memory_size());
|
||||
|
||||
for i in 0..2 {
|
||||
assert!(!v.is_null(i));
|
||||
@@ -252,10 +227,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_binary_vector_to_json() {
|
||||
let vector = BinaryVector::from(BinaryArray::from_iter_values(&[
|
||||
vec![1, 2, 3],
|
||||
vec![1, 2, 3],
|
||||
]));
|
||||
let vector = BinaryVector::from(BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
assert_eq!(
|
||||
@@ -281,8 +253,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let arrow_array = BinaryArray::from_iter_values(&[vec![1, 2, 3], vec![1, 2, 3]]);
|
||||
let original = BinaryArray::from(arrow_array.data().clone());
|
||||
let arrow_array = BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]);
|
||||
let original = arrow_array.clone();
|
||||
let vector = BinaryVector::from(arrow_array);
|
||||
assert_eq!(original, vector.array);
|
||||
}
|
||||
@@ -317,7 +289,7 @@ mod tests {
|
||||
builder.push(Some(b"world"));
|
||||
let vector = builder.finish();
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert!(vector.validity().is_all_valid());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
|
||||
let mut builder = BinaryVectorBuilder::with_capacity(3);
|
||||
builder.push(Some(b"hello"));
|
||||
@@ -326,10 +298,9 @@ mod tests {
|
||||
let vector = builder.finish();
|
||||
assert_eq!(1, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
assert!(!validity.is_set(1));
|
||||
|
||||
assert_eq!(1, validity.null_count());
|
||||
assert!(!validity.is_set(1));
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(1, slots.null_count());
|
||||
assert!(!slots.get_bit(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -16,10 +16,9 @@ use std::any::Any;
|
||||
use std::borrow::Borrow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, BooleanArray, BooleanBuilder,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
use arrow::array::{Array, ArrayRef, BooleanArray, MutableArray, MutableBooleanArray};
|
||||
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::Result;
|
||||
@@ -42,26 +41,12 @@ impl BooleanVector {
|
||||
/// Borrows the wrapped `BooleanArray`.
pub(crate) fn as_boolean_array(&self) -> &BooleanArray {
|
||||
&self.array
|
||||
}
|
||||
|
||||
/// Returns a clone of the wrapped array's `ArrayData`.
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
/// Builds a `BooleanVector` whose array is `BooleanArray::from(data)`.
fn from_array_data(data: ArrayData) -> BooleanVector {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from(data),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the result of calling `false_count()` on the wrapped `BooleanArray`.
pub(crate) fn false_count(&self) -> usize {
|
||||
self.array.false_count()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<bool>> for BooleanVector {
|
||||
fn from(data: Vec<bool>) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from(data),
|
||||
array: BooleanArray::from_slice(&data),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -106,13 +91,11 @@ impl Vector for BooleanVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(BooleanArray::from(data))
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(BooleanArray::from(data))
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -120,11 +103,7 @@ impl Vector for BooleanVector {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
self.array.values().as_slice().0.len()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -132,8 +111,7 @@ impl Vector for BooleanVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
Arc::new(Self::from(self.array.slice(offset, length)))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -148,7 +126,7 @@ impl Vector for BooleanVector {
|
||||
impl ScalarVector for BooleanVector {
|
||||
type OwnedItem = bool;
|
||||
type RefItem<'a> = bool;
|
||||
type Iter<'a> = ArrayIter<&'a BooleanArray>;
|
||||
type Iter<'a> = ZipValidity<'a, bool, BitmapIter<'a>>;
|
||||
type Builder = BooleanVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
@@ -165,7 +143,7 @@ impl ScalarVector for BooleanVector {
|
||||
}
|
||||
|
||||
pub struct BooleanVectorBuilder {
|
||||
mutable_array: BooleanBuilder,
|
||||
mutable_array: MutableBooleanArray,
|
||||
}
|
||||
|
||||
impl MutableVector for BooleanVectorBuilder {
|
||||
@@ -190,15 +168,12 @@ impl MutableVector for BooleanVectorBuilder {
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
match value.as_boolean()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
self.mutable_array.push(value.as_boolean()?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self, vector, BooleanVector, offset, length)
|
||||
vectors::impl_extend_for_builder!(self.mutable_array, vector, BooleanVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,20 +182,17 @@ impl ScalarVectorBuilder for BooleanVectorBuilder {
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: BooleanBuilder::with_capacity(capacity),
|
||||
mutable_array: MutableBooleanArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
BooleanVector {
|
||||
array: self.mutable_array.finish(),
|
||||
array: std::mem::take(&mut self.mutable_array).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -253,9 +225,9 @@ mod tests {
|
||||
assert_eq!(9, v.len());
|
||||
assert_eq!("BooleanVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(64, v.memory_size());
|
||||
assert_eq!(2, v.memory_size());
|
||||
|
||||
for (i, b) in bools.iter().enumerate() {
|
||||
assert!(!v.is_null(i));
|
||||
@@ -344,12 +316,13 @@ mod tests {
|
||||
let vector = BooleanVector::from(vec![Some(true), None, Some(false)]);
|
||||
assert_eq!(1, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
assert_eq!(1, validity.null_count());
|
||||
assert!(!validity.is_set(1));
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(1, slots.null_count());
|
||||
assert!(!slots.get_bit(1));
|
||||
|
||||
let vector = BooleanVector::from(vec![true, false, false]);
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert!(vector.validity().is_all_valid());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -55,27 +55,6 @@ impl ConstantVector {
|
||||
/// Returns a reference to the value stored at row 0 of the wrapped vector.
pub fn get_constant_ref(&self) -> ValueRef {
    let inner = &self.vector;
    inner.get_ref(0)
}
|
||||
|
||||
pub(crate) fn replicate_vector(&self, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), self.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return self.slice(0, 0);
|
||||
}
|
||||
|
||||
Arc::new(ConstantVector::new(
|
||||
self.vector.clone(),
|
||||
*offsets.last().unwrap(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn filter_vector(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
let length = self.len() - filter.false_count();
|
||||
if length == self.len() {
|
||||
return Ok(Arc::new(self.clone()));
|
||||
}
|
||||
Ok(Arc::new(ConstantVector::new(self.inner().clone(), length)))
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for ConstantVector {
|
||||
@@ -111,9 +90,9 @@ impl Vector for ConstantVector {
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
if self.vector.is_null(0) {
|
||||
Validity::all_null(self.length)
|
||||
Validity::AllNull
|
||||
} else {
|
||||
Validity::all_valid(self.length)
|
||||
Validity::AllValid
|
||||
}
|
||||
}
|
||||
|
||||
@@ -143,14 +122,6 @@ impl Vector for ConstantVector {
|
||||
/// Returns the value at row 0 of the wrapped vector; `_index` is ignored.
fn get_ref(&self, _index: usize) -> ValueRef {
    let inner = &self.vector;
    inner.get_ref(0)
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
if self.only_null() {
|
||||
self.len()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ConstantVector {
|
||||
@@ -169,6 +140,33 @@ impl Serializable for ConstantVector {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_constant(vector: &ConstantVector, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), vector.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return vector.slice(0, 0);
|
||||
}
|
||||
|
||||
Arc::new(ConstantVector::new(
|
||||
vector.vector.clone(),
|
||||
*offsets.last().unwrap(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn filter_constant(
|
||||
vector: &ConstantVector,
|
||||
filter: &BooleanVector,
|
||||
) -> Result<VectorRef> {
|
||||
let length = filter.len() - filter.as_boolean_array().values().null_count();
|
||||
if length == vector.len() {
|
||||
return Ok(Arc::new(vector.clone()));
|
||||
}
|
||||
Ok(Arc::new(ConstantVector::new(
|
||||
vector.inner().clone(),
|
||||
length,
|
||||
)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
@@ -184,9 +182,9 @@ mod tests {
|
||||
assert_eq!("ConstantVector", c.vector_type_name());
|
||||
assert!(c.is_const());
|
||||
assert_eq!(10, c.len());
|
||||
assert!(c.validity().is_all_valid());
|
||||
assert_eq!(Validity::AllValid, c.validity());
|
||||
assert!(!c.only_null());
|
||||
assert_eq!(64, c.memory_size());
|
||||
assert_eq!(4, c.memory_size());
|
||||
|
||||
for i in 0..10 {
|
||||
assert!(!c.is_null(i));
|
||||
|
||||
@@ -12,28 +12,258 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::types::DateType;
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
// Vector for [`Date`](common_time::Date).
|
||||
pub type DateVector = PrimitiveVector<DateType>;
|
||||
// Builder to build DateVector.
|
||||
pub type DateVectorBuilder = PrimitiveVectorBuilder<DateType>;
|
||||
use arrow::array::{Array, ArrayRef, PrimitiveArray};
|
||||
use common_time::date::Date;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::*;
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::vectors::{MutableVector, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct DateVector {
|
||||
array: PrimitiveVector<i32>,
|
||||
}
|
||||
|
||||
impl DateVector {
|
||||
pub fn new(array: PrimitiveArray<i32>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector { array },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i32>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
self.array.as_arrow()
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for DateVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::date_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"DateVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Arc::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date32,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date32,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.memory_size()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self {
|
||||
array: PrimitiveVector::new(self.array.array.slice(offset, length)),
|
||||
})
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
match self.array.get(index) {
|
||||
Value::Int32(v) => Value::Date(Date::new(v)),
|
||||
Value::Null => Value::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int32(v) => ValueRef::Date(Date::new(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<i32>>> for DateVector {
|
||||
fn from(data: Vec<Option<i32>>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector::<i32>::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateIter<'a> {
|
||||
iter: PrimitiveIter<'a, i32>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for DateIter<'a> {
|
||||
type Item = Option<Date>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|v| v.map(Date::new))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for DateVector {
|
||||
type OwnedItem = Date;
|
||||
type RefItem<'a> = Date;
|
||||
type Iter<'a> = DateIter<'a>;
|
||||
|
||||
type Builder = DateVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
self.array.get_data(idx).map(Date::new)
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
DateIter {
|
||||
iter: self.array.iter_data(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for DateVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
Ok(self
|
||||
.array
|
||||
.iter_data()
|
||||
.map(|v| v.map(Date::new))
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateVectorBuilder {
|
||||
buffer: PrimitiveVectorBuilder<i32>,
|
||||
}
|
||||
|
||||
impl MutableVector for DateVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::date_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_date()?.map(|d| d.val()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for DateVectorBuilder {
|
||||
type VectorType = DateVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: PrimitiveVectorBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer.push(value.map(|d| d.val()))
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: self.buffer.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_date(vector: &DateVector, offsets: &[usize]) -> VectorRef {
|
||||
let array = crate::vectors::primitive::replicate_primitive_with_type(
|
||||
&vector.array,
|
||||
offsets,
|
||||
vector.data_type(),
|
||||
);
|
||||
Arc::new(DateVector { array })
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::Array;
|
||||
use common_time::date::Date;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::DateType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{Vector, VectorRef};
|
||||
|
||||
#[test]
|
||||
fn test_build_date_vector() {
|
||||
@@ -58,7 +288,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_date_scalar() {
|
||||
let vector = DateVector::from_slice(&[1, 2]);
|
||||
let vector = DateVector::from_slice(&[Date::new(1), Date::new(2)]);
|
||||
assert_eq!(2, vector.len());
|
||||
assert_eq!(Some(Date::new(1)), vector.get_data(0));
|
||||
assert_eq!(Some(Date::new(2)), vector.get_data(1));
|
||||
@@ -66,7 +296,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_date_vector_builder() {
|
||||
let input = DateVector::from_slice(&[1, 2, 3]);
|
||||
let input = DateVector::from_slice(&[Date::new(1), Date::new(2), Date::new(3)]);
|
||||
|
||||
let mut builder = DateType::default().create_mutable_vector(3);
|
||||
builder
|
||||
@@ -79,25 +309,19 @@ mod tests {
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateVector::from_slice(&[5, 2, 3]));
|
||||
let expect: VectorRef = Arc::new(DateVector::from_slice(&[
|
||||
Date::new(5),
|
||||
Date::new(2),
|
||||
Date::new(3),
|
||||
]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_date_from_arrow() {
|
||||
let vector = DateVector::from_slice(&[1, 2]);
|
||||
let vector = DateVector::from_slice(&[Date::new(1), Date::new(2)]);
|
||||
let arrow = vector.as_arrow().slice(0, vector.len());
|
||||
let vector2 = DateVector::try_from_arrow_array(&arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
}
|
||||
|
||||
/// Serializing raw values -1, 0 and 1 must produce the three dates around 1970-01-01.
#[test]
fn test_serialize_date_vector() {
    let vector = DateVector::from_slice(&[-1, 0, 1]);
    let json_values = vector.serialize_to_json().unwrap();
    let serialized_json = serde_json::to_string(&json_values).unwrap();
    assert_eq!(
        r#"["1969-12-31","1970-01-01","1970-01-02"]"#,
        serialized_json
    );
}
|
||||
}
|
||||
|
||||
@@ -12,32 +12,264 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::types::DateTimeType;
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Vector of [`DateTime`](common_time::DateTime).
|
||||
pub type DateTimeVector = PrimitiveVector<DateTimeType>;
|
||||
/// Builder for [`DateTimeVector`].
|
||||
pub type DateTimeVectorBuilder = PrimitiveVectorBuilder<DateTimeType>;
|
||||
use arrow::array::{Array, ArrayRef, PrimitiveArray};
|
||||
use common_time::datetime::DateTime;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::{
|
||||
MutableVector, ScalarVector, ScalarVectorBuilder, Validity, Value, ValueRef, Vector, VectorRef,
|
||||
};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::vectors::{PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct DateTimeVector {
|
||||
array: PrimitiveVector<i64>,
|
||||
}
|
||||
|
||||
impl DateTimeVector {
|
||||
pub fn new(array: PrimitiveArray<i64>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector { array },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i64>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
self.array.as_arrow()
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for DateTimeVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::datetime_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"DateTimeVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Arc::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date64,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date64,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.memory_size()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self {
|
||||
array: PrimitiveVector::new(self.array.array.slice(offset, length)),
|
||||
})
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
match self.array.get(index) {
|
||||
Value::Int64(v) => Value::DateTime(DateTime::new(v)),
|
||||
Value::Null => Value::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int64(v) => ValueRef::DateTime(DateTime::new(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for DateTimeVector {
|
||||
fn serialize_to_json(&self) -> crate::Result<Vec<serde_json::Value>> {
|
||||
Ok(self
|
||||
.array
|
||||
.iter_data()
|
||||
.map(|v| v.map(DateTime::new))
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<i64>>> for DateTimeVector {
|
||||
fn from(data: Vec<Option<i64>>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector::<i64>::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateTimeVectorBuilder {
|
||||
buffer: PrimitiveVectorBuilder<i64>,
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for DateTimeVectorBuilder {
|
||||
type VectorType = DateTimeVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: PrimitiveVectorBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer.push(value.map(|d| d.val()))
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: self.buffer.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MutableVector for DateTimeVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::datetime_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_datetime()?.map(|d| d.val()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateTimeVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateTimeIter<'a> {
|
||||
iter: PrimitiveIter<'a, i64>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for DateTimeIter<'a> {
|
||||
type Item = Option<DateTime>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|v| v.map(DateTime::new))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for DateTimeVector {
|
||||
type OwnedItem = DateTime;
|
||||
type RefItem<'a> = DateTime;
|
||||
type Iter<'a> = DateTimeIter<'a>;
|
||||
type Builder = DateTimeVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
self.array.get_data(idx).map(DateTime::new)
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
DateTimeIter {
|
||||
iter: self.array.iter_data(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_datetime(vector: &DateTimeVector, offsets: &[usize]) -> VectorRef {
|
||||
let array = crate::vectors::primitive::replicate_primitive_with_type(
|
||||
&vector.array,
|
||||
offsets,
|
||||
vector.data_type(),
|
||||
);
|
||||
Arc::new(DateTimeVector { array })
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, PrimitiveArray};
|
||||
use common_time::DateTime;
|
||||
use datafusion_common::from_slice::FromSlice;
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{
|
||||
ConcreteDataType, ScalarVector, ScalarVectorBuilder, Value, ValueRef, Vector, VectorRef,
|
||||
};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::DateTimeType;
|
||||
|
||||
#[test]
|
||||
fn test_datetime_vector() {
|
||||
let v = DateTimeVector::new(PrimitiveArray::from_slice(&[1, 2, 3]));
|
||||
let v = DateTimeVector::new(PrimitiveArray::from_vec(vec![1, 2, 3]));
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
|
||||
assert_eq!(3, v.len());
|
||||
assert_eq!("DateTimeVector", v.vector_type_name());
|
||||
@@ -55,8 +287,9 @@ mod tests {
|
||||
assert_eq!(Some(DateTime::new(2)), iter.next().unwrap());
|
||||
assert_eq!(Some(DateTime::new(3)), iter.next().unwrap());
|
||||
assert!(!v.is_null(0));
|
||||
assert_eq!(64, v.memory_size());
|
||||
assert_eq!(24, v.memory_size()); // size of i64 * 3
|
||||
|
||||
assert_matches!(v.validity(), Validity::AllValid);
|
||||
if let Value::DateTime(d) = v.get(0) {
|
||||
assert_eq!(1, d.val());
|
||||
} else {
|
||||
@@ -81,11 +314,8 @@ mod tests {
|
||||
assert_eq!(Value::Null, v.get(1));
|
||||
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));
|
||||
|
||||
let input = DateTimeVector::from_wrapper_slice(&[
|
||||
DateTime::new(1),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
]);
|
||||
let input =
|
||||
DateTimeVector::from_slice(&[DateTime::new(1), DateTime::new(2), DateTime::new(3)]);
|
||||
|
||||
let mut builder = DateTimeType::default().create_mutable_vector(3);
|
||||
builder
|
||||
@@ -98,7 +328,7 @@ mod tests {
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_wrapper_slice(&[
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_slice(&[
|
||||
DateTime::new(5),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
@@ -108,7 +338,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_datetime_from_arrow() {
|
||||
let vector = DateTimeVector::from_wrapper_slice(&[DateTime::new(1), DateTime::new(2)]);
|
||||
let vector = DateTimeVector::from_slice(&[DateTime::new(1), DateTime::new(2)]);
|
||||
let arrow = vector.as_arrow().slice(0, vector.len());
|
||||
let vector2 = DateTimeVector::try_from_arrow_array(&arrow).unwrap();
|
||||
assert_eq!(vector, vector2);
|
||||
|
||||
@@ -15,12 +15,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::TimestampType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, DateTimeVector, DateVector, ListVector, PrimitiveVector,
|
||||
StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
|
||||
TimestampNanosecondVector, TimestampSecondVector, Vector,
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, ListVector,
|
||||
PrimitiveVector, StringVector, TimestampVector, Vector,
|
||||
};
|
||||
use crate::with_match_primitive_type_id;
|
||||
|
||||
@@ -79,20 +76,7 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
String(_) => is_vector_eq!(StringVector, lhs, rhs),
|
||||
Date(_) => is_vector_eq!(DateVector, lhs, rhs),
|
||||
DateTime(_) => is_vector_eq!(DateTimeVector, lhs, rhs),
|
||||
Timestamp(t) => match t {
|
||||
TimestampType::Second(_) => {
|
||||
is_vector_eq!(TimestampSecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Millisecond(_) => {
|
||||
is_vector_eq!(TimestampMillisecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Microsecond(_) => {
|
||||
is_vector_eq!(TimestampMicrosecondVector, lhs, rhs)
|
||||
}
|
||||
TimestampType::Nanosecond(_) => {
|
||||
is_vector_eq!(TimestampNanosecondVector, lhs, rhs)
|
||||
}
|
||||
},
|
||||
Timestamp(_) => is_vector_eq!(TimestampVector, lhs, rhs),
|
||||
List(_) => is_vector_eq!(ListVector, lhs, rhs),
|
||||
UInt8(_) | UInt16(_) | UInt32(_) | UInt64(_) | Int8(_) | Int16(_) | Int32(_) | Int64(_)
|
||||
| Float32(_) | Float64(_) => {
|
||||
@@ -111,10 +95,13 @@ fn equal(lhs: &dyn Vector, rhs: &dyn Vector) -> bool {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{ListArray, MutableListArray, MutablePrimitiveArray, TryExtend};
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::{
|
||||
list, Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector,
|
||||
NullVector, UInt16Vector, UInt32Vector, UInt64Vector, UInt8Vector, VectorRef,
|
||||
Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector,
|
||||
NullVector, TimestampVector, UInt16Vector, UInt32Vector, UInt64Vector, UInt8Vector,
|
||||
VectorRef,
|
||||
};
|
||||
|
||||
fn assert_vector_ref_eq(vector: VectorRef) {
|
||||
@@ -145,21 +132,14 @@ mod tests {
|
||||
assert_vector_ref_eq(Arc::new(BooleanVector::from(vec![true, false])));
|
||||
assert_vector_ref_eq(Arc::new(DateVector::from(vec![Some(100), Some(120)])));
|
||||
assert_vector_ref_eq(Arc::new(DateTimeVector::from(vec![Some(100), Some(120)])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampSecondVector::from_values([100, 120])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampMillisecondVector::from_values([
|
||||
100, 120,
|
||||
])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampMicrosecondVector::from_values([
|
||||
100, 120,
|
||||
])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampNanosecondVector::from_values([100, 120])));
|
||||
assert_vector_ref_eq(Arc::new(TimestampVector::from_values([100, 120])));
|
||||
|
||||
let list_vector = list::tests::new_list_vector(&[
|
||||
Some(vec![Some(1), Some(2)]),
|
||||
None,
|
||||
Some(vec![Some(3), Some(4)]),
|
||||
]);
|
||||
assert_vector_ref_eq(Arc::new(list_vector));
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i64>>::new();
|
||||
arrow_array
|
||||
.try_extend(vec![Some(vec![Some(1), Some(2), Some(3)])])
|
||||
.unwrap();
|
||||
let arrow_array: ListArray<i32> = arrow_array.into();
|
||||
assert_vector_ref_eq(Arc::new(ListVector::from(arrow_array)));
|
||||
|
||||
assert_vector_ref_eq(Arc::new(NullVector::new(4)));
|
||||
assert_vector_ref_eq(Arc::new(StringVector::from(vec![
|
||||
|
||||
@@ -17,26 +17,19 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, StringArray};
|
||||
use arrow::array::Array;
|
||||
use arrow::compute;
|
||||
use arrow::compute::kernels::comparison;
|
||||
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use datafusion_common::ScalarValue;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{Scalar, ScalarVectorBuilder};
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, Float32Vector,
|
||||
Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, ListVector,
|
||||
ListVectorBuilder, MutableVector, NullVector, StringVector, TimestampMicrosecondVector,
|
||||
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt16Vector,
|
||||
UInt32Vector, UInt64Vector, UInt8Vector, Vector, VectorRef,
|
||||
};
|
||||
use crate::arrow_array::StringArray;
|
||||
use crate::error::{ConversionSnafu, Result, UnknownVectorSnafu};
|
||||
use crate::scalars::*;
|
||||
use crate::vectors::date::DateVector;
|
||||
use crate::vectors::datetime::DateTimeVector;
|
||||
use crate::vectors::*;
|
||||
|
||||
/// Helper functions for `Vector`.
|
||||
pub struct Helper;
|
||||
|
||||
impl Helper {
|
||||
@@ -54,7 +47,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<<T as Scalar>::VectorType>()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
@@ -68,7 +61,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<T>()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
@@ -85,7 +78,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_mut_any()
|
||||
.downcast_mut()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
ty,
|
||||
@@ -101,7 +94,7 @@ impl Helper {
|
||||
let arr = vector
|
||||
.as_any()
|
||||
.downcast_ref::<<T as Scalar>::VectorType>()
|
||||
.with_context(|| error::UnknownVectorSnafu {
|
||||
.with_context(|| UnknownVectorSnafu {
|
||||
msg: format!(
|
||||
"downcast vector error, vector type: {:?}, expected vector: {:?}",
|
||||
vector.vector_type_name(),
|
||||
@@ -112,9 +105,11 @@ impl Helper {
|
||||
}
|
||||
|
||||
/// Try to cast an arrow scalar value into a vector.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if the given scalar value is not supported.
|
||||
pub fn try_from_scalar_value(value: ScalarValue, length: usize) -> Result<VectorRef> {
|
||||
let vector = match value {
|
||||
ScalarValue::Null => ConstantVector::new(Arc::new(NullVector::new(1)), length),
|
||||
ScalarValue::Boolean(v) => {
|
||||
ConstantVector::new(Arc::new(BooleanVector::from(vec![v])), length)
|
||||
}
|
||||
@@ -148,29 +143,17 @@ impl Helper {
|
||||
ScalarValue::UInt64(v) => {
|
||||
ConstantVector::new(Arc::new(UInt64Vector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Utf8(v) | ScalarValue::LargeUtf8(v) => {
|
||||
ScalarValue::Utf8(v) => {
|
||||
ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Binary(v)
|
||||
| ScalarValue::LargeBinary(v)
|
||||
| ScalarValue::FixedSizeBinary(_, v) => {
|
||||
ScalarValue::LargeUtf8(v) => {
|
||||
ConstantVector::new(Arc::new(StringVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Binary(v) => {
|
||||
ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::List(v, field) => {
|
||||
let item_type = ConcreteDataType::try_from(field.data_type())?;
|
||||
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
|
||||
if let Some(values) = v {
|
||||
let values = values
|
||||
.into_iter()
|
||||
.map(ScalarValue::try_into)
|
||||
.collect::<Result<_>>()?;
|
||||
let list_value = ListValue::new(Some(Box::new(values)), item_type);
|
||||
builder.push(Some(ListValueRef::Ref { val: &list_value }));
|
||||
} else {
|
||||
builder.push(None);
|
||||
}
|
||||
let list_vector = builder.to_vector();
|
||||
ConstantVector::new(list_vector, length)
|
||||
ScalarValue::LargeBinary(v) => {
|
||||
ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Date32(v) => {
|
||||
ConstantVector::new(Arc::new(DateVector::from(vec![v])), length)
|
||||
@@ -178,30 +161,8 @@ impl Helper {
|
||||
ScalarValue::Date64(v) => {
|
||||
ConstantVector::new(Arc::new(DateTimeVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampSecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampSecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampMillisecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampMillisecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampMicrosecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampMicrosecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::TimestampNanosecond(v, _) => {
|
||||
// Timezone is unimplemented now.
|
||||
ConstantVector::new(Arc::new(TimestampNanosecondVector::from(vec![v])), length)
|
||||
}
|
||||
ScalarValue::Decimal128(_, _, _)
|
||||
| ScalarValue::Time64(_)
|
||||
| ScalarValue::IntervalYearMonth(_)
|
||||
| ScalarValue::IntervalDayTime(_)
|
||||
| ScalarValue::IntervalMonthDayNano(_)
|
||||
| ScalarValue::Struct(_, _)
|
||||
| ScalarValue::Dictionary(_, _) => {
|
||||
return error::ConversionSnafu {
|
||||
_ => {
|
||||
return ConversionSnafu {
|
||||
from: format!("Unsupported scalar value: {}", value),
|
||||
}
|
||||
.fail()
|
||||
@@ -219,7 +180,9 @@ impl Helper {
|
||||
Ok(match array.as_ref().data_type() {
|
||||
ArrowDataType::Null => Arc::new(NullVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Boolean => Arc::new(BooleanVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::LargeBinary => Arc::new(BinaryVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Binary | ArrowDataType::LargeBinary => {
|
||||
Arc::new(BinaryVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
ArrowDataType::Int8 => Arc::new(Int8Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int16 => Arc::new(Int16Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Int32 => Arc::new(Int32Vector::try_from_arrow_array(array)?),
|
||||
@@ -230,80 +193,48 @@ impl Helper {
|
||||
ArrowDataType::UInt64 => Arc::new(UInt64Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
|
||||
Arc::new(StringVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
ArrowDataType::Date32 => Arc::new(DateVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Date64 => Arc::new(DateTimeVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::List(_) => Arc::new(ListVector::try_from_arrow_array(array)?),
|
||||
ArrowDataType::Timestamp(unit, _) => match unit {
|
||||
TimeUnit::Second => Arc::new(TimestampSecondVector::try_from_arrow_array(array)?),
|
||||
TimeUnit::Millisecond => {
|
||||
Arc::new(TimestampMillisecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
TimeUnit::Microsecond => {
|
||||
Arc::new(TimestampMicrosecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
TimeUnit::Nanosecond => {
|
||||
Arc::new(TimestampNanosecondVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
},
|
||||
ArrowDataType::Float16
|
||||
| ArrowDataType::Time32(_)
|
||||
| ArrowDataType::Time64(_)
|
||||
| ArrowDataType::Duration(_)
|
||||
| ArrowDataType::Interval(_)
|
||||
| ArrowDataType::Binary
|
||||
| ArrowDataType::FixedSizeBinary(_)
|
||||
| ArrowDataType::LargeUtf8
|
||||
| ArrowDataType::LargeList(_)
|
||||
| ArrowDataType::FixedSizeList(_, _)
|
||||
| ArrowDataType::Struct(_)
|
||||
| ArrowDataType::Union(_, _, _)
|
||||
| ArrowDataType::Dictionary(_, _)
|
||||
| ArrowDataType::Decimal128(_, _)
|
||||
| ArrowDataType::Decimal256(_, _)
|
||||
| ArrowDataType::Map(_, _) => {
|
||||
unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type())
|
||||
ArrowDataType::Timestamp(_, _) => {
|
||||
Arc::new(TimestampVector::try_from_arrow_array(array)?)
|
||||
}
|
||||
_ => unimplemented!("Arrow array datatype: {:?}", array.as_ref().data_type()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Try to cast slice of `arrays` to vectors.
|
||||
pub fn try_into_vectors(arrays: &[ArrayRef]) -> Result<Vec<VectorRef>> {
|
||||
arrays.iter().map(Self::try_into_vector).collect()
|
||||
}
|
||||
|
||||
/// Perform SQL like operation on `names` and a scalar `s`.
|
||||
pub fn like_utf8(names: Vec<String>, s: &str) -> Result<VectorRef> {
|
||||
let array = StringArray::from(names);
|
||||
let array = StringArray::from_slice(&names);
|
||||
|
||||
let filter = comparison::like_utf8_scalar(&array, s).context(error::ArrowComputeSnafu)?;
|
||||
let filter =
|
||||
compute::like::like_utf8_scalar(&array, s).context(error::ArrowComputeSnafu)?;
|
||||
|
||||
let result = compute::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
|
||||
let result = compute::filter::filter(&array, &filter).context(error::ArrowComputeSnafu)?;
|
||||
Helper::try_into_vector(result)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{
|
||||
ArrayRef, BooleanArray, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array,
|
||||
Int32Array, Int64Array, Int8Array, LargeBinaryArray, ListArray, NullArray,
|
||||
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
|
||||
TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
|
||||
};
|
||||
use arrow::datatypes::{Field, Int32Type};
|
||||
use common_time::{Date, DateTime};
|
||||
use arrow::array::Int32Array;
|
||||
use common_time::date::Date;
|
||||
use common_time::datetime::DateTime;
|
||||
|
||||
use super::*;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::ConcreteDataType;
|
||||
|
||||
#[test]
|
||||
fn test_try_into_vectors() {
|
||||
let arrays: Vec<ArrayRef> = vec![
|
||||
Arc::new(Int32Array::from(vec![1])),
|
||||
Arc::new(Int32Array::from(vec![2])),
|
||||
Arc::new(Int32Array::from(vec![3])),
|
||||
Arc::new(Int32Array::from_vec(vec![1])),
|
||||
Arc::new(Int32Array::from_vec(vec![2])),
|
||||
Arc::new(Int32Array::from_vec(vec![3])),
|
||||
];
|
||||
let vectors = Helper::try_into_vectors(&arrays);
|
||||
assert!(vectors.is_ok());
|
||||
@@ -315,10 +246,10 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_into_date_vector() {
|
||||
pub fn test_try_into_date_vector() {
|
||||
let vector = DateVector::from(vec![Some(1), Some(2), None]);
|
||||
let arrow_array = vector.to_arrow_array();
|
||||
assert_eq!(&ArrowDataType::Date32, arrow_array.data_type());
|
||||
assert_eq!(&arrow::datatypes::DataType::Date32, arrow_array.data_type());
|
||||
let vector_converted = Helper::try_into_vector(arrow_array).unwrap();
|
||||
assert_eq!(vector.len(), vector_converted.len());
|
||||
for i in 0..vector_converted.len() {
|
||||
@@ -327,7 +258,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_from_scalar_date_value() {
|
||||
pub fn test_try_from_scalar_date_value() {
|
||||
let vector = Helper::try_from_scalar_value(ScalarValue::Date32(Some(42)), 3).unwrap();
|
||||
assert_eq!(ConcreteDataType::date_datatype(), vector.data_type());
|
||||
assert_eq!(3, vector.len());
|
||||
@@ -337,7 +268,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_from_scalar_datetime_value() {
|
||||
pub fn test_try_from_scalar_datetime_value() {
|
||||
let vector = Helper::try_from_scalar_value(ScalarValue::Date64(Some(42)), 3).unwrap();
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), vector.data_type());
|
||||
assert_eq!(3, vector.len());
|
||||
@@ -346,28 +277,6 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_from_list_value() {
|
||||
let value = ScalarValue::List(
|
||||
Some(vec![
|
||||
ScalarValue::Int32(Some(1)),
|
||||
ScalarValue::Int32(Some(2)),
|
||||
]),
|
||||
Box::new(Field::new("item", ArrowDataType::Int32, true)),
|
||||
);
|
||||
let vector = Helper::try_from_scalar_value(value, 3).unwrap();
|
||||
assert_eq!(
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
|
||||
vector.data_type()
|
||||
);
|
||||
assert_eq!(3, vector.len());
|
||||
for i in 0..vector.len() {
|
||||
let v = vector.get(i);
|
||||
let items = v.as_list().unwrap().unwrap().items().as_ref().unwrap();
|
||||
assert_eq!(vec![Value::Int32(1), Value::Int32(2)], **items);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_like_utf8() {
|
||||
fn assert_vector(expected: Vec<&str>, actual: &VectorRef) {
|
||||
@@ -392,40 +301,4 @@ mod tests {
|
||||
let ret = Helper::like_utf8(names, "%").unwrap();
|
||||
assert_vector(vec!["greptime", "hello", "public", "world"], &ret);
|
||||
}
|
||||
|
||||
fn check_try_into_vector(array: impl Array + 'static) {
|
||||
let array: ArrayRef = Arc::new(array);
|
||||
let vector = Helper::try_into_vector(array.clone()).unwrap();
|
||||
assert_eq!(&array, &vector.to_arrow_array());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_into_vector() {
|
||||
check_try_into_vector(NullArray::new(2));
|
||||
check_try_into_vector(BooleanArray::from(vec![true, false]));
|
||||
check_try_into_vector(LargeBinaryArray::from(vec![
|
||||
"hello".as_bytes(),
|
||||
"world".as_bytes(),
|
||||
]));
|
||||
check_try_into_vector(Int8Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(Int16Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(Int32Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(Int64Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(UInt8Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(UInt16Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(UInt32Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(UInt64Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(Float32Array::from(vec![1.0, 2.0, 3.0]));
|
||||
check_try_into_vector(Float64Array::from(vec![1.0, 2.0, 3.0]));
|
||||
check_try_into_vector(StringArray::from(vec!["hello", "world"]));
|
||||
check_try_into_vector(Date32Array::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(Date64Array::from(vec![1, 2, 3]));
|
||||
let data = vec![None, Some(vec![Some(6), Some(7)])];
|
||||
let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
|
||||
check_try_into_vector(list_array);
|
||||
check_try_into_vector(TimestampSecondArray::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(TimestampMillisecondArray::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(TimestampMicrosecondArray::from(vec![1, 2, 3]));
|
||||
check_try_into_vector(TimestampNanosecondArray::from(vec![1, 2, 3]));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,48 +13,39 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
Array, ArrayData, ArrayRef, BooleanBufferBuilder, Int32BufferBuilder, ListArray,
|
||||
};
|
||||
use arrow::buffer::Buffer;
|
||||
use arrow::array::{Array, ArrayRef, ListArray};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use arrow::bitmap::MutableBitmap;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::prelude::*;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::Result;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::prelude::*;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::ListType;
|
||||
use crate::value::{ListValue, ListValueRef, Value, ValueRef};
|
||||
use crate::vectors::{self, Helper, MutableVector, Validity, Vector, VectorRef};
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::{impl_try_from_arrow_array_for_vector, impl_validity_for_vector};
|
||||
|
||||
type ArrowListArray = ListArray<i32>;
|
||||
|
||||
/// Vector of Lists, basically backed by Arrow's `ListArray`.
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ListVector {
|
||||
array: ListArray,
|
||||
/// The datatype of the items in the list.
|
||||
item_type: ConcreteDataType,
|
||||
array: ArrowListArray,
|
||||
inner_datatype: ConcreteDataType,
|
||||
}
|
||||
|
||||
impl ListVector {
|
||||
/// Iterate elements as [VectorRef].
|
||||
pub fn values_iter(&self) -> impl Iterator<Item = Result<Option<VectorRef>>> + '_ {
|
||||
self.array
|
||||
.iter()
|
||||
.map(|value_opt| value_opt.map(Helper::try_into_vector).transpose())
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data_and_type(data: ArrayData, item_type: ConcreteDataType) -> Self {
|
||||
Self {
|
||||
array: ListArray::from(data),
|
||||
item_type,
|
||||
}
|
||||
/// Only iterate values in the [ListVector].
|
||||
///
|
||||
/// Be careful to use this method as it would ignore validity and replace null
|
||||
/// by empty vector.
|
||||
pub fn values_iter(&self) -> Box<dyn Iterator<Item = Result<VectorRef>> + '_> {
|
||||
Box::new(self.array.values_iter().map(VectorHelper::try_into_vector))
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
@@ -64,7 +55,7 @@ impl ListVector {
|
||||
|
||||
impl Vector for ListVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::List(ListType::new(self.item_type.clone()))
|
||||
ConcreteDataType::List(ListType::new(self.inner_datatype.clone()))
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
@@ -80,25 +71,21 @@ impl Vector for ListVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(ListArray::from(data))
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(ListArray::from(data))
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
impl_validity_for_vector!(self.array)
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
let offsets_bytes = self.array.offsets().len() * std::mem::size_of::<i64>();
|
||||
let value_refs_bytes = self.array.values().len() * std::mem::size_of::<Arc<dyn Array>>();
|
||||
offsets_bytes + value_refs_bytes
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -106,8 +93,7 @@ impl Vector for ListVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data_and_type(data, self.item_type.clone()))
|
||||
Arc::new(ListVector::from(self.array.slice(offset, length)))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -116,7 +102,7 @@ impl Vector for ListVector {
|
||||
}
|
||||
|
||||
let array = &self.array.value(index);
|
||||
let vector = Helper::try_into_vector(array).unwrap_or_else(|_| {
|
||||
let vector = VectorHelper::try_into_vector(array).unwrap_or_else(|_| {
|
||||
panic!(
|
||||
"arrow array with datatype {:?} cannot converted to our vector",
|
||||
array.data_type()
|
||||
@@ -127,7 +113,7 @@ impl Vector for ListVector {
|
||||
.collect::<Vec<Value>>();
|
||||
Value::List(ListValue::new(
|
||||
Some(Box::new(values)),
|
||||
self.item_type.clone(),
|
||||
self.inner_datatype.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
@@ -145,7 +131,7 @@ impl Serializable for ListVector {
|
||||
.iter()
|
||||
.map(|v| match v {
|
||||
None => Ok(JsonValue::Null),
|
||||
Some(v) => Helper::try_into_vector(v)
|
||||
Some(v) => VectorHelper::try_into_vector(v)
|
||||
.and_then(|v| v.serialize_to_json())
|
||||
.map(JsonValue::Array),
|
||||
})
|
||||
@@ -153,64 +139,70 @@ impl Serializable for ListVector {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ListArray> for ListVector {
|
||||
fn from(array: ListArray) -> Self {
|
||||
let item_type = ConcreteDataType::from_arrow_type(match array.data_type() {
|
||||
ArrowDataType::List(field) => field.data_type(),
|
||||
other => panic!(
|
||||
"Try to create ListVector from an arrow array with type {:?}",
|
||||
other
|
||||
),
|
||||
impl From<ArrowListArray> for ListVector {
|
||||
fn from(array: ArrowListArray) -> Self {
|
||||
let inner_datatype = ConcreteDataType::from_arrow_type(match array.data_type() {
|
||||
ArrowDataType::List(field) => &field.data_type,
|
||||
_ => unreachable!(),
|
||||
});
|
||||
Self { array, item_type }
|
||||
Self {
|
||||
array,
|
||||
inner_datatype,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vectors::impl_try_from_arrow_array_for_vector!(ListArray, ListVector);
|
||||
impl_try_from_arrow_array_for_vector!(ArrowListArray, ListVector);
|
||||
|
||||
pub struct ListIter<'a> {
|
||||
pub struct ListVectorIter<'a> {
|
||||
vector: &'a ListVector,
|
||||
idx: usize,
|
||||
iter: ZipValidity<'a, usize, Range<usize>>,
|
||||
}
|
||||
|
||||
impl<'a> ListIter<'a> {
|
||||
fn new(vector: &'a ListVector) -> ListIter {
|
||||
ListIter { vector, idx: 0 }
|
||||
impl<'a> ListVectorIter<'a> {
|
||||
pub fn new(vector: &'a ListVector) -> ListVectorIter<'a> {
|
||||
let iter = ZipValidity::new(
|
||||
0..vector.len(),
|
||||
vector.array.validity().as_ref().map(|x| x.iter()),
|
||||
);
|
||||
|
||||
Self { vector, iter }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for ListIter<'a> {
|
||||
impl<'a> Iterator for ListVectorIter<'a> {
|
||||
type Item = Option<ListValueRef<'a>>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.idx >= self.vector.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let idx = self.idx;
|
||||
self.idx += 1;
|
||||
|
||||
if self.vector.is_null(idx) {
|
||||
return Some(None);
|
||||
}
|
||||
|
||||
Some(Some(ListValueRef::Indexed {
|
||||
vector: self.vector,
|
||||
idx,
|
||||
}))
|
||||
self.iter.next().map(|idx_opt| {
|
||||
idx_opt.map(|idx| ListValueRef::Indexed {
|
||||
vector: self.vector,
|
||||
idx,
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
(self.vector.len(), Some(self.vector.len()))
|
||||
self.iter.size_hint()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
||||
self.iter.nth(n).map(|idx_opt| {
|
||||
idx_opt.map(|idx| ListValueRef::Indexed {
|
||||
vector: self.vector,
|
||||
idx,
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for ListVector {
|
||||
type OwnedItem = ListValue;
|
||||
type RefItem<'a> = ListValueRef<'a>;
|
||||
type Iter<'a> = ListIter<'a>;
|
||||
type Iter<'a> = ListVectorIter<'a>;
|
||||
type Builder = ListVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
@@ -222,68 +214,86 @@ impl ScalarVector for ListVector {
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
ListIter::new(self)
|
||||
ListVectorIter::new(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Ports from arrow's GenericListBuilder.
|
||||
// See https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/generic_list_builder.rs
|
||||
/// [ListVector] builder.
|
||||
// Some codes are ported from arrow2's MutableListArray.
|
||||
pub struct ListVectorBuilder {
|
||||
item_type: ConcreteDataType,
|
||||
offsets_builder: Int32BufferBuilder,
|
||||
null_buffer_builder: NullBufferBuilder,
|
||||
values_builder: Box<dyn MutableVector>,
|
||||
inner_type: ConcreteDataType,
|
||||
offsets: Vec<i32>,
|
||||
values: Box<dyn MutableVector>,
|
||||
validity: Option<MutableBitmap>,
|
||||
}
|
||||
|
||||
impl ListVectorBuilder {
|
||||
/// Creates a new [`ListVectorBuilder`]. `item_type` is the data type of the list item, `capacity`
|
||||
/// is the number of items to pre-allocate space for in this builder.
|
||||
pub fn with_type_capacity(item_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
|
||||
let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
|
||||
offsets_builder.append(0);
|
||||
// The actual required capacity might be greater than the capacity of the `ListVector`
|
||||
// if the child vector has more than one element.
|
||||
let values_builder = item_type.create_mutable_vector(capacity);
|
||||
pub fn with_type_capacity(inner_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
|
||||
let mut offsets = Vec::with_capacity(capacity + 1);
|
||||
offsets.push(0);
|
||||
// The actual required capacity might greater than the capacity of the `ListVector`
|
||||
// if there exists child vector that has more than one element.
|
||||
let values = inner_type.create_mutable_vector(capacity);
|
||||
|
||||
ListVectorBuilder {
|
||||
item_type,
|
||||
offsets_builder,
|
||||
null_buffer_builder: NullBufferBuilder::new(capacity),
|
||||
values_builder,
|
||||
inner_type,
|
||||
offsets,
|
||||
values,
|
||||
validity: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish the current variable-length list vector slot.
|
||||
fn finish_list(&mut self, is_valid: bool) {
|
||||
self.offsets_builder
|
||||
.append(i32::try_from(self.values_builder.len()).unwrap());
|
||||
self.null_buffer_builder.append(is_valid);
|
||||
#[inline]
|
||||
fn last_offset(&self) -> i32 {
|
||||
*self.offsets.last().unwrap()
|
||||
}
|
||||
|
||||
fn push_null(&mut self) {
|
||||
self.finish_list(false);
|
||||
self.offsets.push(self.last_offset());
|
||||
match &mut self.validity {
|
||||
Some(validity) => validity.push(false),
|
||||
None => self.init_validity(),
|
||||
}
|
||||
}
|
||||
|
||||
fn init_validity(&mut self) {
|
||||
let len = self.offsets.len() - 1;
|
||||
|
||||
let mut validity = MutableBitmap::with_capacity(self.offsets.capacity());
|
||||
validity.extend_constant(len, true);
|
||||
validity.set(len - 1, false);
|
||||
self.validity = Some(validity)
|
||||
}
|
||||
|
||||
fn push_list_value(&mut self, list_value: &ListValue) -> Result<()> {
|
||||
if let Some(items) = list_value.items() {
|
||||
for item in &**items {
|
||||
self.values_builder.push_value_ref(item.as_value_ref())?;
|
||||
self.values.push_value_ref(item.as_value_ref())?;
|
||||
}
|
||||
}
|
||||
|
||||
self.finish_list(true);
|
||||
self.push_valid();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Needs to be called when a valid value was extended to this builder.
|
||||
fn push_valid(&mut self) {
|
||||
let size = self.values.len();
|
||||
let size = i32::try_from(size).unwrap();
|
||||
assert!(size >= *self.offsets.last().unwrap());
|
||||
|
||||
self.offsets.push(size);
|
||||
if let Some(validity) = &mut self.validity {
|
||||
validity.push(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MutableVector for ListVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::list_datatype(self.item_type.clone())
|
||||
ConcreteDataType::list_datatype(self.inner_type.clone())
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.null_buffer_builder.len()
|
||||
self.offsets.len() - 1
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
@@ -338,181 +348,51 @@ impl ScalarVectorBuilder for ListVectorBuilder {
|
||||
self.push_value_ref(value.into()).unwrap_or_else(|e| {
|
||||
panic!(
|
||||
"Failed to push value, expect value type {:?}, err:{}",
|
||||
self.item_type, e
|
||||
self.inner_type, e
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
let len = self.len();
|
||||
let values_vector = self.values_builder.to_vector();
|
||||
let values_arr = values_vector.to_arrow_array();
|
||||
let values_data = values_arr.data();
|
||||
|
||||
let offset_buffer = self.offsets_builder.finish();
|
||||
let null_bit_buffer = self.null_buffer_builder.finish();
|
||||
// Re-initialize the offsets_builder.
|
||||
self.offsets_builder.append(0);
|
||||
let data_type = ConcreteDataType::list_datatype(self.item_type.clone()).as_arrow_type();
|
||||
let array_data_builder = ArrayData::builder(data_type)
|
||||
.len(len)
|
||||
.add_buffer(offset_buffer)
|
||||
.add_child_data(values_data.clone())
|
||||
.null_bit_buffer(null_bit_buffer);
|
||||
|
||||
let array_data = unsafe { array_data_builder.build_unchecked() };
|
||||
let array = ListArray::from(array_data);
|
||||
let array = ArrowListArray::try_new(
|
||||
ConcreteDataType::list_datatype(self.inner_type.clone()).as_arrow_type(),
|
||||
std::mem::take(&mut self.offsets).into(),
|
||||
self.values.to_vector().to_arrow_array(),
|
||||
std::mem::take(&mut self.validity).map(|x| x.into()),
|
||||
)
|
||||
.unwrap(); // The `ListVectorBuilder` itself should ensure it always builds a valid array.
|
||||
|
||||
ListVector {
|
||||
array,
|
||||
item_type: self.item_type.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ports from https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/null_buffer_builder.rs
|
||||
/// Builder for creating the null bit buffer.
|
||||
/// This builder only materializes the buffer when we append `false`.
|
||||
/// If you only append `true`s to the builder, what you get will be
|
||||
/// `None` when calling [`finish`](#method.finish).
|
||||
/// This optimization is **very** important for the performance.
|
||||
#[derive(Debug)]
|
||||
struct NullBufferBuilder {
|
||||
bitmap_builder: Option<BooleanBufferBuilder>,
|
||||
/// Store the length of the buffer before materializing.
|
||||
len: usize,
|
||||
capacity: usize,
|
||||
}
|
||||
|
||||
impl NullBufferBuilder {
|
||||
/// Creates a new empty builder.
|
||||
/// `capacity` is the number of bits in the null buffer.
|
||||
fn new(capacity: usize) -> Self {
|
||||
Self {
|
||||
bitmap_builder: None,
|
||||
len: 0,
|
||||
capacity,
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
if let Some(b) = &self.bitmap_builder {
|
||||
b.len()
|
||||
} else {
|
||||
self.len
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends a `true` into the builder
|
||||
/// to indicate that this item is not null.
|
||||
#[inline]
|
||||
fn append_non_null(&mut self) {
|
||||
if let Some(buf) = self.bitmap_builder.as_mut() {
|
||||
buf.append(true)
|
||||
} else {
|
||||
self.len += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends a `false` into the builder
|
||||
/// to indicate that this item is null.
|
||||
#[inline]
|
||||
fn append_null(&mut self) {
|
||||
self.materialize_if_needed();
|
||||
self.bitmap_builder.as_mut().unwrap().append(false);
|
||||
}
|
||||
|
||||
/// Appends a boolean value into the builder.
|
||||
#[inline]
|
||||
fn append(&mut self, not_null: bool) {
|
||||
if not_null {
|
||||
self.append_non_null()
|
||||
} else {
|
||||
self.append_null()
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the null buffer and resets the builder.
|
||||
/// Returns `None` if the builder only contains `true`s.
|
||||
fn finish(&mut self) -> Option<Buffer> {
|
||||
let buf = self.bitmap_builder.as_mut().map(|b| b.finish());
|
||||
self.bitmap_builder = None;
|
||||
self.len = 0;
|
||||
buf
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn materialize_if_needed(&mut self) {
|
||||
if self.bitmap_builder.is_none() {
|
||||
self.materialize()
|
||||
}
|
||||
}
|
||||
|
||||
#[cold]
|
||||
fn materialize(&mut self) {
|
||||
if self.bitmap_builder.is_none() {
|
||||
let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
|
||||
b.append_n(self.len, true);
|
||||
self.bitmap_builder = Some(b);
|
||||
inner_datatype: self.inner_type.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use arrow::array::{Int32Array, Int32Builder, ListBuilder};
|
||||
mod tests {
|
||||
use arrow::array::{MutableListArray, MutablePrimitiveArray, TryExtend};
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
use crate::scalars::ScalarRef;
|
||||
use crate::types::ListType;
|
||||
use crate::vectors::Int32Vector;
|
||||
|
||||
pub fn new_list_vector(data: &[Option<Vec<Option<i32>>>]) -> ListVector {
|
||||
let mut builder =
|
||||
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
|
||||
for vec_opt in data {
|
||||
if let Some(vec) = vec_opt {
|
||||
let values = vec.iter().map(|v| Value::from(*v)).collect();
|
||||
let values = Some(Box::new(values));
|
||||
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());
|
||||
|
||||
builder.push(Some(ListValueRef::Ref { val: &list_value }));
|
||||
} else {
|
||||
builder.push(None);
|
||||
}
|
||||
}
|
||||
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
fn new_list_array(data: &[Option<Vec<Option<i32>>>]) -> ListArray {
|
||||
let mut builder = ListBuilder::new(Int32Builder::new());
|
||||
for vec_opt in data {
|
||||
if let Some(vec) = vec_opt {
|
||||
for value_opt in vec {
|
||||
builder.values().append_option(*value_opt);
|
||||
}
|
||||
|
||||
builder.append(true);
|
||||
} else {
|
||||
builder.append(false);
|
||||
}
|
||||
}
|
||||
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_vector() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
Some(vec![Some(1i32), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let list_vector = new_list_vector(&data);
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
let list_vector = ListVector {
|
||||
array: arrow_array.clone(),
|
||||
inner_datatype: ConcreteDataType::int32_datatype(),
|
||||
};
|
||||
assert_eq!(
|
||||
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
|
||||
list_vector.data_type()
|
||||
@@ -523,34 +403,30 @@ pub mod tests {
|
||||
assert!(list_vector.is_null(1));
|
||||
assert!(!list_vector.is_null(2));
|
||||
|
||||
let arrow_array = new_list_array(&data);
|
||||
assert_eq!(
|
||||
arrow_array,
|
||||
*list_vector
|
||||
list_vector
|
||||
.to_arrow_array()
|
||||
.as_any()
|
||||
.downcast_ref::<ListArray>()
|
||||
.downcast_ref::<ArrowListArray>()
|
||||
.unwrap()
|
||||
.clone()
|
||||
);
|
||||
let validity = list_vector.validity();
|
||||
assert!(!validity.is_all_null());
|
||||
assert!(!validity.is_all_valid());
|
||||
assert!(validity.is_set(0));
|
||||
assert!(!validity.is_set(1));
|
||||
assert!(validity.is_set(2));
|
||||
assert_eq!(256, list_vector.memory_size());
|
||||
|
||||
let slice = list_vector.slice(0, 2).to_arrow_array();
|
||||
let sliced_array = slice.as_any().downcast_ref::<ListArray>().unwrap();
|
||||
assert_eq!(
|
||||
Int32Array::from_iter_values([1, 2, 3]),
|
||||
*sliced_array
|
||||
.value(0)
|
||||
.as_any()
|
||||
.downcast_ref::<Int32Array>()
|
||||
.unwrap()
|
||||
Validity::Slots(arrow_array.validity().unwrap()),
|
||||
list_vector.validity()
|
||||
);
|
||||
assert_eq!(
|
||||
arrow_array.offsets().len() * std::mem::size_of::<i64>()
|
||||
+ arrow_array.values().len() * std::mem::size_of::<Arc<dyn Array>>(),
|
||||
list_vector.memory_size()
|
||||
);
|
||||
|
||||
let slice = list_vector.slice(0, 2);
|
||||
assert_eq!(
|
||||
"ListArray[[1, 2, 3], None]",
|
||||
format!("{:?}", slice.to_arrow_array())
|
||||
);
|
||||
assert!(sliced_array.is_null(1));
|
||||
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
@@ -591,48 +467,52 @@ pub mod tests {
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
Some(vec![Some(1u32), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let arrow_array = new_list_array(&data);
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<u32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
let array_ref: ArrayRef = Arc::new(arrow_array);
|
||||
let expect = new_list_vector(&data);
|
||||
|
||||
// Test try from ArrayRef
|
||||
let list_vector = ListVector::try_from_arrow_array(array_ref).unwrap();
|
||||
assert_eq!(expect, list_vector);
|
||||
|
||||
// Test from
|
||||
let arrow_array = new_list_array(&data);
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
assert_eq!(expect, list_vector);
|
||||
assert_eq!(
|
||||
"ListVector { array: ListArray[[1, 2, 3], None, [4, None, 6]], inner_datatype: UInt32(UInt32) }",
|
||||
format!("{:?}", list_vector)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iter_list_vector_values() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
Some(vec![Some(1i64), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let list_vector = new_list_vector(&data);
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i64>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
assert_eq!(
|
||||
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
|
||||
ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())),
|
||||
list_vector.data_type()
|
||||
);
|
||||
let mut iter = list_vector.values_iter();
|
||||
assert_eq!(
|
||||
Arc::new(Int32Vector::from_slice(&[1, 2, 3])) as VectorRef,
|
||||
*iter.next().unwrap().unwrap().unwrap()
|
||||
"Int64[1, 2, 3]",
|
||||
format!("{:?}", iter.next().unwrap().unwrap().to_arrow_array())
|
||||
);
|
||||
assert!(iter.next().unwrap().unwrap().is_none());
|
||||
assert_eq!(
|
||||
Arc::new(Int32Vector::from(vec![Some(4), None, Some(6)])) as VectorRef,
|
||||
*iter.next().unwrap().unwrap().unwrap(),
|
||||
"Int64[]",
|
||||
format!("{:?}", iter.next().unwrap().unwrap().to_arrow_array())
|
||||
);
|
||||
assert_eq!(
|
||||
"Int64[4, None, 6]",
|
||||
format!("{:?}", iter.next().unwrap().unwrap().to_arrow_array())
|
||||
);
|
||||
assert!(iter.next().is_none())
|
||||
}
|
||||
@@ -640,18 +520,30 @@ pub mod tests {
|
||||
#[test]
|
||||
fn test_serialize_to_json() {
|
||||
let data = vec![
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
Some(vec![Some(1i64), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
];
|
||||
|
||||
let list_vector = new_list_vector(&data);
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i64>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
assert_eq!(
|
||||
vec![json!([1, 2, 3]), json!(null), json!([4, null, 6]),],
|
||||
list_vector.serialize_to_json().unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
fn new_list_vector(data: Vec<Option<Vec<Option<i32>>>>) -> ListVector {
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
ListVector::from(arrow_array)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_vector_builder() {
|
||||
let mut builder =
|
||||
@@ -675,14 +567,14 @@ pub mod tests {
|
||||
None,
|
||||
Some(vec![Some(7), Some(8), None]),
|
||||
];
|
||||
let input = new_list_vector(&data);
|
||||
let input = new_list_vector(data);
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(new_list_vector(&[
|
||||
let expect: VectorRef = Arc::new(new_list_vector(vec![
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
None,
|
||||
Some(vec![Some(7), Some(8), None]),
|
||||
@@ -707,7 +599,7 @@ pub mod tests {
|
||||
}));
|
||||
let vector = builder.finish();
|
||||
|
||||
let expect = new_list_vector(&[None, Some(vec![Some(4), None, Some(6)])]);
|
||||
let expect = new_list_vector(vec![None, Some(vec![Some(4), None, Some(6)])]);
|
||||
assert_eq!(expect, vector);
|
||||
|
||||
assert!(vector.get_data(0).is_none());
|
||||
|
||||
@@ -16,7 +16,8 @@ use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayData, ArrayRef, NullArray};
|
||||
use arrow::array::{Array, ArrayRef, NullArray};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
@@ -26,28 +27,21 @@ use crate::types::NullType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// A vector where all elements are nulls.
|
||||
#[derive(PartialEq)]
|
||||
pub struct NullVector {
|
||||
array: NullArray,
|
||||
}
|
||||
|
||||
// TODO(yingwen): Support null vector with other logical types.
|
||||
impl NullVector {
|
||||
/// Create a new `NullVector` with `n` elements.
|
||||
pub fn new(n: usize) -> Self {
|
||||
Self {
|
||||
array: NullArray::new(n),
|
||||
array: NullArray::new(ArrowDataType::Null, n),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NullArray> for NullVector {
|
||||
@@ -74,28 +68,21 @@ impl Vector for NullVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
// TODO(yingwen): Replaced by clone after upgrading to arrow 28.0.
|
||||
let data = self.to_array_data();
|
||||
Arc::new(NullArray::from(data))
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(NullArray::from(data))
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
Validity::all_null(self.array.len())
|
||||
Validity::AllNull
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
}
|
||||
|
||||
fn is_null(&self, _row: usize) -> bool {
|
||||
true
|
||||
}
|
||||
@@ -230,7 +217,7 @@ mod tests {
|
||||
|
||||
assert_eq!("NullVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert!(v.validity().is_all_null());
|
||||
assert_eq!(Validity::AllNull, v.validity());
|
||||
assert!(v.only_null());
|
||||
|
||||
for i in 0..32 {
|
||||
@@ -259,7 +246,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_null_vector_validity() {
|
||||
let vector = NullVector::new(5);
|
||||
assert!(vector.validity().is_all_null());
|
||||
assert_eq!(Validity::AllNull, vector.validity());
|
||||
assert_eq!(5, vector.null_count());
|
||||
}
|
||||
|
||||
|
||||
@@ -19,11 +19,10 @@ mod replicate;
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::types::LogicalPrimitiveType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::types::PrimitiveElement;
|
||||
use crate::vectors::{
|
||||
BinaryVector, BooleanVector, ListVector, NullVector, PrimitiveVector, StringVector, Vector,
|
||||
VectorRef,
|
||||
BinaryVector, BooleanVector, ConstantVector, DateTimeVector, DateVector, ListVector,
|
||||
NullVector, PrimitiveVector, StringVector, TimestampVector, Vector, VectorRef,
|
||||
};
|
||||
|
||||
/// Vector compute operations.
|
||||
@@ -60,10 +59,10 @@ pub trait VectorOp {
|
||||
}
|
||||
|
||||
macro_rules! impl_scalar_vector_op {
|
||||
($($VectorType: ident),+) => {$(
|
||||
($( { $VectorType: ident, $replicate: ident } ),+) => {$(
|
||||
impl VectorOp for $VectorType {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
replicate::replicate_scalar(self, offsets)
|
||||
replicate::$replicate(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
@@ -78,21 +77,28 @@ macro_rules! impl_scalar_vector_op {
|
||||
)+};
|
||||
}
|
||||
|
||||
impl_scalar_vector_op!(BinaryVector, BooleanVector, ListVector, StringVector);
|
||||
impl_scalar_vector_op!(
|
||||
{ BinaryVector, replicate_scalar },
|
||||
{ BooleanVector, replicate_scalar },
|
||||
{ ListVector, replicate_scalar },
|
||||
{ StringVector, replicate_scalar },
|
||||
{ DateVector, replicate_date },
|
||||
{ DateTimeVector, replicate_datetime },
|
||||
{ TimestampVector, replicate_timestamp }
|
||||
);
|
||||
|
||||
impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
|
||||
impl VectorOp for ConstantVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
std::sync::Arc::new(replicate::replicate_primitive(self, offsets))
|
||||
replicate::replicate_constant(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector =
|
||||
prev_vector.and_then(|pv| pv.as_any().downcast_ref::<PrimitiveVector<T>>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<ConstantVector>());
|
||||
find_unique::find_unique_constant(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
filter::filter_non_constant!(self, PrimitiveVector<T>, filter)
|
||||
filter::filter_constant(self, filter)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,17 +117,21 @@ impl VectorOp for NullVector {
|
||||
}
|
||||
}
|
||||
|
||||
impl VectorOp for ConstantVector {
|
||||
impl<T> VectorOp for PrimitiveVector<T>
|
||||
where
|
||||
T: PrimitiveElement,
|
||||
{
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
self.replicate_vector(offsets)
|
||||
replicate::replicate_primitive(self, offsets)
|
||||
}
|
||||
|
||||
fn find_unique(&self, selected: &mut BitVec, prev_vector: Option<&dyn Vector>) {
|
||||
let prev_vector = prev_vector.and_then(|pv| pv.as_any().downcast_ref::<ConstantVector>());
|
||||
find_unique::find_unique_constant(self, selected, prev_vector);
|
||||
let prev_vector =
|
||||
prev_vector.and_then(|pv| pv.as_any().downcast_ref::<PrimitiveVector<T>>());
|
||||
find_unique::find_unique_scalar(self, selected, prev_vector);
|
||||
}
|
||||
|
||||
fn filter(&self, filter: &BooleanVector) -> Result<VectorRef> {
|
||||
self.filter_vector(filter)
|
||||
filter::filter_non_constant!(self, PrimitiveVector<T>, filter)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,15 +12,16 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
pub(crate) use crate::vectors::constant::filter_constant;
|
||||
|
||||
macro_rules! filter_non_constant {
|
||||
($vector: expr, $VectorType: ty, $filter: ident) => {{
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::compute;
|
||||
use snafu::ResultExt;
|
||||
|
||||
let arrow_array = $vector.as_arrow();
|
||||
let filtered = compute::filter(arrow_array, $filter.as_boolean_array())
|
||||
let filtered = arrow::compute::filter::filter(arrow_array, $filter.as_boolean_array())
|
||||
.context(crate::error::ArrowComputeSnafu)?;
|
||||
Ok(Arc::new(<$VectorType>::try_from_arrow_array(filtered)?))
|
||||
}};
|
||||
@@ -32,16 +33,9 @@ pub(crate) use filter_non_constant;
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::timestamp::{
|
||||
TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
|
||||
};
|
||||
use crate::types::WrapperType;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{
|
||||
BooleanVector, Int32Vector, NullVector, StringVector, VectorOp, VectorRef,
|
||||
BooleanVector, ConstantVector, Int32Vector, NullVector, StringVector, VectorOp, VectorRef,
|
||||
};
|
||||
|
||||
fn check_filter_primitive(expect: &[i32], input: &[i32], filter: &[bool]) {
|
||||
@@ -111,6 +105,7 @@ mod tests {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::$ValueType;
|
||||
use $crate::vectors::{$VectorType, VectorRef};
|
||||
|
||||
let v = $VectorType::from_iterator((0..5).map($ValueType::$method));
|
||||
@@ -128,18 +123,6 @@ mod tests {
|
||||
fn test_filter_date_like() {
|
||||
impl_filter_date_like_test!(DateVector, Date, new);
|
||||
impl_filter_date_like_test!(DateTimeVector, DateTime, new);
|
||||
|
||||
impl_filter_date_like_test!(TimestampSecondVector, TimestampSecond, from_native);
|
||||
impl_filter_date_like_test!(
|
||||
TimestampMillisecondVector,
|
||||
TimestampMillisecond,
|
||||
from_native
|
||||
);
|
||||
impl_filter_date_like_test!(
|
||||
TimestampMicrosecondVector,
|
||||
TimestampMicrosecond,
|
||||
from_native
|
||||
);
|
||||
impl_filter_date_like_test!(TimestampNanosecondVector, TimestampNanosecond, from_native);
|
||||
impl_filter_date_like_test!(TimestampVector, Timestamp, from_millis);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,8 +15,7 @@
|
||||
use common_base::BitVec;
|
||||
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{NullVector, Vector};
|
||||
use crate::vectors::{ConstantVector, NullVector, Vector};
|
||||
|
||||
// To implement `find_unique()` correctly, we need to keep in mind that it always marks an element as
|
||||
// selected when it is different from the previous one, and leaves the `selected` unchanged
|
||||
@@ -71,7 +70,7 @@ pub(crate) fn find_unique_null(
|
||||
return;
|
||||
}
|
||||
|
||||
let is_first_not_duplicate = prev_vector.map(NullVector::is_empty).unwrap_or(true);
|
||||
let is_first_not_duplicate = prev_vector.map(|pv| pv.is_empty()).unwrap_or(true);
|
||||
if is_first_not_duplicate {
|
||||
selected.set(0, true);
|
||||
}
|
||||
@@ -105,11 +104,8 @@ pub(crate) fn find_unique_constant(
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::{Date, DateTime};
|
||||
|
||||
use super::*;
|
||||
use crate::timestamp::*;
|
||||
use crate::vectors::{Int32Vector, StringVector, Vector, VectorOp};
|
||||
use crate::vectors::{Int32Vector, StringVector, VectorOp};
|
||||
|
||||
fn check_bitmap(expect: &[bool], selected: &BitVec) {
|
||||
let actual = selected.iter().collect::<Vec<_>>();
|
||||
@@ -125,7 +121,7 @@ mod tests {
|
||||
input: impl Iterator<Item = Option<i32>>,
|
||||
prev: Option<&[i32]>,
|
||||
) {
|
||||
let input = Int32Vector::from(input.collect::<Vec<_>>());
|
||||
let input = Int32Vector::from_iter(input);
|
||||
let prev = prev.map(Int32Vector::from_slice);
|
||||
|
||||
let mut selected = BitVec::repeat(false, input.len());
|
||||
@@ -345,6 +341,7 @@ mod tests {
|
||||
|
||||
macro_rules! impl_find_unique_date_like_test {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use common_time::$ValueType;
|
||||
use $crate::vectors::$VectorType;
|
||||
|
||||
let v = $VectorType::from_iterator([8, 8, 9, 10].into_iter().map($ValueType::$method));
|
||||
@@ -359,9 +356,6 @@ mod tests {
|
||||
fn test_find_unique_date_like() {
|
||||
impl_find_unique_date_like_test!(DateVector, Date, new);
|
||||
impl_find_unique_date_like_test!(DateTimeVector, DateTime, new);
|
||||
impl_find_unique_date_like_test!(TimestampSecondVector, TimestampSecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMillisecondVector, TimestampMillisecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampNanosecondVector, TimestampNanosecond, from);
|
||||
impl_find_unique_date_like_test!(TimestampVector, Timestamp, from_millis);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,8 +13,12 @@
|
||||
// limitations under the License.
|
||||
|
||||
use crate::prelude::*;
|
||||
pub(crate) use crate::vectors::constant::replicate_constant;
|
||||
pub(crate) use crate::vectors::date::replicate_date;
|
||||
pub(crate) use crate::vectors::datetime::replicate_datetime;
|
||||
pub(crate) use crate::vectors::null::replicate_null;
|
||||
pub(crate) use crate::vectors::primitive::replicate_primitive;
|
||||
pub(crate) use crate::vectors::timestamp::replicate_timestamp;
|
||||
|
||||
pub(crate) fn replicate_scalar<C: ScalarVector>(c: &C, offsets: &[usize]) -> VectorRef {
|
||||
assert_eq!(offsets.len(), c.len());
|
||||
@@ -39,13 +43,8 @@ pub(crate) fn replicate_scalar<C: ScalarVector>(c: &C, offsets: &[usize]) -> Vec
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use common_time::{Date, DateTime, Timestamp};
|
||||
use paste::paste;
|
||||
|
||||
use super::*;
|
||||
use crate::vectors::constant::ConstantVector;
|
||||
use crate::vectors::{Int32Vector, NullVector, StringVector, VectorOp};
|
||||
use crate::vectors::{ConstantVector, Int32Vector, NullVector, StringVector, VectorOp};
|
||||
|
||||
#[test]
|
||||
fn test_replicate_primitive() {
|
||||
@@ -121,6 +120,7 @@ mod tests {
|
||||
|
||||
macro_rules! impl_replicate_date_like_test {
|
||||
($VectorType: ident, $ValueType: ident, $method: ident) => {{
|
||||
use common_time::$ValueType;
|
||||
use $crate::vectors::$VectorType;
|
||||
|
||||
let v = $VectorType::from_iterator((0..5).map($ValueType::$method));
|
||||
@@ -138,33 +138,10 @@ mod tests {
|
||||
}};
|
||||
}
|
||||
|
||||
/// Checks `replicate` on the timestamp vector of the given time unit
/// (the unit identifier is pasted into the `Timestamp<Unit>` / `Timestamp<Unit>Vector` names).
macro_rules! impl_replicate_timestamp_test {
    ($unit: ident) => {{
        paste! {
            use $crate::timestamp::[<Timestamp $unit>];
            use $crate::vectors::[<Timestamp $unit Vector>];

            let source = [<Timestamp $unit Vector>]::from_iterator((0..5).map([<Timestamp $unit>]::from));
            // With these offsets the result is expected to hold the values 1..=4.
            let offsets = [0, 1, 2, 3, 4];
            let replicated = source.replicate(&offsets);
            assert_eq!(4, replicated.len());
            for (idx, expected) in (1i64..=4).enumerate() {
                assert_eq!(
                    Value::Timestamp(Timestamp::new(expected, TimeUnit::$unit)),
                    replicated.get(idx)
                );
            }
        }
    }};
}
|
||||
|
||||
#[test]
|
||||
fn test_replicate_date_like() {
|
||||
impl_replicate_date_like_test!(DateVector, Date, new);
|
||||
impl_replicate_date_like_test!(DateTimeVector, DateTime, new);
|
||||
|
||||
impl_replicate_timestamp_test!(Second);
|
||||
impl_replicate_timestamp_test!(Millisecond);
|
||||
impl_replicate_timestamp_test!(Microsecond);
|
||||
impl_replicate_timestamp_test!(Nanosecond);
|
||||
impl_replicate_date_like_test!(TimestampVector, Timestamp, from_millis);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,111 +13,75 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::iter::FromIterator;
|
||||
use std::slice::Iter;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{
|
||||
Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef, PrimitiveArray, PrimitiveBuilder,
|
||||
};
|
||||
use arrow::array::{Array, ArrayRef, MutableArray, MutablePrimitiveArray, PrimitiveArray};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::OptionExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{ConversionSnafu, Result, SerializeSnafu};
|
||||
use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::{
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, LogicalPrimitiveType,
|
||||
UInt16Type, UInt32Type, UInt64Type, UInt8Type, WrapperType,
|
||||
};
|
||||
use crate::types::{Primitive, PrimitiveElement};
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
pub type UInt8Vector = PrimitiveVector<UInt8Type>;
|
||||
pub type UInt16Vector = PrimitiveVector<UInt16Type>;
|
||||
pub type UInt32Vector = PrimitiveVector<UInt32Type>;
|
||||
pub type UInt64Vector = PrimitiveVector<UInt64Type>;
|
||||
|
||||
pub type Int8Vector = PrimitiveVector<Int8Type>;
|
||||
pub type Int16Vector = PrimitiveVector<Int16Type>;
|
||||
pub type Int32Vector = PrimitiveVector<Int32Type>;
|
||||
pub type Int64Vector = PrimitiveVector<Int64Type>;
|
||||
|
||||
pub type Float32Vector = PrimitiveVector<Float32Type>;
|
||||
pub type Float64Vector = PrimitiveVector<Float64Type>;
|
||||
|
||||
/// Vector for primitive data types.
|
||||
pub struct PrimitiveVector<T: LogicalPrimitiveType> {
|
||||
array: PrimitiveArray<T::ArrowPrimitive>,
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct PrimitiveVector<T: Primitive> {
|
||||
pub(crate) array: PrimitiveArray<T>,
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> PrimitiveVector<T> {
|
||||
pub fn new(array: PrimitiveArray<T::ArrowPrimitive>) -> Self {
|
||||
impl<T: Primitive> PrimitiveVector<T> {
|
||||
pub fn new(array: PrimitiveArray<T>) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
let data = array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<T::ArrowPrimitive>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.data()
|
||||
.clone();
|
||||
let concrete_array = PrimitiveArray::<T::ArrowPrimitive>::from(data);
|
||||
Ok(Self::new(concrete_array))
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<T>>()
|
||||
.with_context(|| ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn from_slice<P: AsRef<[T::Native]>>(slice: P) -> Self {
|
||||
let iter = slice.as_ref().iter().copied();
|
||||
pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
array: PrimitiveArray::from_slice(slice),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_wrapper_slice<P: AsRef<[T::Wrapper]>>(slice: P) -> Self {
|
||||
let iter = slice.as_ref().iter().copied().map(WrapperType::into_native);
|
||||
pub fn from_vec(array: Vec<T>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
array: PrimitiveArray::from_vec(array),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_vec(array: Vec<T::Native>) -> Self {
|
||||
pub fn from_values<I: IntoIterator<Item = T>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(array),
|
||||
array: PrimitiveArray::from_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter_values(iter),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &PrimitiveArray<T::ArrowPrimitive> {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from(data),
|
||||
}
|
||||
}
|
||||
|
||||
// To distinguish with `Vector::slice()`.
|
||||
fn get_slice(&self, offset: usize, length: usize) -> Self {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Self::from_array_data(data)
|
||||
fn slice(&self, offset: usize, length: usize) -> Self {
|
||||
Self::from(self.array.slice(offset, length))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> Vector for PrimitiveVector<T> {
|
||||
impl<T: PrimitiveElement> Vector for PrimitiveVector<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
@@ -135,13 +99,11 @@ impl<T: LogicalPrimitiveType> Vector for PrimitiveVector<T> {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(PrimitiveArray::<T::ArrowPrimitive>::from(data))
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -149,11 +111,7 @@ impl<T: LogicalPrimitiveType> Vector for PrimitiveVector<T> {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
self.array.values().len() * std::mem::size_of::<T>()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -161,80 +119,57 @@ impl<T: LogicalPrimitiveType> Vector for PrimitiveVector<T> {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
Arc::new(self.slice(offset, length))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
if self.array.is_valid(index) {
|
||||
// Safety: The index have been checked by `is_valid()`.
|
||||
let wrapper = unsafe { T::Wrapper::from_native(self.array.value_unchecked(index)) };
|
||||
wrapper.into()
|
||||
} else {
|
||||
Value::Null
|
||||
}
|
||||
vectors::impl_get_for_vector!(self.array, index)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
if self.array.is_valid(index) {
|
||||
// Safety: The index have been checked by `is_valid()`.
|
||||
let wrapper = unsafe { T::Wrapper::from_native(self.array.value_unchecked(index)) };
|
||||
wrapper.into()
|
||||
unsafe { self.array.value_unchecked(index).into_value_ref() }
|
||||
} else {
|
||||
ValueRef::Null
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> fmt::Debug for PrimitiveVector<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_struct("PrimitiveVector")
|
||||
.field("array", &self.array)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> From<PrimitiveArray<T::ArrowPrimitive>> for PrimitiveVector<T> {
|
||||
fn from(array: PrimitiveArray<T::ArrowPrimitive>) -> Self {
|
||||
impl<T: Primitive> From<PrimitiveArray<T>> for PrimitiveVector<T> {
|
||||
fn from(array: PrimitiveArray<T>) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> From<Vec<Option<T::Native>>> for PrimitiveVector<T> {
|
||||
fn from(v: Vec<Option<T::Native>>) -> Self {
|
||||
impl<T: Primitive> From<Vec<Option<T>>> for PrimitiveVector<T> {
|
||||
fn from(v: Vec<Option<T>>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveArray::from_iter(v),
|
||||
array: PrimitiveArray::<T>::from(v),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PrimitiveIter<'a, T: LogicalPrimitiveType> {
|
||||
iter: ArrayIter<&'a PrimitiveArray<T::ArrowPrimitive>>,
|
||||
}
|
||||
|
||||
impl<'a, T: LogicalPrimitiveType> Iterator for PrimitiveIter<'a, T> {
|
||||
type Item = Option<T::Wrapper>;
|
||||
|
||||
fn next(&mut self) -> Option<Option<T::Wrapper>> {
|
||||
self.iter
|
||||
.next()
|
||||
.map(|item| item.map(T::Wrapper::from_native))
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.iter.size_hint()
|
||||
impl<T: Primitive, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr> for PrimitiveVector<T> {
|
||||
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: MutablePrimitiveArray::<T>::from_iter(iter).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> ScalarVector for PrimitiveVector<T> {
|
||||
type OwnedItem = T::Wrapper;
|
||||
type RefItem<'a> = T::Wrapper;
|
||||
impl<T> ScalarVector for PrimitiveVector<T>
|
||||
where
|
||||
T: PrimitiveElement,
|
||||
{
|
||||
type OwnedItem = T;
|
||||
type RefItem<'a> = T;
|
||||
type Iter<'a> = PrimitiveIter<'a, T>;
|
||||
type Builder = PrimitiveVectorBuilder<T>;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(T::Wrapper::from_native(self.array.value(idx)))
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -247,47 +182,59 @@ impl<T: LogicalPrimitiveType> ScalarVector for PrimitiveVector<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> Serializable for PrimitiveVector<T> {
|
||||
pub type UInt8Vector = PrimitiveVector<u8>;
|
||||
pub type UInt16Vector = PrimitiveVector<u16>;
|
||||
pub type UInt32Vector = PrimitiveVector<u32>;
|
||||
pub type UInt64Vector = PrimitiveVector<u64>;
|
||||
|
||||
pub type Int8Vector = PrimitiveVector<i8>;
|
||||
pub type Int16Vector = PrimitiveVector<i16>;
|
||||
pub type Int32Vector = PrimitiveVector<i32>;
|
||||
pub type Int64Vector = PrimitiveVector<i64>;
|
||||
|
||||
pub type Float32Vector = PrimitiveVector<f32>;
|
||||
pub type Float64Vector = PrimitiveVector<f64>;
|
||||
|
||||
pub struct PrimitiveIter<'a, T> {
|
||||
iter: ZipValidity<'a, &'a T, Iter<'a, T>>,
|
||||
}
|
||||
|
||||
impl<'a, T: Copy> Iterator for PrimitiveIter<'a, T> {
|
||||
type Item = Option<T>;
|
||||
|
||||
fn next(&mut self) -> Option<Option<T>> {
|
||||
self.iter.next().map(|v| v.copied())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: PrimitiveElement> Serializable for PrimitiveVector<T> {
|
||||
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
|
||||
let res = self
|
||||
.iter_data()
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
// use WrapperType's Into<serde_json::Value> bound instead of
|
||||
// serde_json::to_value to facilitate customized serialization
|
||||
// for WrapperType
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(res)
|
||||
self.array
|
||||
.iter()
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> PartialEq for PrimitiveVector<T> {
|
||||
fn eq(&self, other: &PrimitiveVector<T>) -> bool {
|
||||
self.array == other.array
|
||||
}
|
||||
pub struct PrimitiveVectorBuilder<T: PrimitiveElement> {
|
||||
pub(crate) mutable_array: MutablePrimitiveArray<T>,
|
||||
}
|
||||
|
||||
pub type UInt8VectorBuilder = PrimitiveVectorBuilder<UInt8Type>;
|
||||
pub type UInt16VectorBuilder = PrimitiveVectorBuilder<UInt16Type>;
|
||||
pub type UInt32VectorBuilder = PrimitiveVectorBuilder<UInt32Type>;
|
||||
pub type UInt64VectorBuilder = PrimitiveVectorBuilder<UInt64Type>;
|
||||
pub type UInt8VectorBuilder = PrimitiveVectorBuilder<u8>;
|
||||
pub type UInt16VectorBuilder = PrimitiveVectorBuilder<u16>;
|
||||
pub type UInt32VectorBuilder = PrimitiveVectorBuilder<u32>;
|
||||
pub type UInt64VectorBuilder = PrimitiveVectorBuilder<u64>;
|
||||
|
||||
pub type Int8VectorBuilder = PrimitiveVectorBuilder<Int8Type>;
|
||||
pub type Int16VectorBuilder = PrimitiveVectorBuilder<Int16Type>;
|
||||
pub type Int32VectorBuilder = PrimitiveVectorBuilder<Int32Type>;
|
||||
pub type Int64VectorBuilder = PrimitiveVectorBuilder<Int64Type>;
|
||||
pub type Int8VectorBuilder = PrimitiveVectorBuilder<i8>;
|
||||
pub type Int16VectorBuilder = PrimitiveVectorBuilder<i16>;
|
||||
pub type Int32VectorBuilder = PrimitiveVectorBuilder<i32>;
|
||||
pub type Int64VectorBuilder = PrimitiveVectorBuilder<i64>;
|
||||
|
||||
pub type Float32VectorBuilder = PrimitiveVectorBuilder<Float32Type>;
|
||||
pub type Float64VectorBuilder = PrimitiveVectorBuilder<Float64Type>;
|
||||
pub type Float32VectorBuilder = PrimitiveVectorBuilder<f32>;
|
||||
pub type Float64VectorBuilder = PrimitiveVectorBuilder<f64>;
|
||||
|
||||
/// Builder to build a primitive vector.
|
||||
pub struct PrimitiveVectorBuilder<T: LogicalPrimitiveType> {
|
||||
mutable_array: PrimitiveBuilder<T::ArrowPrimitive>,
|
||||
}
|
||||
|
||||
impl<T: LogicalPrimitiveType> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
impl<T: PrimitiveElement> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
@@ -310,62 +257,81 @@ impl<T: LogicalPrimitiveType> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
let primitive = T::cast_value_ref(value)?;
|
||||
match primitive {
|
||||
Some(v) => self.mutable_array.append_value(v.into_native()),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
self.mutable_array.push(primitive);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let primitive = T::cast_vector(vector)?;
|
||||
// Slice the underlying array to avoid creating a new Arc.
|
||||
let slice = primitive.get_slice(offset, length);
|
||||
for v in slice.iter_data() {
|
||||
self.push(v);
|
||||
}
|
||||
let slice = primitive.slice(offset, length);
|
||||
self.mutable_array.extend_trusted_len(slice.iter());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ScalarVectorBuilder for PrimitiveVectorBuilder<T>
|
||||
where
|
||||
T: LogicalPrimitiveType,
|
||||
T::Wrapper: Scalar<VectorType = PrimitiveVector<T>>,
|
||||
for<'a> T::Wrapper: ScalarRef<'a, ScalarType = T::Wrapper>,
|
||||
for<'a> T::Wrapper: Scalar<RefType<'a> = T::Wrapper>,
|
||||
T: Scalar<VectorType = PrimitiveVector<T>> + PrimitiveElement,
|
||||
for<'a> T: ScalarRef<'a, ScalarType = T, VectorType = PrimitiveVector<T>>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
type VectorType = PrimitiveVector<T>;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: PrimitiveBuilder::with_capacity(capacity),
|
||||
mutable_array: MutablePrimitiveArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array
|
||||
.append_option(value.map(|v| v.into_native()));
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
PrimitiveVector {
|
||||
array: self.mutable_array.finish(),
|
||||
array: std::mem::take(&mut self.mutable_array).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_primitive<T: LogicalPrimitiveType>(
|
||||
impl<T: PrimitiveElement> PrimitiveVectorBuilder<T> {
|
||||
fn with_type_capacity(data_type: ConcreteDataType, capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutablePrimitiveArray::with_capacity_from(
|
||||
capacity,
|
||||
data_type.as_arrow_type(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_primitive<T: PrimitiveElement>(
|
||||
vector: &PrimitiveVector<T>,
|
||||
offsets: &[usize],
|
||||
) -> VectorRef {
|
||||
Arc::new(replicate_primitive_with_type(
|
||||
vector,
|
||||
offsets,
|
||||
T::build_data_type(),
|
||||
))
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_primitive_with_type<T: PrimitiveElement>(
|
||||
vector: &PrimitiveVector<T>,
|
||||
offsets: &[usize],
|
||||
data_type: ConcreteDataType,
|
||||
) -> PrimitiveVector<T> {
|
||||
assert_eq!(offsets.len(), vector.len());
|
||||
|
||||
if offsets.is_empty() {
|
||||
return vector.get_slice(0, 0);
|
||||
return vector.slice(0, 0);
|
||||
}
|
||||
|
||||
let mut builder = PrimitiveVectorBuilder::<T>::with_capacity(*offsets.last().unwrap() as usize);
|
||||
let mut builder = PrimitiveVectorBuilder::<T>::with_type_capacity(
|
||||
data_type,
|
||||
*offsets.last().unwrap() as usize,
|
||||
);
|
||||
|
||||
let mut previous_offset = 0;
|
||||
|
||||
@@ -373,15 +339,14 @@ pub(crate) fn replicate_primitive<T: LogicalPrimitiveType>(
|
||||
let repeat_times = *offset - previous_offset;
|
||||
match value {
|
||||
Some(data) => {
|
||||
unsafe {
|
||||
// Safety: std::iter::Repeat and std::iter::Take implement TrustedLen.
|
||||
builder
|
||||
.mutable_array
|
||||
.append_trusted_len_iter(std::iter::repeat(data).take(repeat_times));
|
||||
}
|
||||
builder.mutable_array.extend_trusted_len(
|
||||
std::iter::repeat(*data)
|
||||
.take(repeat_times)
|
||||
.map(Option::Some),
|
||||
);
|
||||
}
|
||||
None => {
|
||||
builder.mutable_array.append_nulls(repeat_times);
|
||||
builder.mutable_array.extend_constant(repeat_times, None);
|
||||
}
|
||||
}
|
||||
previous_offset = *offset;
|
||||
@@ -391,7 +356,6 @@ pub(crate) fn replicate_primitive<T: LogicalPrimitiveType>(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::Int32Array;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json;
|
||||
|
||||
@@ -400,11 +364,11 @@ mod tests {
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::Int64Type;
|
||||
|
||||
fn check_vec(v: Int32Vector) {
|
||||
fn check_vec(v: PrimitiveVector<i32>) {
|
||||
assert_eq!(4, v.len());
|
||||
assert_eq!("Int32Vector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(!v.only_null());
|
||||
|
||||
for i in 0..4 {
|
||||
@@ -423,26 +387,26 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_values() {
|
||||
let v = Int32Vector::from_values(vec![1, 2, 3, 4]);
|
||||
let v = PrimitiveVector::<i32>::from_values(vec![1, 2, 3, 4]);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_vec() {
|
||||
let v = Int32Vector::from_vec(vec![1, 2, 3, 4]);
|
||||
let v = PrimitiveVector::<i32>::from_vec(vec![1, 2, 3, 4]);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_slice() {
|
||||
let v = Int32Vector::from_slice(vec![1, 2, 3, 4]);
|
||||
let v = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_primitive_vector_with_null_to_json() {
|
||||
let input = [Some(1i32), Some(2i32), None, Some(4i32), None];
|
||||
let mut builder = Int32VectorBuilder::with_capacity(input.len());
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
@@ -457,15 +421,15 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let arrow_array = Int32Array::from(vec![1, 2, 3, 4]);
|
||||
let v = Int32Vector::from(arrow_array);
|
||||
let arrow_array = PrimitiveArray::from_slice(vec![1, 2, 3, 4]);
|
||||
let v = PrimitiveVector::from(arrow_array);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_primitive_vector_build_get() {
|
||||
let input = [Some(1i32), Some(2i32), None, Some(4i32), None];
|
||||
let mut builder = Int32VectorBuilder::with_capacity(input.len());
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
@@ -484,28 +448,29 @@ mod tests {
|
||||
#[test]
|
||||
fn test_primitive_vector_validity() {
|
||||
let input = [Some(1i32), Some(2i32), None, None];
|
||||
let mut builder = Int32VectorBuilder::with_capacity(input.len());
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let vector = builder.finish();
|
||||
assert_eq!(2, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
assert_eq!(2, validity.null_count());
|
||||
assert!(!validity.is_set(2));
|
||||
assert!(!validity.is_set(3));
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(2, slots.null_count());
|
||||
assert!(!slots.get_bit(2));
|
||||
assert!(!slots.get_bit(3));
|
||||
|
||||
let vector = Int32Vector::from_slice(vec![1, 2, 3, 4]);
|
||||
let vector = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert!(vector.validity().is_all_valid());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_memory_size() {
|
||||
let v = Int32Vector::from_slice((0..5).collect::<Vec<i32>>());
|
||||
assert_eq!(64, v.memory_size());
|
||||
let v = Int64Vector::from(vec![Some(0i64), Some(1i64), Some(2i64), None, None]);
|
||||
assert_eq!(128, v.memory_size());
|
||||
let v = PrimitiveVector::<i32>::from_slice((0..5).collect::<Vec<i32>>());
|
||||
assert_eq!(20, v.memory_size());
|
||||
let v = PrimitiveVector::<i64>::from(vec![Some(0i64), Some(1i64), Some(2i64), None, None]);
|
||||
assert_eq!(40, v.memory_size());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -524,29 +489,4 @@ mod tests {
|
||||
let expect: VectorRef = Arc::new(Int64Vector::from_slice(&[123, 8, 9]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_wrapper_slice() {
|
||||
macro_rules! test_from_wrapper_slice {
|
||||
($vec: ident, $ty: ident) => {
|
||||
let from_wrapper_slice = $vec::from_wrapper_slice(&[
|
||||
$ty::from_native($ty::MAX),
|
||||
$ty::from_native($ty::MIN),
|
||||
]);
|
||||
let from_slice = $vec::from_slice(&[$ty::MAX, $ty::MIN]);
|
||||
assert_eq!(from_wrapper_slice, from_slice);
|
||||
};
|
||||
}
|
||||
|
||||
test_from_wrapper_slice!(UInt8Vector, u8);
|
||||
test_from_wrapper_slice!(Int8Vector, i8);
|
||||
test_from_wrapper_slice!(UInt16Vector, u16);
|
||||
test_from_wrapper_slice!(Int16Vector, i16);
|
||||
test_from_wrapper_slice!(UInt32Vector, u32);
|
||||
test_from_wrapper_slice!(Int32Vector, i32);
|
||||
test_from_wrapper_slice!(UInt64Vector, u64);
|
||||
test_from_wrapper_slice!(Int64Vector, i64);
|
||||
test_from_wrapper_slice!(Float32Vector, f32);
|
||||
test_from_wrapper_slice!(Float64Vector, f64);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,19 +15,22 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayBuilder, ArrayData, ArrayIter, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
use arrow::array::{Array, ArrayRef, MutableArray, Utf8ValuesIter};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::arrow_array::{MutableStringArray, StringArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{self, Result};
|
||||
use crate::error::{Result, SerializeSnafu};
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::StringType;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of strings.
|
||||
#[derive(Debug, PartialEq)]
|
||||
/// String array wrapper
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct StringVector {
|
||||
array: StringArray,
|
||||
}
|
||||
@@ -36,16 +39,6 @@ impl StringVector {
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
&self.array
|
||||
}
|
||||
|
||||
fn to_array_data(&self) -> ArrayData {
|
||||
self.array.data().clone()
|
||||
}
|
||||
|
||||
fn from_array_data(data: ArrayData) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringArray> for StringVector {
|
||||
@@ -57,31 +50,7 @@ impl From<StringArray> for StringVector {
|
||||
impl From<Vec<Option<String>>> for StringVector {
|
||||
fn from(data: Vec<Option<String>>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<&str>>> for StringVector {
|
||||
fn from(data: Vec<Option<&str>>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[Option<String>]> for StringVector {
|
||||
fn from(data: &[Option<String>]) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&[Option<&str>]> for StringVector {
|
||||
fn from(data: &[Option<&str>]) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data),
|
||||
array: StringArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -89,7 +58,19 @@ impl From<&[Option<&str>]> for StringVector {
|
||||
impl From<Vec<String>> for StringVector {
|
||||
fn from(data: Vec<String>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data.into_iter().map(Some)),
|
||||
array: StringArray::from(
|
||||
data.into_iter()
|
||||
.map(Option::Some)
|
||||
.collect::<Vec<Option<String>>>(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<&str>>> for StringVector {
|
||||
fn from(data: Vec<Option<&str>>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -97,14 +78,18 @@ impl From<Vec<String>> for StringVector {
|
||||
impl From<Vec<&str>> for StringVector {
|
||||
fn from(data: Vec<&str>) -> Self {
|
||||
Self {
|
||||
array: StringArray::from_iter(data.into_iter().map(Some)),
|
||||
array: StringArray::from(
|
||||
data.into_iter()
|
||||
.map(Option::Some)
|
||||
.collect::<Vec<Option<&str>>>(),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for StringVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::string_datatype()
|
||||
ConcreteDataType::String(StringType::default())
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
@@ -120,13 +105,11 @@ impl Vector for StringVector {
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let data = self.to_array_data();
|
||||
Arc::new(StringArray::from(data))
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let data = self.to_array_data();
|
||||
Box::new(StringArray::from(data))
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
@@ -134,11 +117,7 @@ impl Vector for StringVector {
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.get_buffer_memory_size()
|
||||
}
|
||||
|
||||
fn null_count(&self) -> usize {
|
||||
self.array.null_count()
|
||||
self.len() * std::mem::size_of::<i64>() + self.array.values().len()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
@@ -146,8 +125,7 @@ impl Vector for StringVector {
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
let data = self.array.data().slice(offset, length);
|
||||
Arc::new(Self::from_array_data(data))
|
||||
Arc::new(Self::from(self.array.slice(offset, length)))
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
@@ -162,7 +140,7 @@ impl Vector for StringVector {
|
||||
impl ScalarVector for StringVector {
|
||||
type OwnedItem = String;
|
||||
type RefItem<'a> = &'a str;
|
||||
type Iter<'a> = ArrayIter<&'a StringArray>;
|
||||
type Iter<'a> = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, i32>>;
|
||||
type Builder = StringVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
@@ -179,7 +157,7 @@ impl ScalarVector for StringVector {
|
||||
}
|
||||
|
||||
pub struct StringVectorBuilder {
|
||||
mutable_array: MutableStringArray,
|
||||
buffer: MutableStringArray,
|
||||
}
|
||||
|
||||
impl MutableVector for StringVectorBuilder {
|
||||
@@ -188,7 +166,7 @@ impl MutableVector for StringVectorBuilder {
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.mutable_array.len()
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
@@ -204,15 +182,12 @@ impl MutableVector for StringVectorBuilder {
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
match value.as_string()? {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
self.buffer.push(value.as_string()?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self, vector, StringVector, offset, length)
|
||||
vectors::impl_extend_for_builder!(self.buffer, vector, StringVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,30 +196,30 @@ impl ScalarVectorBuilder for StringVectorBuilder {
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableStringArray::with_capacity(capacity, 0),
|
||||
buffer: MutableStringArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
match value {
|
||||
Some(v) => self.mutable_array.append_value(v),
|
||||
None => self.mutable_array.append_null(),
|
||||
}
|
||||
self.buffer.push(value)
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
StringVector {
|
||||
array: self.mutable_array.finish(),
|
||||
Self::VectorType {
|
||||
array: std::mem::take(&mut self.buffer).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for StringVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
fn serialize_to_json(&self) -> crate::error::Result<Vec<JsonValue>> {
|
||||
self.iter_data()
|
||||
.map(serde_json::to_value)
|
||||
.map(|v| match v {
|
||||
None => Ok(serde_json::Value::Null),
|
||||
Some(s) => serde_json::to_value(s),
|
||||
})
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(error::SerializeSnafu)
|
||||
.context(SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -252,9 +227,60 @@ vectors::impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::datatypes::DataType;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_misc() {
|
||||
let strs = vec!["hello", "greptime", "rust"];
|
||||
let v = StringVector::from(strs.clone());
|
||||
assert_eq!(3, v.len());
|
||||
assert_eq!("StringVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert_eq!(Validity::AllValid, v.validity());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(41, v.memory_size());
|
||||
|
||||
for (i, s) in strs.iter().enumerate() {
|
||||
assert_eq!(Value::from(*s), v.get(i));
|
||||
assert_eq!(ValueRef::from(*s), v.get_ref(i));
|
||||
assert_eq!(Value::from(*s), v.try_get(i).unwrap());
|
||||
}
|
||||
|
||||
let arrow_arr = v.to_arrow_array();
|
||||
assert_eq!(3, arrow_arr.len());
|
||||
assert_eq!(&ArrowDataType::Utf8, arrow_arr.data_type());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_string_vector() {
|
||||
let mut builder = StringVectorBuilder::with_capacity(3);
|
||||
builder.push(Some("hello"));
|
||||
builder.push(None);
|
||||
builder.push(Some("world"));
|
||||
let string_vector = builder.finish();
|
||||
let serialized =
|
||||
serde_json::to_string(&string_vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["hello",null,"world"]"#, serialized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let mut builder = MutableStringArray::new();
|
||||
builder.push(Some("A"));
|
||||
builder.push(Some("B"));
|
||||
builder.push::<&str>(None);
|
||||
builder.push(Some("D"));
|
||||
let string_array: StringArray = builder.into();
|
||||
let vector = StringVector::from(string_array);
|
||||
assert_eq!(
|
||||
r#"["A","B",null,"D"]"#,
|
||||
serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_build_get() {
|
||||
@@ -284,7 +310,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_builder() {
|
||||
let mut builder = StringVectorBuilder::with_capacity(3);
|
||||
let mut builder = StringType::default().create_mutable_vector(3);
|
||||
builder.push_value_ref(ValueRef::String("hello")).unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
|
||||
@@ -298,73 +324,4 @@ mod tests {
|
||||
let expect: VectorRef = Arc::new(StringVector::from_slice(&["hello", "one", "two"]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_misc() {
|
||||
let strs = vec!["hello", "greptime", "rust"];
|
||||
let v = StringVector::from(strs.clone());
|
||||
assert_eq!(3, v.len());
|
||||
assert_eq!("StringVector", v.vector_type_name());
|
||||
assert!(!v.is_const());
|
||||
assert!(v.validity().is_all_valid());
|
||||
assert!(!v.only_null());
|
||||
assert_eq!(128, v.memory_size());
|
||||
|
||||
for (i, s) in strs.iter().enumerate() {
|
||||
assert_eq!(Value::from(*s), v.get(i));
|
||||
assert_eq!(ValueRef::from(*s), v.get_ref(i));
|
||||
assert_eq!(Value::from(*s), v.try_get(i).unwrap());
|
||||
}
|
||||
|
||||
let arrow_arr = v.to_arrow_array();
|
||||
assert_eq!(3, arrow_arr.len());
|
||||
assert_eq!(&DataType::Utf8, arrow_arr.data_type());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_string_vector() {
|
||||
let mut builder = StringVectorBuilder::with_capacity(3);
|
||||
builder.push(Some("hello"));
|
||||
builder.push(None);
|
||||
builder.push(Some("world"));
|
||||
let string_vector = builder.finish();
|
||||
let serialized =
|
||||
serde_json::to_string(&string_vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["hello",null,"world"]"#, serialized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let mut builder = MutableStringArray::new();
|
||||
builder.append_option(Some("A"));
|
||||
builder.append_option(Some("B"));
|
||||
builder.append_null();
|
||||
builder.append_option(Some("D"));
|
||||
let string_array: StringArray = builder.finish();
|
||||
let vector = StringVector::from(string_array);
|
||||
assert_eq!(
|
||||
r#"["A","B",null,"D"]"#,
|
||||
serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_non_option_string() {
|
||||
let nul = String::from_utf8(vec![0]).unwrap();
|
||||
let corpus = vec!["😅😅😅", "😍😍😍😍", "🥵🥵", nul.as_str()];
|
||||
let vector = StringVector::from(corpus);
|
||||
let serialized = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["😅😅😅","😍😍😍😍","🥵🥵","\u0000"]"#, serialized);
|
||||
|
||||
let corpus = vec![
|
||||
"🀀🀀🀀".to_string(),
|
||||
"🀁🀁🀁".to_string(),
|
||||
"🀂🀂🀂".to_string(),
|
||||
"🀃🀃🀃".to_string(),
|
||||
"🀆🀆".to_string(),
|
||||
];
|
||||
let vector = StringVector::from(corpus);
|
||||
let serialized = serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["🀀🀀🀀","🀁🀁🀁","🀂🀂🀂","🀃🀃🀃","🀆🀆"]"#, serialized);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,20 +12,308 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use crate::types::{
|
||||
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
|
||||
TimestampSecondType,
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, PrimitiveArray};
|
||||
use common_time::timestamp::{TimeUnit, Timestamp};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error;
|
||||
use crate::error::Result;
|
||||
use crate::prelude::{
|
||||
MutableVector, ScalarVector, ScalarVectorBuilder, Validity, Value, ValueRef, Vector, VectorRef,
|
||||
};
|
||||
use crate::vectors::{PrimitiveVector, PrimitiveVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::TimestampType;
|
||||
use crate::vectors::{PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
|
||||
pub type TimestampSecondVector = PrimitiveVector<TimestampSecondType>;
|
||||
pub type TimestampSecondVectorBuilder = PrimitiveVectorBuilder<TimestampSecondType>;
|
||||
/// `TimestampVector` stores timestamp in millisecond since UNIX Epoch.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct TimestampVector {
|
||||
array: PrimitiveVector<i64>,
|
||||
}
|
||||
|
||||
pub type TimestampMillisecondVector = PrimitiveVector<TimestampMillisecondType>;
|
||||
pub type TimestampMillisecondVectorBuilder = PrimitiveVectorBuilder<TimestampMillisecondType>;
|
||||
impl TimestampVector {
|
||||
pub fn new(array: PrimitiveArray<i64>) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector { array },
|
||||
}
|
||||
}
|
||||
|
||||
pub type TimestampMicrosecondVector = PrimitiveVector<TimestampMicrosecondType>;
|
||||
pub type TimestampMicrosecondVectorBuilder = PrimitiveVectorBuilder<TimestampMicrosecondType>;
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i64>>()
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
))
|
||||
}
|
||||
|
||||
pub type TimestampNanosecondVector = PrimitiveVector<TimestampNanosecondType>;
|
||||
pub type TimestampNanosecondVectorBuilder = PrimitiveVectorBuilder<TimestampNanosecondType>;
|
||||
pub fn from_values<I: IntoIterator<Item = i64>>(iter: I) -> Self {
|
||||
Self {
|
||||
array: PrimitiveVector {
|
||||
array: PrimitiveArray::from_values(iter),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn as_arrow(&self) -> &dyn Array {
|
||||
self.array.as_arrow()
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for TimestampVector {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::timestamp_millis_datatype()
|
||||
}
|
||||
|
||||
fn vector_type_name(&self) -> String {
|
||||
"TimestampVector".to_string()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Arc::new(PrimitiveArray::new(
|
||||
TimestampType::new(TimeUnit::Millisecond).as_arrow_type(),
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let values = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Timestamp(arrow::datatypes::TimeUnit::Millisecond, None),
|
||||
values,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
|
||||
fn memory_size(&self) -> usize {
|
||||
self.array.memory_size()
|
||||
}
|
||||
|
||||
fn is_null(&self, row: usize) -> bool {
|
||||
self.array.is_null(row)
|
||||
}
|
||||
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef {
|
||||
Arc::new(Self {
|
||||
array: PrimitiveVector {
|
||||
array: self.array.array.slice(offset, length),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn get(&self, index: usize) -> Value {
|
||||
match self.array.get(index) {
|
||||
Value::Null => Value::Null,
|
||||
Value::Int64(v) => Value::Timestamp(Timestamp::from_millis(v)),
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int64(v) => ValueRef::Timestamp(Timestamp::from_millis(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for TimestampVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
Ok(self
|
||||
.array
|
||||
.iter_data()
|
||||
.map(|v| match v {
|
||||
None => serde_json::Value::Null,
|
||||
Some(v) => v.into(),
|
||||
})
|
||||
.collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for TimestampVector {
|
||||
type OwnedItem = Timestamp;
|
||||
type RefItem<'a> = Timestamp;
|
||||
type Iter<'a> = TimestampDataIter<'a>;
|
||||
type Builder = TimestampVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
self.array.get_data(idx).map(Timestamp::from_millis)
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
TimestampDataIter {
|
||||
iter: self.array.iter_data(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TimestampDataIter<'a> {
|
||||
iter: PrimitiveIter<'a, i64>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for TimestampDataIter<'a> {
|
||||
type Item = Option<Timestamp>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|v| v.map(Timestamp::from_millis))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TimestampVectorBuilder {
|
||||
buffer: PrimitiveVectorBuilder<i64>,
|
||||
}
|
||||
|
||||
impl MutableVector for TimestampVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::timestamp_millis_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.buffer.len()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
// TODO(hl): vector and vector builder should also support customized time unit.
|
||||
self.buffer.push(
|
||||
value
|
||||
.as_timestamp()?
|
||||
.map(|t| t.convert_to(TimeUnit::Millisecond)),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<TimestampVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for TimestampVectorBuilder {
|
||||
type VectorType = TimestampVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
buffer: PrimitiveVectorBuilder::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes a Timestamp value into vector builder. The timestamp must be with time unit
|
||||
/// `Second`/`MilliSecond`/`Microsecond`.
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.buffer
|
||||
.push(value.map(|v| v.convert_to(TimeUnit::Millisecond)));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Self::VectorType {
|
||||
Self::VectorType {
|
||||
array: self.buffer.finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn replicate_timestamp(vector: &TimestampVector, offsets: &[usize]) -> VectorRef {
|
||||
let array = crate::vectors::primitive::replicate_primitive_with_type(
|
||||
&vector.array,
|
||||
offsets,
|
||||
vector.data_type(),
|
||||
);
|
||||
Arc::new(TimestampVector { array })
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a vector from inputs with different time units and checks that
    /// every stored value is reported in milliseconds.
    #[test]
    pub fn test_build_timestamp_vector() {
        // Shorthand for the millisecond timestamps the assertions expect.
        let millis = |v| Timestamp::new(v, TimeUnit::Millisecond);

        let mut builder = TimestampVectorBuilder::with_capacity(3);
        let inputs = [
            Some(Timestamp::new(1, TimeUnit::Second)),
            None,
            Some(Timestamp::new(2, TimeUnit::Millisecond)),
        ];
        for input in inputs {
            builder.push(input);
        }

        let built = builder.finish();
        assert_eq!(
            ConcreteDataType::timestamp_millis_datatype(),
            built.data_type()
        );
        assert_eq!(3, built.len());

        // One second is stored as 1000 milliseconds.
        assert_eq!(Value::Timestamp(millis(1000)), built.get(0));
        assert_eq!(Value::Null, built.get(1));
        assert_eq!(Value::Timestamp(millis(2)), built.get(2));

        let collected = built.iter_data().collect::<Vec<_>>();
        assert_eq!(vec![Some(millis(1000)), None, Some(millis(2))], collected);
    }

    /// Round-trips a vector through its arrow array representation.
    #[test]
    fn test_timestamp_from_arrow() {
        let source =
            TimestampVector::from_slice(&[Timestamp::from_millis(1), Timestamp::from_millis(2)]);
        let arrow_array = source.as_arrow().slice(0, source.len());
        let rebuilt = TimestampVector::try_from_arrow_array(&arrow_array).unwrap();
        assert_eq!(source, rebuilt);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user