mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-04 04:12:55 +00:00
feat: Adds push_value_ref and extend_slice_of to MutableVector (#215)
* feat: Impl cmp_element() for Vector * chore: Add doc comments to MutableVector * feat: Add create_mutable() to DataType Add `create_mutable()` to create a MutableVector for each DataType. Implement ListVectorBuilder and NullVectorBuilder for ListType and NullType. * feat: Add ValueRef ValueRef is a reference to value, could be used to avoid some allocation when getting data from Vector. To support ValueRef, also implement a ListValueRef for ListValue, but comparision of ListValueRef still requires some allocation, due to the complexity of ListValue and ListVector. Impl some From trait for ValueRef * feat: Implement get_ref for Vector * feat: Remove cmp_element from Vector `cmp_element` could be replaced by `get_ref` and then compare * feat: Implement push/extend for PrimitiveVectorBuilder Implement push_value_ref() and extend_slice_of() for PrimitiveVectorBuilder. Also refactor the DataTypeBuilder trait for primitive types to PrimitiveElement trait, adds necessary cast helper methods to it. - Cast a reference to Vector to reference arrow's primitive array - Cast a ValueRef to primitive type - Also make PrimitiveElement super trait of Primitive * feat: Implement push/extend for all vector builders Implement push_value_ref() and extend_slice_of() for remaining vector builders. Add some helpful cast method to ValueRef and a method to cast Value to ValueRef. Change the behavior of PrimitiveElement::cast_xxx to panic when unable to cast, since push_value_ref() and extend_slice_of() always panic when given invalid input data type. * feat: MutableVector returns error if data type unmatch * test: Add tests for ValueRef * feat: Add tests for Vector::get_ref * feat: NullVector returns error if data type unmatch * test: Add tests for vector builders * fix: Fix compile error in python coprocessor * refactor: Add lifetime param to IntoValueRef The Primitive trait just use the `IntoValueRef<'static>` bound. Also rename create_mutable to create_mutable_vector. * chore: Address CR comments * feat: Customize PartialOrd/Ord for Value/ValueRef Panics if values/refs have different data type * style: Fix clippy * refactor: Use macro to generate body of ValueRef::as_xxx
This commit is contained in:
@@ -12,6 +12,7 @@ use crate::types::{
|
||||
Int8Type, ListType, NullType, StringType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::Value;
|
||||
use crate::vectors::MutableVector;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[enum_dispatch::enum_dispatch(DataType)]
|
||||
@@ -185,6 +186,9 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
|
||||
/// Convert this type as [arrow2::datatypes::DataType].
|
||||
fn as_arrow_type(&self) -> ArrowDataType;
|
||||
|
||||
/// Create a mutable vector with given `capacity` of this type.
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
|
||||
}
|
||||
|
||||
pub type DataTypeRef = Arc<dyn DataType>;
|
||||
|
||||
@@ -47,6 +47,9 @@ pub enum Error {
|
||||
|
||||
#[snafu(display("Invalid timestamp index: {}", index))]
|
||||
InvalidTimestampIndex { index: usize, backtrace: Backtrace },
|
||||
|
||||
#[snafu(display("{}", msg))]
|
||||
CastType { msg: String, backtrace: Backtrace },
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
|
||||
@@ -3,7 +3,7 @@ pub use crate::macros::*;
|
||||
pub use crate::scalars::{Scalar, ScalarRef, ScalarVector, ScalarVectorBuilder};
|
||||
pub use crate::type_id::LogicalTypeId;
|
||||
pub use crate::types::Primitive;
|
||||
pub use crate::value::Value;
|
||||
pub use crate::value::{Value, ValueRef};
|
||||
pub use crate::vectors::{
|
||||
Helper as VectorHelper, MutableVector, Validity, Vector, VectorBuilder, VectorRef,
|
||||
};
|
||||
|
||||
@@ -53,8 +53,9 @@ impl LogicalTypeId {
|
||||
LogicalTypeId::String => ConcreteDataType::string_datatype(),
|
||||
LogicalTypeId::Binary => ConcreteDataType::binary_datatype(),
|
||||
LogicalTypeId::Date => ConcreteDataType::date_datatype(),
|
||||
LogicalTypeId::DateTime | LogicalTypeId::List => {
|
||||
unimplemented!("Data type for {:?} is unimplemented", self)
|
||||
LogicalTypeId::DateTime => ConcreteDataType::datetime_datatype(),
|
||||
LogicalTypeId::List => {
|
||||
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ pub use list_type::ListType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_traits::Primitive;
|
||||
pub use primitive_type::{
|
||||
DataTypeBuilder, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
|
||||
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, PrimitiveElement,
|
||||
PrimitiveType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
pub use string_type::StringType;
|
||||
|
||||
@@ -5,8 +5,10 @@ use common_base::bytes::StringBytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{BinaryVectorBuilder, MutableVector};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct BinaryType;
|
||||
@@ -33,4 +35,8 @@ impl DataType for BinaryType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::LargeBinary
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BinaryVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,8 +4,10 @@ use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{BooleanVectorBuilder, MutableVector};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct BooleanType;
|
||||
@@ -32,4 +34,8 @@ impl DataType for BooleanType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Boolean
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(BooleanVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::vectors::{DateVectorBuilder, MutableVector};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DateType;
|
||||
@@ -25,6 +27,10 @@ impl DataType for DateType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Date32
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(DateVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
impl DateType {
|
||||
|
||||
@@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::prelude::{LogicalTypeId, Value};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::vectors::{DateTimeVectorBuilder, MutableVector};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DateTimeType;
|
||||
@@ -28,6 +30,10 @@ impl DataType for DateTimeType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Date64
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(DateTimeVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
impl DateTimeType {
|
||||
|
||||
@@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::prelude::*;
|
||||
use crate::value::ListValue;
|
||||
use crate::vectors::{ListVectorBuilder, MutableVector};
|
||||
|
||||
/// Used to represent the List datatype.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
@@ -42,6 +43,13 @@ impl DataType for ListType {
|
||||
let field = Box::new(Field::new("item", self.inner.as_arrow_type(), true));
|
||||
ArrowDataType::List(field)
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(ListVectorBuilder::with_capacity(
|
||||
*self.inner.clone(),
|
||||
capacity,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -6,6 +6,7 @@ use serde::{Deserialize, Serialize};
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::{MutableVector, NullVectorBuilder};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct NullType;
|
||||
@@ -32,4 +33,8 @@ impl DataType for NullType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Null
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, _capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(NullVectorBuilder::default())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use arrow::types::NativeType;
|
||||
use num::NumCast;
|
||||
|
||||
use crate::prelude::Scalar;
|
||||
use crate::value::Value;
|
||||
use crate::value::{IntoValueRef, Value};
|
||||
|
||||
/// Primitive type.
|
||||
pub trait Primitive:
|
||||
@@ -12,6 +12,7 @@ pub trait Primitive:
|
||||
+ Clone
|
||||
+ Copy
|
||||
+ Into<Value>
|
||||
+ IntoValueRef<'static>
|
||||
+ NativeType
|
||||
+ serde::Serialize
|
||||
+ NativeArithmetics
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
use std::any::TypeId;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use arrow::array::PrimitiveArray;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use paste::paste;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::{ConcreteDataType, DataType};
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::primitive_traits::Primitive;
|
||||
use crate::value::Value;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{MutableVector, PrimitiveVector, PrimitiveVectorBuilder, Vector};
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
pub struct PrimitiveType<T: Primitive> {
|
||||
@@ -24,22 +29,60 @@ impl<T: Primitive, U: Primitive> PartialEq<PrimitiveType<U>> for PrimitiveType<T
|
||||
|
||||
impl<T: Primitive> Eq for PrimitiveType<T> {}
|
||||
|
||||
/// Create a new [ConcreteDataType] from a primitive type.
|
||||
pub trait DataTypeBuilder {
|
||||
/// A trait that provide helper methods for a primitive type to implementing the [PrimitiveVector].
|
||||
pub trait PrimitiveElement: Primitive {
|
||||
/// Construct the data type struct.
|
||||
fn build_data_type() -> ConcreteDataType;
|
||||
|
||||
/// Returns the name of the type id.
|
||||
fn type_name() -> String;
|
||||
|
||||
/// Dynamic cast the vector to the concrete vector type.
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveArray<Self>>;
|
||||
|
||||
/// Cast value ref to the primitive type.
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self>>;
|
||||
}
|
||||
|
||||
macro_rules! impl_build_data_type {
|
||||
macro_rules! impl_primitive_element {
|
||||
($Type:ident, $TypeId:ident) => {
|
||||
paste::paste! {
|
||||
impl DataTypeBuilder for $Type {
|
||||
impl PrimitiveElement for $Type {
|
||||
fn build_data_type() -> ConcreteDataType {
|
||||
ConcreteDataType::$TypeId(PrimitiveType::<$Type>::default())
|
||||
}
|
||||
|
||||
fn type_name() -> String {
|
||||
stringify!($TypeId).to_string()
|
||||
}
|
||||
|
||||
fn cast_vector(vector: &dyn Vector) -> Result<&PrimitiveArray<$Type>> {
|
||||
let primitive_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveVector<$Type>>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast {} to vector of primitive type {}",
|
||||
vector.vector_type_name(),
|
||||
stringify!($TypeId)
|
||||
),
|
||||
})?;
|
||||
Ok(&primitive_vector.array)
|
||||
}
|
||||
|
||||
fn cast_value_ref(value: ValueRef) -> Result<Option<Self>> {
|
||||
match value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::$TypeId(v) => Ok(Some(v.into())),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast value {:?} to primitive type {}",
|
||||
other,
|
||||
stringify!($TypeId),
|
||||
),
|
||||
}.fail(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -63,6 +106,10 @@ macro_rules! impl_numeric {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::$TypeId
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(PrimitiveVectorBuilder::<$Type>::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for PrimitiveType<$Type> {
|
||||
@@ -79,7 +126,7 @@ macro_rules! impl_numeric {
|
||||
}
|
||||
}
|
||||
|
||||
impl_build_data_type!($Type, $TypeId);
|
||||
impl_primitive_element!($Type, $TypeId);
|
||||
|
||||
paste! {
|
||||
pub type [<$TypeId Type>]=PrimitiveType<$Type>;
|
||||
|
||||
@@ -6,6 +6,8 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
||||
use crate::scalars::ScalarVectorBuilder;
|
||||
use crate::vectors::{MutableVector, StringVectorBuilder};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct StringType;
|
||||
@@ -32,4 +34,8 @@ impl DataType for StringType {
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Utf8
|
||||
}
|
||||
|
||||
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
|
||||
Box::new(StringVectorBuilder::with_capacity(capacity))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,20 +1,22 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use common_base::bytes::{Bytes, StringBytes};
|
||||
use common_time::date::Date;
|
||||
use common_time::datetime::DateTime;
|
||||
pub use ordered_float::OrderedFloat;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::*;
|
||||
use crate::vectors::ListVector;
|
||||
|
||||
pub type OrderedF32 = OrderedFloat<f32>;
|
||||
pub type OrderedF64 = OrderedFloat<f64>;
|
||||
|
||||
/// Value holds a single arbitrary value of any [DataType](crate::data_type::DataType).
|
||||
///
|
||||
/// Although compare Value with different data type is allowed, it is recommended to only
|
||||
/// compare Value with same data type. Comparing Value with different data type may not
|
||||
/// behaves as what you expect.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
/// Comparison between values with different types (expect Null) is not allowed.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum Value {
|
||||
Null,
|
||||
|
||||
@@ -36,8 +38,8 @@ pub enum Value {
|
||||
Binary(Bytes),
|
||||
|
||||
// Date & Time types:
|
||||
Date(common_time::date::Date),
|
||||
DateTime(common_time::datetime::DateTime),
|
||||
Date(Date),
|
||||
DateTime(DateTime),
|
||||
|
||||
List(ListValue),
|
||||
}
|
||||
@@ -69,13 +71,95 @@ impl Value {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this is a null value.
|
||||
pub fn is_null(&self) -> bool {
|
||||
matches!(self, Value::Null)
|
||||
}
|
||||
|
||||
/// Cast itself to [ListValue].
|
||||
pub fn as_list(&self) -> Result<Option<&ListValue>> {
|
||||
match self {
|
||||
Value::Null => Ok(None),
|
||||
Value::List(v) => Ok(Some(v)),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast {:?} to list value", other),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Cast itself to [ValueRef].
|
||||
pub fn as_value_ref(&self) -> ValueRef {
|
||||
match self {
|
||||
Value::Null => ValueRef::Null,
|
||||
Value::Boolean(v) => ValueRef::Boolean(*v),
|
||||
Value::UInt8(v) => ValueRef::UInt8(*v),
|
||||
Value::UInt16(v) => ValueRef::UInt16(*v),
|
||||
Value::UInt32(v) => ValueRef::UInt32(*v),
|
||||
Value::UInt64(v) => ValueRef::UInt64(*v),
|
||||
Value::Int8(v) => ValueRef::Int8(*v),
|
||||
Value::Int16(v) => ValueRef::Int16(*v),
|
||||
Value::Int32(v) => ValueRef::Int32(*v),
|
||||
Value::Int64(v) => ValueRef::Int64(*v),
|
||||
Value::Float32(v) => ValueRef::Float32(*v),
|
||||
Value::Float64(v) => ValueRef::Float64(*v),
|
||||
Value::String(v) => ValueRef::String(v.as_utf8()),
|
||||
Value::Binary(v) => ValueRef::Binary(v),
|
||||
Value::Date(v) => ValueRef::Date(*v),
|
||||
Value::DateTime(v) => ValueRef::DateTime(*v),
|
||||
Value::List(v) => ValueRef::List(ListValueRef::Ref(v)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_from {
|
||||
($Variant:ident, $Type:ident) => {
|
||||
macro_rules! impl_ord_for_value_like {
|
||||
($Type: ident, $left: ident, $right: ident) => {
|
||||
if $left.is_null() && !$right.is_null() {
|
||||
return Ordering::Less;
|
||||
} else if !$left.is_null() && $right.is_null() {
|
||||
return Ordering::Greater;
|
||||
} else {
|
||||
match ($left, $right) {
|
||||
($Type::Null, $Type::Null) => Ordering::Equal,
|
||||
($Type::Boolean(v1), $Type::Boolean(v2)) => v1.cmp(v2),
|
||||
($Type::UInt8(v1), $Type::UInt8(v2)) => v1.cmp(v2),
|
||||
($Type::UInt16(v1), $Type::UInt16(v2)) => v1.cmp(v2),
|
||||
($Type::UInt32(v1), $Type::UInt32(v2)) => v1.cmp(v2),
|
||||
($Type::UInt64(v1), $Type::UInt64(v2)) => v1.cmp(v2),
|
||||
($Type::Int8(v1), $Type::Int8(v2)) => v1.cmp(v2),
|
||||
($Type::Int16(v1), $Type::Int16(v2)) => v1.cmp(v2),
|
||||
($Type::Int32(v1), $Type::Int32(v2)) => v1.cmp(v2),
|
||||
($Type::Int64(v1), $Type::Int64(v2)) => v1.cmp(v2),
|
||||
($Type::Float32(v1), $Type::Float32(v2)) => v1.cmp(v2),
|
||||
($Type::Float64(v1), $Type::Float64(v2)) => v1.cmp(v2),
|
||||
($Type::String(v1), $Type::String(v2)) => v1.cmp(v2),
|
||||
($Type::Binary(v1), $Type::Binary(v2)) => v1.cmp(v2),
|
||||
($Type::Date(v1), $Type::Date(v2)) => v1.cmp(v2),
|
||||
($Type::DateTime(v1), $Type::DateTime(v2)) => v1.cmp(v2),
|
||||
($Type::List(v1), $Type::List(v2)) => v1.cmp(v2),
|
||||
_ => panic!(
|
||||
"Cannot compare different values {:?} and {:?}",
|
||||
$left, $right
|
||||
),
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl PartialOrd for Value {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Value {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
impl_ord_for_value_like!(Value, self, other)
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_value_from {
|
||||
($Variant: ident, $Type: ident) => {
|
||||
impl From<$Type> for Value {
|
||||
fn from(value: $Type) -> Self {
|
||||
Value::$Variant(value.into())
|
||||
@@ -93,19 +177,19 @@ macro_rules! impl_from {
|
||||
};
|
||||
}
|
||||
|
||||
impl_from!(Boolean, bool);
|
||||
impl_from!(UInt8, u8);
|
||||
impl_from!(UInt16, u16);
|
||||
impl_from!(UInt32, u32);
|
||||
impl_from!(UInt64, u64);
|
||||
impl_from!(Int8, i8);
|
||||
impl_from!(Int16, i16);
|
||||
impl_from!(Int32, i32);
|
||||
impl_from!(Int64, i64);
|
||||
impl_from!(Float32, f32);
|
||||
impl_from!(Float64, f64);
|
||||
impl_from!(String, StringBytes);
|
||||
impl_from!(Binary, Bytes);
|
||||
impl_value_from!(Boolean, bool);
|
||||
impl_value_from!(UInt8, u8);
|
||||
impl_value_from!(UInt16, u16);
|
||||
impl_value_from!(UInt32, u32);
|
||||
impl_value_from!(UInt64, u64);
|
||||
impl_value_from!(Int8, i8);
|
||||
impl_value_from!(Int16, i16);
|
||||
impl_value_from!(Int32, i32);
|
||||
impl_value_from!(Int64, i64);
|
||||
impl_value_from!(Float32, f32);
|
||||
impl_value_from!(Float64, f64);
|
||||
impl_value_from!(String, StringBytes);
|
||||
impl_value_from!(Binary, Bytes);
|
||||
|
||||
impl From<String> for Value {
|
||||
fn from(string: String) -> Value {
|
||||
@@ -159,6 +243,7 @@ impl TryFrom<Value> for serde_json::Value {
|
||||
}
|
||||
}
|
||||
|
||||
/// List value.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ListValue {
|
||||
/// List of nested Values (boxed to reduce size_of(Value))
|
||||
@@ -201,10 +286,208 @@ impl Ord for ListValue {
|
||||
}
|
||||
}
|
||||
|
||||
/// Reference to [Value].
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum ValueRef<'a> {
|
||||
Null,
|
||||
|
||||
// Numeric types:
|
||||
Boolean(bool),
|
||||
UInt8(u8),
|
||||
UInt16(u16),
|
||||
UInt32(u32),
|
||||
UInt64(u64),
|
||||
Int8(i8),
|
||||
Int16(i16),
|
||||
Int32(i32),
|
||||
Int64(i64),
|
||||
Float32(OrderedF32),
|
||||
Float64(OrderedF64),
|
||||
|
||||
// String types:
|
||||
String(&'a str),
|
||||
Binary(&'a [u8]),
|
||||
|
||||
// Date & Time types:
|
||||
Date(Date),
|
||||
DateTime(DateTime),
|
||||
|
||||
List(ListValueRef<'a>),
|
||||
}
|
||||
|
||||
macro_rules! impl_as_for_value_ref {
|
||||
($value: ident, $Variant: ident) => {
|
||||
match $value {
|
||||
ValueRef::Null => Ok(None),
|
||||
ValueRef::$Variant(v) => Ok(Some(*v)),
|
||||
other => error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast value ref {:?} to {}",
|
||||
other,
|
||||
stringify!($Variant)
|
||||
),
|
||||
}
|
||||
.fail(),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl<'a> ValueRef<'a> {
|
||||
/// Returns true if this is null.
|
||||
pub fn is_null(&self) -> bool {
|
||||
matches!(self, ValueRef::Null)
|
||||
}
|
||||
|
||||
/// Cast itself to binary slice.
|
||||
pub fn as_binary(&self) -> Result<Option<&[u8]>> {
|
||||
impl_as_for_value_ref!(self, Binary)
|
||||
}
|
||||
|
||||
/// Cast itself to string slice.
|
||||
pub fn as_string(&self) -> Result<Option<&str>> {
|
||||
impl_as_for_value_ref!(self, String)
|
||||
}
|
||||
|
||||
/// Cast itself to boolean.
|
||||
pub fn as_boolean(&self) -> Result<Option<bool>> {
|
||||
impl_as_for_value_ref!(self, Boolean)
|
||||
}
|
||||
|
||||
/// Cast itself to [Date].
|
||||
pub fn as_date(&self) -> Result<Option<Date>> {
|
||||
impl_as_for_value_ref!(self, Date)
|
||||
}
|
||||
|
||||
/// Cast itself to [DateTime].
|
||||
pub fn as_datetime(&self) -> Result<Option<DateTime>> {
|
||||
impl_as_for_value_ref!(self, DateTime)
|
||||
}
|
||||
|
||||
/// Cast itself to [ListValueRef].
|
||||
pub fn as_list(&self) -> Result<Option<ListValueRef>> {
|
||||
impl_as_for_value_ref!(self, List)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PartialOrd for ValueRef<'a> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Ord for ValueRef<'a> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
impl_ord_for_value_like!(ValueRef, self, other)
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper trait to convert copyable types to `ValueRef`.
|
||||
///
|
||||
/// It could replace the usage of `Into<ValueRef<'a>>`, thus avoid confusion between `Into<Value>`
|
||||
/// and `Into<ValueRef<'a>>` in generic codes. One typical usage is the [`Primitive`](crate::primitive_traits::Primitive) trait.
|
||||
pub trait IntoValueRef<'a> {
|
||||
/// Convert itself to [ValueRef].
|
||||
fn into_value_ref(self) -> ValueRef<'a>;
|
||||
}
|
||||
|
||||
macro_rules! impl_value_ref_from {
|
||||
($Variant:ident, $Type:ident) => {
|
||||
impl From<$Type> for ValueRef<'_> {
|
||||
fn from(value: $Type) -> Self {
|
||||
ValueRef::$Variant(value.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoValueRef<'a> for $Type {
|
||||
fn into_value_ref(self) -> ValueRef<'a> {
|
||||
ValueRef::$Variant(self.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Option<$Type>> for ValueRef<'_> {
|
||||
fn from(value: Option<$Type>) -> Self {
|
||||
match value {
|
||||
Some(v) => ValueRef::$Variant(v.into()),
|
||||
None => ValueRef::Null,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> IntoValueRef<'a> for Option<$Type> {
|
||||
fn into_value_ref(self) -> ValueRef<'a> {
|
||||
match self {
|
||||
Some(v) => ValueRef::$Variant(v.into()),
|
||||
None => ValueRef::Null,
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_value_ref_from!(Boolean, bool);
|
||||
impl_value_ref_from!(UInt8, u8);
|
||||
impl_value_ref_from!(UInt16, u16);
|
||||
impl_value_ref_from!(UInt32, u32);
|
||||
impl_value_ref_from!(UInt64, u64);
|
||||
impl_value_ref_from!(Int8, i8);
|
||||
impl_value_ref_from!(Int16, i16);
|
||||
impl_value_ref_from!(Int32, i32);
|
||||
impl_value_ref_from!(Int64, i64);
|
||||
impl_value_ref_from!(Float32, f32);
|
||||
impl_value_ref_from!(Float64, f64);
|
||||
|
||||
impl<'a> From<&'a str> for ValueRef<'a> {
|
||||
fn from(string: &'a str) -> ValueRef<'a> {
|
||||
ValueRef::String(string)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<&'a [u8]> for ValueRef<'a> {
|
||||
fn from(bytes: &'a [u8]) -> ValueRef<'a> {
|
||||
ValueRef::Binary(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reference to a [ListValue].
|
||||
// Comparison still requires some allocation (call of `to_value()`) and might be avoidable.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum ListValueRef<'a> {
|
||||
Indexed { vector: &'a ListVector, idx: usize },
|
||||
Ref(&'a ListValue),
|
||||
}
|
||||
|
||||
impl<'a> ListValueRef<'a> {
|
||||
fn to_value(self) -> Value {
|
||||
match self {
|
||||
ListValueRef::Indexed { vector, idx } => vector.get(idx),
|
||||
ListValueRef::Ref(v) => Value::List((*v).clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PartialEq for ListValueRef<'a> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.to_value().eq(&other.to_value())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Eq for ListValueRef<'a> {}
|
||||
|
||||
impl<'a> Ord for ListValueRef<'a> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
// Respect the order of `Value` by converting into value before comparison.
|
||||
self.to_value().cmp(&other.to_value())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PartialOrd for ListValueRef<'a> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_time::datetime::DateTime;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
@@ -423,4 +706,165 @@ mod tests {
|
||||
}))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_null_value() {
|
||||
assert!(Value::Null.is_null());
|
||||
assert!(!Value::Boolean(true).is_null());
|
||||
assert!(Value::Null < Value::Boolean(false));
|
||||
assert!(Value::Boolean(true) > Value::Null);
|
||||
assert!(Value::Null < Value::Int32(10));
|
||||
assert!(Value::Int32(10) > Value::Null);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_null_value_ref() {
|
||||
assert!(ValueRef::Null.is_null());
|
||||
assert!(!ValueRef::Boolean(true).is_null());
|
||||
assert!(ValueRef::Null < ValueRef::Boolean(false));
|
||||
assert!(ValueRef::Boolean(true) > ValueRef::Null);
|
||||
assert!(ValueRef::Null < ValueRef::Int32(10));
|
||||
assert!(ValueRef::Int32(10) > ValueRef::Null);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_as_value_ref() {
|
||||
macro_rules! check_as_value_ref {
|
||||
($Variant: ident, $data: expr) => {
|
||||
let value = Value::$Variant($data);
|
||||
let value_ref = value.as_value_ref();
|
||||
let expect_ref = ValueRef::$Variant($data);
|
||||
|
||||
assert_eq!(expect_ref, value_ref);
|
||||
};
|
||||
}
|
||||
|
||||
assert_eq!(ValueRef::Null, Value::Null.as_value_ref());
|
||||
check_as_value_ref!(Boolean, true);
|
||||
check_as_value_ref!(UInt8, 123);
|
||||
check_as_value_ref!(UInt16, 123);
|
||||
check_as_value_ref!(UInt32, 123);
|
||||
check_as_value_ref!(UInt64, 123);
|
||||
check_as_value_ref!(Int8, -12);
|
||||
check_as_value_ref!(Int16, -12);
|
||||
check_as_value_ref!(Int32, -12);
|
||||
check_as_value_ref!(Int64, -12);
|
||||
check_as_value_ref!(Float32, OrderedF32::from(16.0));
|
||||
check_as_value_ref!(Float64, OrderedF64::from(16.0));
|
||||
|
||||
assert_eq!(
|
||||
ValueRef::String("hello"),
|
||||
Value::String("hello".into()).as_value_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::Binary(b"hello"),
|
||||
Value::Binary("hello".as_bytes().into()).as_value_ref()
|
||||
);
|
||||
|
||||
check_as_value_ref!(Date, Date::new(103));
|
||||
check_as_value_ref!(DateTime, DateTime::new(1034));
|
||||
|
||||
let list = ListValue {
|
||||
items: None,
|
||||
datatype: ConcreteDataType::int32_datatype(),
|
||||
};
|
||||
assert_eq!(
|
||||
ValueRef::List(ListValueRef::Ref(&list)),
|
||||
Value::List(list.clone()).as_value_ref()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_value_ref_as() {
|
||||
macro_rules! check_as_null {
|
||||
($method: ident) => {
|
||||
assert_eq!(None, ValueRef::Null.$method().unwrap());
|
||||
};
|
||||
}
|
||||
|
||||
check_as_null!(as_binary);
|
||||
check_as_null!(as_string);
|
||||
check_as_null!(as_boolean);
|
||||
check_as_null!(as_date);
|
||||
check_as_null!(as_datetime);
|
||||
check_as_null!(as_list);
|
||||
|
||||
macro_rules! check_as_correct {
|
||||
($data: expr, $Variant: ident, $method: ident) => {
|
||||
assert_eq!(Some($data), ValueRef::$Variant($data).$method().unwrap());
|
||||
};
|
||||
}
|
||||
|
||||
check_as_correct!("hello", String, as_string);
|
||||
check_as_correct!("hello".as_bytes(), Binary, as_binary);
|
||||
check_as_correct!(true, Boolean, as_boolean);
|
||||
check_as_correct!(Date::new(123), Date, as_date);
|
||||
check_as_correct!(DateTime::new(12), DateTime, as_datetime);
|
||||
let list = ListValue {
|
||||
items: None,
|
||||
datatype: ConcreteDataType::int32_datatype(),
|
||||
};
|
||||
check_as_correct!(ListValueRef::Ref(&list), List, as_list);
|
||||
|
||||
let wrong_value = ValueRef::Int32(12345);
|
||||
assert!(wrong_value.as_binary().is_err());
|
||||
assert!(wrong_value.as_string().is_err());
|
||||
assert!(wrong_value.as_boolean().is_err());
|
||||
assert!(wrong_value.as_date().is_err());
|
||||
assert!(wrong_value.as_datetime().is_err());
|
||||
assert!(wrong_value.as_list().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_into_value_ref() {
|
||||
macro_rules! check_into_value_ref {
|
||||
($Variant: ident, $data: expr, $PrimitiveType: ident, $Wrapper: ident) => {
|
||||
let data: $PrimitiveType = $data;
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
data.into_value_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
ValueRef::from(data)
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
Some(data).into_value_ref()
|
||||
);
|
||||
assert_eq!(
|
||||
ValueRef::$Variant($Wrapper::from(data)),
|
||||
ValueRef::from(Some(data))
|
||||
);
|
||||
let x: Option<$PrimitiveType> = None;
|
||||
assert_eq!(ValueRef::Null, x.into_value_ref());
|
||||
assert_eq!(ValueRef::Null, x.into());
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! check_primitive_into_value_ref {
|
||||
($Variant: ident, $data: expr, $PrimitiveType: ident) => {
|
||||
check_into_value_ref!($Variant, $data, $PrimitiveType, $PrimitiveType)
|
||||
};
|
||||
}
|
||||
|
||||
check_primitive_into_value_ref!(Boolean, true, bool);
|
||||
check_primitive_into_value_ref!(UInt8, 10, u8);
|
||||
check_primitive_into_value_ref!(UInt16, 20, u16);
|
||||
check_primitive_into_value_ref!(UInt32, 30, u32);
|
||||
check_primitive_into_value_ref!(UInt64, 40, u64);
|
||||
check_primitive_into_value_ref!(Int8, -10, i8);
|
||||
check_primitive_into_value_ref!(Int16, -20, i16);
|
||||
check_primitive_into_value_ref!(Int32, -30, i32);
|
||||
check_primitive_into_value_ref!(Int64, -40, i64);
|
||||
check_into_value_ref!(Float32, 10.0, f32, OrderedF32);
|
||||
check_into_value_ref!(Float64, 10.0, f64, OrderedF64);
|
||||
|
||||
let hello = "hello";
|
||||
assert_eq!(
|
||||
ValueRef::Binary(hello.as_bytes()),
|
||||
ValueRef::from(hello.as_bytes())
|
||||
);
|
||||
assert_eq!(ValueRef::String(hello), ValueRef::from(hello));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ use std::any::Any;
|
||||
use std::fmt::Debug;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::ArrayRef;
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::bitmap::Bitmap;
|
||||
pub use binary::*;
|
||||
pub use boolean::*;
|
||||
@@ -33,9 +33,9 @@ use snafu::ensure;
|
||||
pub use string::*;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{BadArrayAccessSnafu, Result};
|
||||
use crate::error::{self, Result};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::Value;
|
||||
use crate::value::{Value, ValueRef};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Validity<'a> {
|
||||
@@ -80,6 +80,9 @@ pub trait Vector: Send + Sync + Serializable + Debug {
|
||||
/// Convert this vector to a new arrow [ArrayRef].
|
||||
fn to_arrow_array(&self) -> ArrayRef;
|
||||
|
||||
/// Convert this vector to a new boxed arrow [Array].
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array>;
|
||||
|
||||
/// Returns the validity of the Array.
|
||||
fn validity(&self) -> Validity;
|
||||
|
||||
@@ -110,6 +113,10 @@ pub trait Vector: Send + Sync + Serializable + Debug {
|
||||
self.null_count() == self.len()
|
||||
}
|
||||
|
||||
/// Slices the `Vector`, returning a new `VectorRef`.
|
||||
///
|
||||
/// # Panics
|
||||
/// This function panics if `offset + length > self.len()`.
|
||||
fn slice(&self, offset: usize, length: usize) -> VectorRef;
|
||||
|
||||
/// Returns the clone of value at `index`.
|
||||
@@ -123,7 +130,7 @@ pub trait Vector: Send + Sync + Serializable + Debug {
|
||||
fn try_get(&self, index: usize) -> Result<Value> {
|
||||
ensure!(
|
||||
index < self.len(),
|
||||
BadArrayAccessSnafu {
|
||||
error::BadArrayAccessSnafu {
|
||||
index,
|
||||
size: self.len()
|
||||
}
|
||||
@@ -134,6 +141,12 @@ pub trait Vector: Send + Sync + Serializable + Debug {
|
||||
// Copies each element according offsets parameter.
|
||||
// (i-th element should be copied offsets[i] - offsets[i - 1] times.)
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef;
|
||||
|
||||
/// Returns the reference of value at `index`.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if `index` is out of bound.
|
||||
fn get_ref(&self, index: usize) -> ValueRef;
|
||||
}
|
||||
|
||||
pub type VectorRef = Arc<dyn Vector>;
|
||||
@@ -180,8 +193,40 @@ macro_rules! impl_get_for_vector {
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_get_ref_for_vector {
|
||||
($array: expr, $index: ident) => {
|
||||
if $array.is_valid($index) {
|
||||
// Safety: The index have been checked by `is_valid()`.
|
||||
unsafe { $array.value_unchecked($index).into() }
|
||||
} else {
|
||||
ValueRef::Null
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_extend_for_builder {
|
||||
($mutable_array: expr, $vector: ident, $VectorType: ident, $offset: ident, $length: ident) => {{
|
||||
use snafu::OptionExt;
|
||||
|
||||
let concrete_vector = $vector
|
||||
.as_any()
|
||||
.downcast_ref::<$VectorType>()
|
||||
.with_context(|| crate::error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to cast vector from {} to {}",
|
||||
$vector.vector_type_name(),
|
||||
stringify!($VectorType)
|
||||
),
|
||||
})?;
|
||||
let slice = concrete_vector.array.slice($offset, $length);
|
||||
$mutable_array.extend_trusted_len(slice.iter());
|
||||
Ok(())
|
||||
}};
|
||||
}
|
||||
|
||||
pub(crate) use {
|
||||
impl_get_for_vector, impl_try_from_arrow_array_for_vector, impl_validity_for_vector,
|
||||
impl_extend_for_builder, impl_get_for_vector, impl_get_ref_for_vector,
|
||||
impl_try_from_arrow_array_for_vector, impl_validity_for_vector,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -192,7 +237,7 @@ pub mod tests {
|
||||
use super::helper::Helper;
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::DataTypeBuilder;
|
||||
use crate::types::PrimitiveElement;
|
||||
|
||||
#[test]
|
||||
fn test_df_columns_to_vector() {
|
||||
|
||||
@@ -4,16 +4,14 @@ use std::sync::Arc;
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use arrow::array::{BinaryValueIter, MutableArray};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use snafu::OptionExt;
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::arrow_array::{BinaryArray, MutableBinaryArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::Result;
|
||||
use crate::error::SerializeSnafu;
|
||||
use crate::error::{self, Result};
|
||||
use crate::scalars::{common, ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::Value;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of binary strings.
|
||||
@@ -57,6 +55,10 @@ impl Vector for BinaryVector {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
@@ -80,6 +82,10 @@ impl Vector for BinaryVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
common::replicate_scalar_vector(self, offsets)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
vectors::impl_get_ref_for_vector!(self.array, index)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BinaryVector {
|
||||
@@ -125,6 +131,15 @@ impl MutableVector for BinaryVectorBuilder {
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.mutable_array.push(value.as_binary()?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self.mutable_array, vector, BinaryVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for BinaryVectorBuilder {
|
||||
@@ -155,7 +170,7 @@ impl Serializable for BinaryVector {
|
||||
Some(vec) => serde_json::to_value(vec),
|
||||
})
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
.context(error::SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -169,11 +184,13 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::arrow_array::BinaryArray;
|
||||
use crate::data_type::DataType;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::BinaryType;
|
||||
|
||||
#[test]
|
||||
fn test_binary_vector_misc() {
|
||||
let v = BinaryVector::from(BinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
let v = BinaryVector::from(BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
|
||||
assert_eq!(2, v.len());
|
||||
assert_eq!("BinaryVector", v.vector_type_name());
|
||||
@@ -185,6 +202,7 @@ mod tests {
|
||||
for i in 0..2 {
|
||||
assert!(!v.is_null(i));
|
||||
assert_eq!(Value::Binary(Bytes::from(vec![1, 2, 3])), v.get(i));
|
||||
assert_eq!(ValueRef::Binary(&[1, 2, 3]), v.get_ref(i));
|
||||
}
|
||||
|
||||
let arrow_arr = v.to_arrow_array();
|
||||
@@ -194,8 +212,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_serialize_binary_vector_to_json() {
|
||||
let vector =
|
||||
BinaryVector::from(BinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
let vector = BinaryVector::from(BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]));
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
assert_eq!(
|
||||
@@ -221,7 +238,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let arrow_array = BinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]);
|
||||
let arrow_array = BinaryArray::from_slice(&[vec![1, 2, 3], vec![1, 2, 3]]);
|
||||
let original = arrow_array.clone();
|
||||
let vector = BinaryVector::from(arrow_array);
|
||||
assert_eq!(original, vector.array);
|
||||
@@ -270,4 +287,23 @@ mod tests {
|
||||
assert_eq!(1, slots.null_count());
|
||||
assert!(!slots.get_bit(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_binary_vector_builder() {
|
||||
let input = BinaryVector::from_slice(&[b"world", b"one", b"two"]);
|
||||
|
||||
let mut builder = BinaryType::default().create_mutable_vector(3);
|
||||
builder
|
||||
.push_value_ref(ValueRef::Binary("hello".as_bytes()))
|
||||
.unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(BinaryVector::from_slice(&[b"hello", b"one", b"two"]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,15 +4,14 @@ use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, BooleanArray, MutableArray, MutableBooleanArray};
|
||||
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
|
||||
use snafu::OptionExt;
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::Result;
|
||||
use crate::scalars::common::replicate_scalar_vector;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::Value;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector of boolean.
|
||||
@@ -72,6 +71,10 @@ impl Vector for BooleanVector {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
@@ -95,6 +98,10 @@ impl Vector for BooleanVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
replicate_scalar_vector(self, offsets)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
vectors::impl_get_ref_for_vector!(self.array, index)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BooleanVector {
|
||||
@@ -140,6 +147,15 @@ impl MutableVector for BooleanVectorBuilder {
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.mutable_array.push(value.as_boolean()?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self.mutable_array, vector, BooleanVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for BooleanVectorBuilder {
|
||||
@@ -179,7 +195,9 @@ mod tests {
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::BooleanType;
|
||||
|
||||
#[test]
|
||||
fn test_boolean_vector_misc() {
|
||||
@@ -195,6 +213,7 @@ mod tests {
|
||||
for (i, b) in bools.iter().enumerate() {
|
||||
assert!(!v.is_null(i));
|
||||
assert_eq!(Value::Boolean(*b), v.get(i));
|
||||
assert_eq!(ValueRef::Boolean(*b), v.get_ref(i));
|
||||
}
|
||||
|
||||
let arrow_arr = v.to_arrow_array();
|
||||
@@ -286,4 +305,21 @@ mod tests {
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boolean_vector_builder() {
|
||||
let input = BooleanVector::from_slice(&[true, false, true]);
|
||||
|
||||
let mut builder = BooleanType::default().create_mutable_vector(3);
|
||||
builder.push_value_ref(ValueRef::Boolean(true)).unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(BooleanVector::from_slice(&[true, false, true]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,13 +2,13 @@ use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::ArrayRef;
|
||||
use arrow::array::{Array, ArrayRef};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, SerializeSnafu};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::value::Value;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::Helper;
|
||||
use crate::vectors::{Validity, Vector, VectorRef};
|
||||
|
||||
@@ -55,6 +55,11 @@ impl Vector for ConstantVector {
|
||||
v.to_arrow_array()
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let v = self.vector.replicate(&[self.length]);
|
||||
v.to_boxed_arrow_array()
|
||||
}
|
||||
|
||||
fn is_const(&self) -> bool {
|
||||
true
|
||||
}
|
||||
@@ -98,6 +103,10 @@ impl Vector for ConstantVector {
|
||||
|
||||
Arc::new(Self::new(self.vector.clone(), *offsets.last().unwrap()))
|
||||
}
|
||||
|
||||
fn get_ref(&self, _index: usize) -> ValueRef {
|
||||
self.vector.get_ref(0)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for ConstantVector {
|
||||
|
||||
@@ -6,8 +6,8 @@ use common_time::date::Date;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::ConversionSnafu;
|
||||
use crate::prelude::{ScalarVectorBuilder, Validity, Value, Vector, VectorRef};
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::*;
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::vectors::{MutableVector, PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
@@ -24,13 +24,13 @@ impl DateVector {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> crate::error::Result<Self> {
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i32>>()
|
||||
.with_context(|| ConversionSnafu {
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
@@ -65,6 +65,16 @@ impl Vector for DateVector {
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date32,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
@@ -94,6 +104,16 @@ impl Vector for DateVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
self.array.replicate(offsets)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int32(v) => ValueRef::Date(Date::new(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<i32>>> for DateVector {
|
||||
@@ -135,7 +155,7 @@ impl ScalarVector for DateVector {
|
||||
}
|
||||
|
||||
impl Serializable for DateVector {
|
||||
fn serialize_to_json(&self) -> crate::error::Result<Vec<serde_json::Value>> {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
Ok(self
|
||||
.array
|
||||
.iter_data()
|
||||
@@ -172,6 +192,26 @@ impl MutableVector for DateVectorBuilder {
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_date()?.map(|d| d.val()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for DateVectorBuilder {
|
||||
@@ -197,17 +237,23 @@ impl ScalarVectorBuilder for DateVectorBuilder {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::DateType;
|
||||
|
||||
#[test]
|
||||
pub fn test_build_date_vector() {
|
||||
fn test_build_date_vector() {
|
||||
let mut builder = DateVectorBuilder::with_capacity(4);
|
||||
builder.push(Some(Date::new(1)));
|
||||
builder.push(None);
|
||||
builder.push(Some(Date::new(-1)));
|
||||
let vector = builder.finish();
|
||||
assert_eq!(3, vector.len());
|
||||
assert_eq!(Value::Date(Date::new(1)), vector.get(0));
|
||||
assert_eq!(ValueRef::Date(Date::new(1)), vector.get_ref(0));
|
||||
assert_eq!(Some(Date::new(1)), vector.get_data(0));
|
||||
assert_eq!(None, vector.get_data(1));
|
||||
assert_eq!(Value::Null, vector.get(1));
|
||||
assert_eq!(ValueRef::Null, vector.get_ref(1));
|
||||
assert_eq!(Some(Date::new(-1)), vector.get_data(2));
|
||||
let mut iter = vector.iter_data();
|
||||
assert_eq!(Some(Date::new(1)), iter.next().unwrap());
|
||||
@@ -216,10 +262,33 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_date_scalar() {
|
||||
fn test_date_scalar() {
|
||||
let vector = DateVector::from_slice(&[Date::new(1), Date::new(2)]);
|
||||
assert_eq!(2, vector.len());
|
||||
assert_eq!(Some(Date::new(1)), vector.get_data(0));
|
||||
assert_eq!(Some(Date::new(2)), vector.get_data(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_date_vector_builder() {
|
||||
let input = DateVector::from_slice(&[Date::new(1), Date::new(2), Date::new(3)]);
|
||||
|
||||
let mut builder = DateType::default().create_mutable_vector(3);
|
||||
builder
|
||||
.push_value_ref(ValueRef::Date(Date::new(5)))
|
||||
.unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateVector::from_slice(&[
|
||||
Date::new(5),
|
||||
Date::new(2),
|
||||
Date::new(3),
|
||||
]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@ use common_time::datetime::DateTime;
|
||||
use snafu::OptionExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::ConversionSnafu;
|
||||
use crate::error::{self, Result};
|
||||
use crate::prelude::{
|
||||
MutableVector, ScalarVector, ScalarVectorBuilder, Validity, Value, Vector, VectorRef,
|
||||
MutableVector, ScalarVector, ScalarVectorBuilder, Validity, Value, ValueRef, Vector, VectorRef,
|
||||
};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::vectors::{PrimitiveIter, PrimitiveVector, PrimitiveVectorBuilder};
|
||||
@@ -25,13 +25,13 @@ impl DateTimeVector {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> crate::error::Result<Self> {
|
||||
pub fn try_from_arrow_array(array: impl AsRef<dyn Array>) -> Result<Self> {
|
||||
Ok(Self::new(
|
||||
array
|
||||
.as_ref()
|
||||
.as_any()
|
||||
.downcast_ref::<PrimitiveArray<i64>>()
|
||||
.with_context(|| ConversionSnafu {
|
||||
.with_context(|| error::ConversionSnafu {
|
||||
from: format!("{:?}", array.as_ref().data_type()),
|
||||
})?
|
||||
.clone(),
|
||||
@@ -66,6 +66,16 @@ impl Vector for DateTimeVector {
|
||||
))
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
let validity = self.array.array.validity().cloned();
|
||||
let buffer = self.array.array.values().clone();
|
||||
Box::new(PrimitiveArray::new(
|
||||
arrow::datatypes::DataType::Date64,
|
||||
buffer,
|
||||
validity,
|
||||
))
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
self.array.validity()
|
||||
}
|
||||
@@ -95,6 +105,16 @@ impl Vector for DateTimeVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
self.array.replicate(offsets)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
match self.array.get(index) {
|
||||
Value::Int64(v) => ValueRef::DateTime(DateTime::new(v)),
|
||||
Value::Null => ValueRef::Null,
|
||||
_ => {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for DateTimeVector {
|
||||
@@ -163,6 +183,26 @@ impl MutableVector for DateTimeVectorBuilder {
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_datetime()?.map(|d| d.val()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let concrete_vector = vector
|
||||
.as_any()
|
||||
.downcast_ref::<DateTimeVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to DateVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
self.buffer
|
||||
.extend_slice_of(&concrete_vector.array, offset, length)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DateTimeIter<'a> {
|
||||
@@ -199,9 +239,11 @@ mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::DateTimeType;
|
||||
|
||||
#[test]
|
||||
pub fn test_datetime_vector() {
|
||||
fn test_datetime_vector() {
|
||||
let v = DateTimeVector::new(PrimitiveArray::from_vec(vec![1, 2, 3]));
|
||||
assert_eq!(ConcreteDataType::datetime_datatype(), v.data_type());
|
||||
assert_eq!(3, v.len());
|
||||
@@ -210,6 +252,11 @@ mod tests {
|
||||
&arrow::datatypes::DataType::Date64,
|
||||
v.to_arrow_array().data_type()
|
||||
);
|
||||
|
||||
assert_eq!(Some(DateTime::new(1)), v.get_data(0));
|
||||
assert_eq!(Value::DateTime(DateTime::new(1)), v.get(0));
|
||||
assert_eq!(ValueRef::DateTime(DateTime::new(1)), v.get_ref(0));
|
||||
|
||||
let mut iter = v.iter_data();
|
||||
assert_eq!(Some(DateTime::new(1)), iter.next().unwrap());
|
||||
assert_eq!(Some(DateTime::new(2)), iter.next().unwrap());
|
||||
@@ -230,7 +277,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_datetime_vector_builder() {
|
||||
fn test_datetime_vector_builder() {
|
||||
let mut builder = DateTimeVectorBuilder::with_capacity(3);
|
||||
builder.push(Some(DateTime::new(1)));
|
||||
builder.push(None);
|
||||
@@ -241,5 +288,26 @@ mod tests {
|
||||
assert_eq!(Value::DateTime(DateTime::new(1)), v.get(0));
|
||||
assert_eq!(Value::Null, v.get(1));
|
||||
assert_eq!(Value::DateTime(DateTime::new(-1)), v.get(2));
|
||||
|
||||
let input =
|
||||
DateTimeVector::from_slice(&[DateTime::new(1), DateTime::new(2), DateTime::new(3)]);
|
||||
|
||||
let mut builder = DateTimeType::default().create_mutable_vector(3);
|
||||
builder
|
||||
.push_value_ref(ValueRef::DateTime(DateTime::new(5)))
|
||||
.unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(DateTimeVector::from_slice(&[
|
||||
DateTime::new(5),
|
||||
DateTime::new(2),
|
||||
DateTime::new(3),
|
||||
]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{Array, ArrayRef, ListArray};
|
||||
use arrow::bitmap::MutableBitmap;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::prelude::*;
|
||||
@@ -10,7 +11,7 @@ use crate::error::Result;
|
||||
use crate::prelude::*;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::ListType;
|
||||
use crate::value::ListValue;
|
||||
use crate::value::{ListValue, ListValueRef};
|
||||
use crate::vectors::{impl_try_from_arrow_array_for_vector, impl_validity_for_vector};
|
||||
|
||||
type ArrowListArray = ListArray<i32>;
|
||||
@@ -49,6 +50,10 @@ impl Vector for ListVector {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
impl_validity_for_vector!(self.array)
|
||||
}
|
||||
@@ -94,6 +99,13 @@ impl Vector for ListVector {
|
||||
// Refer to Databend's `ArrayColumn` for more details.
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
ValueRef::List(ListValueRef::Indexed {
|
||||
vector: self,
|
||||
idx: index,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for ListVector {
|
||||
@@ -125,6 +137,134 @@ impl From<ArrowListArray> for ListVector {
|
||||
|
||||
impl_try_from_arrow_array_for_vector!(ArrowListArray, ListVector);
|
||||
|
||||
// Some codes are ported from arrow2's MutableListArray.
|
||||
pub struct ListVectorBuilder {
|
||||
inner_type: ConcreteDataType,
|
||||
offsets: Vec<i32>,
|
||||
values: Box<dyn MutableVector>,
|
||||
validity: Option<MutableBitmap>,
|
||||
}
|
||||
|
||||
impl ListVectorBuilder {
|
||||
pub fn with_capacity(inner_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
|
||||
let mut offsets = Vec::with_capacity(capacity + 1);
|
||||
offsets.push(0);
|
||||
// The actual required capacity might greater than the capacity of the `ListVector`
|
||||
// if there exists child vector that has more than one element.
|
||||
let values = inner_type.create_mutable_vector(capacity);
|
||||
|
||||
ListVectorBuilder {
|
||||
inner_type,
|
||||
offsets,
|
||||
values,
|
||||
validity: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn last_offset(&self) -> i32 {
|
||||
*self.offsets.last().unwrap()
|
||||
}
|
||||
|
||||
fn push_null(&mut self) {
|
||||
self.offsets.push(self.last_offset());
|
||||
match &mut self.validity {
|
||||
Some(validity) => validity.push(false),
|
||||
None => self.init_validity(),
|
||||
}
|
||||
}
|
||||
|
||||
fn init_validity(&mut self) {
|
||||
let len = self.offsets.len() - 1;
|
||||
|
||||
let mut validity = MutableBitmap::with_capacity(self.offsets.capacity());
|
||||
validity.extend_constant(len, true);
|
||||
validity.set(len - 1, false);
|
||||
self.validity = Some(validity)
|
||||
}
|
||||
|
||||
fn push_list_value(&mut self, list_value: &ListValue) -> Result<()> {
|
||||
if let Some(items) = list_value.items() {
|
||||
for item in &**items {
|
||||
self.values.push_value_ref(item.as_value_ref())?;
|
||||
}
|
||||
}
|
||||
self.push_valid();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Needs to be called when a valid value was extended to this builder.
|
||||
fn push_valid(&mut self) {
|
||||
let size = self.values.len();
|
||||
let size = i32::try_from(size).unwrap();
|
||||
assert!(size >= *self.offsets.last().unwrap());
|
||||
|
||||
self.offsets.push(size);
|
||||
if let Some(validity) = &mut self.validity {
|
||||
validity.push(true)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl MutableVector for ListVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::list_datatype(self.inner_type.clone())
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.offsets.len() - 1
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
let array = ArrowListArray::try_new(
|
||||
ConcreteDataType::list_datatype(self.inner_type.clone()).as_arrow_type(),
|
||||
std::mem::take(&mut self.offsets).into(),
|
||||
self.values.to_vector().to_arrow_array(),
|
||||
std::mem::take(&mut self.validity).map(|x| x.into()),
|
||||
)
|
||||
.unwrap(); // The `ListVectorBuilder` itself should ensure it always builds a valid array.
|
||||
|
||||
let vector = ListVector {
|
||||
array,
|
||||
inner_data_type: self.inner_type.clone(),
|
||||
};
|
||||
Arc::new(vector)
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
if let Some(list_ref) = value.as_list()? {
|
||||
match list_ref {
|
||||
ListValueRef::Indexed { vector, idx } => match vector.get(idx).as_list()? {
|
||||
Some(list_value) => self.push_list_value(list_value)?,
|
||||
None => self.push_null(),
|
||||
},
|
||||
ListValueRef::Ref(list_value) => self.push_list_value(list_value)?,
|
||||
}
|
||||
} else {
|
||||
self.push_null();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
for idx in offset..offset + length {
|
||||
let value = vector.get_ref(idx);
|
||||
self.push_value_ref(value)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow::array::{MutableListArray, MutablePrimitiveArray, TryExtend};
|
||||
@@ -194,6 +334,17 @@ mod tests {
|
||||
)),
|
||||
list_vector.get(0)
|
||||
);
|
||||
let value_ref = list_vector.get_ref(0);
|
||||
assert!(matches!(
|
||||
value_ref,
|
||||
ValueRef::List(ListValueRef::Indexed { .. })
|
||||
));
|
||||
let value_ref = list_vector.get_ref(1);
|
||||
if let ValueRef::List(ListValueRef::Indexed { idx, .. }) = value_ref {
|
||||
assert_eq!(1, idx);
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
assert_eq!(Value::Null, list_vector.get(1));
|
||||
assert_eq!(
|
||||
Value::List(ListValue::new(
|
||||
@@ -240,10 +391,11 @@ mod tests {
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
let list_vector = ListVector {
|
||||
array: arrow_array,
|
||||
inner_data_type: ConcreteDataType::int32_datatype(),
|
||||
};
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
assert_eq!(
|
||||
ConcreteDataType::List(ListType::new(ConcreteDataType::int64_datatype())),
|
||||
list_vector.data_type()
|
||||
);
|
||||
let mut iter = list_vector.values_iter();
|
||||
assert_eq!(
|
||||
"Int64[1, 2, 3]",
|
||||
@@ -272,13 +424,54 @@ mod tests {
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
let list_vector = ListVector {
|
||||
array: arrow_array,
|
||||
inner_data_type: ConcreteDataType::int32_datatype(),
|
||||
};
|
||||
let list_vector = ListVector::from(arrow_array);
|
||||
assert_eq!(
|
||||
"Ok([Array([Number(1), Number(2), Number(3)]), Null, Array([Number(4), Null, Number(6)])])",
|
||||
format!("{:?}", list_vector.serialize_to_json())
|
||||
);
|
||||
}
|
||||
|
||||
fn new_list_vector(data: Vec<Option<Vec<Option<i32>>>>) -> ListVector {
|
||||
let mut arrow_array = MutableListArray::<i32, MutablePrimitiveArray<i32>>::new();
|
||||
arrow_array.try_extend(data).unwrap();
|
||||
let arrow_array: ArrowListArray = arrow_array.into();
|
||||
|
||||
ListVector::from(arrow_array)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_vector_builder() {
|
||||
let mut builder =
|
||||
ListType::new(ConcreteDataType::int32_datatype()).create_mutable_vector(3);
|
||||
builder
|
||||
.push_value_ref(ValueRef::List(ListValueRef::Ref(&ListValue::new(
|
||||
Some(Box::new(vec![
|
||||
Value::Int32(4),
|
||||
Value::Null,
|
||||
Value::Int32(6),
|
||||
])),
|
||||
ConcreteDataType::int32_datatype(),
|
||||
))))
|
||||
.unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
|
||||
let data = vec![
|
||||
Some(vec![Some(1), Some(2), Some(3)]),
|
||||
None,
|
||||
Some(vec![Some(7), Some(8), None]),
|
||||
];
|
||||
let input = new_list_vector(data);
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(new_list_vector(vec![
|
||||
Some(vec![Some(4), None, Some(6)]),
|
||||
None,
|
||||
Some(vec![Some(7), Some(8), None]),
|
||||
]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,19 +1,40 @@
|
||||
use std::any::Any;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::prelude::*;
|
||||
|
||||
/// Mutable vector that could be used to build an immutable vector.
|
||||
pub trait MutableVector: Send + Sync {
|
||||
/// Returns the data type of the vector.
|
||||
fn data_type(&self) -> ConcreteDataType;
|
||||
|
||||
/// Returns the length of the vector.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Returns whether the vector is empty.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Convert to Any, to enable dynamic casting.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
/// Convert to mutable Any, to enable dynamic casting.
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any;
|
||||
|
||||
/// Convert `self` to an (immutable) [VectorRef] and reset `self`.
|
||||
fn to_vector(&mut self) -> VectorRef;
|
||||
|
||||
/// Push value ref to this mutable vector.
|
||||
///
|
||||
/// Returns error if data type unmatch.
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()>;
|
||||
|
||||
/// Extend this mutable vector by slice of `vector`.
|
||||
///
|
||||
/// Returns error if data type unmatch.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `offset + length > vector.len()`.
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()>;
|
||||
}
|
||||
|
||||
@@ -5,15 +5,14 @@ use std::sync::Arc;
|
||||
use arrow::array::ArrayRef;
|
||||
use arrow::array::{Array, NullArray};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use snafu::OptionExt;
|
||||
use snafu::{ensure, OptionExt};
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::Result;
|
||||
use crate::error::{self, Result};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::NullType;
|
||||
use crate::value::Value;
|
||||
use crate::vectors::impl_try_from_arrow_array_for_vector;
|
||||
use crate::vectors::{Validity, Vector, VectorRef};
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
#[derive(PartialEq)]
|
||||
pub struct NullVector {
|
||||
@@ -55,6 +54,10 @@ impl Vector for NullVector {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
Validity::AllNull
|
||||
}
|
||||
@@ -90,6 +93,11 @@ impl Vector for NullVector {
|
||||
array: NullArray::new(ArrowDataType::Null, *offsets.last().unwrap() as usize),
|
||||
})
|
||||
}
|
||||
|
||||
fn get_ref(&self, _index: usize) -> ValueRef {
|
||||
// Skips bound check for null array.
|
||||
ValueRef::Null
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for NullVector {
|
||||
@@ -106,13 +114,77 @@ impl Serializable for NullVector {
|
||||
}
|
||||
}
|
||||
|
||||
impl_try_from_arrow_array_for_vector!(NullArray, NullVector);
|
||||
vectors::impl_try_from_arrow_array_for_vector!(NullArray, NullVector);
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct NullVectorBuilder {
|
||||
length: usize,
|
||||
}
|
||||
|
||||
impl MutableVector for NullVectorBuilder {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
ConcreteDataType::null_datatype()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.length
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn as_mut_any(&mut self) -> &mut dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
let vector = Arc::new(NullVector::new(self.length));
|
||||
self.length = 0;
|
||||
vector
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
ensure!(
|
||||
value.is_null(),
|
||||
error::CastTypeSnafu {
|
||||
msg: format!("Failed to cast value ref {:?} to null", value),
|
||||
}
|
||||
);
|
||||
|
||||
self.length += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vector
|
||||
.as_any()
|
||||
.downcast_ref::<NullVector>()
|
||||
.with_context(|| error::CastTypeSnafu {
|
||||
msg: format!(
|
||||
"Failed to convert vector from {} to NullVector",
|
||||
vector.vector_type_name()
|
||||
),
|
||||
})?;
|
||||
assert!(
|
||||
offset + length <= vector.len(),
|
||||
"offset {} + length {} must less than {}",
|
||||
offset,
|
||||
length,
|
||||
vector.len()
|
||||
);
|
||||
|
||||
self.length += length;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
|
||||
#[test]
|
||||
fn test_null_vector_misc() {
|
||||
@@ -135,6 +207,7 @@ mod tests {
|
||||
for i in 0..32 {
|
||||
assert!(v.is_null(i));
|
||||
assert_eq!(Value::Null, v.get(i));
|
||||
assert_eq!(ValueRef::Null, v.get_ref(i));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,4 +233,21 @@ mod tests {
|
||||
assert_eq!(Validity::AllNull, vector.validity());
|
||||
assert_eq!(5, vector.null_count());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_null_vector_builder() {
|
||||
let mut builder = NullType::default().create_mutable_vector(3);
|
||||
builder.push_value_ref(ValueRef::Null).unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
|
||||
let input = NullVector::new(3);
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(input);
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,8 +14,8 @@ use crate::error::{Result, SerializeSnafu};
|
||||
use crate::scalars::{Scalar, ScalarRef};
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::{DataTypeBuilder, Primitive};
|
||||
use crate::value::Value;
|
||||
use crate::types::{Primitive, PrimitiveElement};
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// Vector for primitive data types.
|
||||
@@ -61,7 +61,7 @@ impl<T: Primitive> PrimitiveVector<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> Vector for PrimitiveVector<T> {
|
||||
impl<T: PrimitiveElement> Vector for PrimitiveVector<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
@@ -82,6 +82,10 @@ impl<T: Primitive + DataTypeBuilder> Vector for PrimitiveVector<T> {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
@@ -128,6 +132,15 @@ impl<T: Primitive + DataTypeBuilder> Vector for PrimitiveVector<T> {
|
||||
}
|
||||
builder.to_vector()
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
if self.array.is_valid(index) {
|
||||
// Safety: The index have been checked by `is_valid()`.
|
||||
unsafe { self.array.value_unchecked(index).into_value_ref() }
|
||||
} else {
|
||||
ValueRef::Null
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive> From<PrimitiveArray<T>> for PrimitiveVector<T> {
|
||||
@@ -154,7 +167,7 @@ impl<T: Primitive, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr> for Pr
|
||||
|
||||
impl<T> ScalarVector for PrimitiveVector<T>
|
||||
where
|
||||
T: Scalar<VectorType = Self> + Primitive + DataTypeBuilder,
|
||||
T: Scalar<VectorType = Self> + PrimitiveElement,
|
||||
for<'a> T: ScalarRef<'a, ScalarType = T, VectorType = Self>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
@@ -203,11 +216,11 @@ impl<'a, T: Copy> Iterator for PrimitiveIter<'a, T> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PrimitiveVectorBuilder<T: Primitive + DataTypeBuilder> {
|
||||
pub struct PrimitiveVectorBuilder<T: PrimitiveElement> {
|
||||
pub(crate) mutable_array: MutablePrimitiveArray<T>,
|
||||
}
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> PrimitiveVectorBuilder<T> {
|
||||
impl<T: PrimitiveElement> PrimitiveVectorBuilder<T> {
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutablePrimitiveArray::with_capacity(capacity),
|
||||
@@ -228,7 +241,7 @@ pub type Int64VectorBuilder = PrimitiveVectorBuilder<i64>;
|
||||
pub type Float32VectorBuilder = PrimitiveVectorBuilder<f32>;
|
||||
pub type Float64VectorBuilder = PrimitiveVectorBuilder<f64>;
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
impl<T: PrimitiveElement> MutableVector for PrimitiveVectorBuilder<T> {
|
||||
fn data_type(&self) -> ConcreteDataType {
|
||||
T::build_data_type()
|
||||
}
|
||||
@@ -250,11 +263,25 @@ impl<T: Primitive + DataTypeBuilder> MutableVector for PrimitiveVectorBuilder<T>
|
||||
array: std::mem::take(&mut self.mutable_array).into(),
|
||||
})
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
let primitive = T::cast_value_ref(value)?;
|
||||
self.mutable_array.push(primitive);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
let primitive = T::cast_vector(vector)?;
|
||||
// Slice the underlying array to avoid creating a new Arc.
|
||||
let slice = primitive.slice(offset, length);
|
||||
self.mutable_array.extend_trusted_len(slice.iter());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ScalarVectorBuilder for PrimitiveVectorBuilder<T>
|
||||
where
|
||||
T: Scalar<VectorType = PrimitiveVector<T>> + Primitive + DataTypeBuilder,
|
||||
T: Scalar<VectorType = PrimitiveVector<T>> + PrimitiveElement,
|
||||
for<'a> T: ScalarRef<'a, ScalarType = T, VectorType = PrimitiveVector<T>>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
@@ -277,7 +304,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> Serializable for PrimitiveVector<T> {
|
||||
impl<T: PrimitiveElement> Serializable for PrimitiveVector<T> {
|
||||
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
|
||||
self.array
|
||||
.iter()
|
||||
@@ -293,7 +320,9 @@ mod tests {
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::Int64Type;
|
||||
|
||||
fn check_vec(v: PrimitiveVector<i32>) {
|
||||
assert_eq!(4, v.len());
|
||||
@@ -305,6 +334,7 @@ mod tests {
|
||||
for i in 0..4 {
|
||||
assert!(!v.is_null(i));
|
||||
assert_eq!(Value::Int32(i as i32 + 1), v.get(i));
|
||||
assert_eq!(ValueRef::Int32(i as i32 + 1), v.get_ref(i));
|
||||
}
|
||||
|
||||
let json_value = v.serialize_to_json().unwrap();
|
||||
@@ -416,4 +446,21 @@ mod tests {
|
||||
let v = PrimitiveVector::<i64>::from(vec![Some(0i64), Some(1i64), Some(2i64), None, None]);
|
||||
assert_eq!(40, v.memory_size());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_primitive_vector_builder() {
|
||||
let mut builder = Int64Type::default().create_mutable_vector(3);
|
||||
builder.push_value_ref(ValueRef::Int64(123)).unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
|
||||
let input = Int64Vector::from_slice(&[7, 8, 9]);
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(Int64Vector::from_slice(&[123, 8, 9]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,16 +4,15 @@ use std::sync::Arc;
|
||||
use arrow::array::{Array, ArrayRef, MutableArray, Utf8ValuesIter};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::OptionExt;
|
||||
use snafu::ResultExt;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
|
||||
use crate::arrow_array::{MutableStringArray, StringArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::SerializeSnafu;
|
||||
use crate::error::{Result, SerializeSnafu};
|
||||
use crate::scalars::{common, ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::StringType;
|
||||
use crate::value::Value;
|
||||
use crate::value::{Value, ValueRef};
|
||||
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
|
||||
|
||||
/// String array wrapper
|
||||
@@ -89,6 +88,10 @@ impl Vector for StringVector {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
|
||||
Box::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
vectors::impl_validity_for_vector!(self.array)
|
||||
}
|
||||
@@ -112,6 +115,10 @@ impl Vector for StringVector {
|
||||
fn replicate(&self, offsets: &[usize]) -> VectorRef {
|
||||
common::replicate_scalar_vector(self, offsets)
|
||||
}
|
||||
|
||||
fn get_ref(&self, index: usize) -> ValueRef {
|
||||
vectors::impl_get_ref_for_vector!(self.array, index)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for StringVector {
|
||||
@@ -157,6 +164,15 @@ impl MutableVector for StringVectorBuilder {
|
||||
fn to_vector(&mut self) -> VectorRef {
|
||||
Arc::new(self.finish())
|
||||
}
|
||||
|
||||
fn push_value_ref(&mut self, value: ValueRef) -> Result<()> {
|
||||
self.buffer.push(value.as_string()?);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extend_slice_of(&mut self, vector: &dyn Vector, offset: usize, length: usize) -> Result<()> {
|
||||
vectors::impl_extend_for_builder!(self.buffer, vector, StringVector, offset, length)
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for StringVectorBuilder {
|
||||
@@ -199,6 +215,7 @@ mod tests {
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_misc() {
|
||||
@@ -213,6 +230,7 @@ mod tests {
|
||||
|
||||
for (i, s) in strs.iter().enumerate() {
|
||||
assert_eq!(Value::from(*s), v.get(i));
|
||||
assert_eq!(ValueRef::from(*s), v.get_ref(i));
|
||||
assert_eq!(Value::from(*s), v.try_get(i).unwrap());
|
||||
}
|
||||
|
||||
@@ -273,4 +291,21 @@ mod tests {
|
||||
assert_eq!("world", iter.next().unwrap().unwrap());
|
||||
assert_eq!(None, iter.next());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_builder() {
|
||||
let mut builder = StringType::default().create_mutable_vector(3);
|
||||
builder.push_value_ref(ValueRef::String("hello")).unwrap();
|
||||
assert!(builder.push_value_ref(ValueRef::Int32(123)).is_err());
|
||||
|
||||
let input = StringVector::from_slice(&["world", "one", "two"]);
|
||||
builder.extend_slice_of(&input, 1, 2).unwrap();
|
||||
assert!(builder
|
||||
.extend_slice_of(&crate::vectors::Int32Vector::from_slice(&[13]), 0, 1)
|
||||
.is_err());
|
||||
let vector = builder.to_vector();
|
||||
|
||||
let expect: VectorRef = Arc::new(StringVector::from_slice(&["hello", "one", "two"]));
|
||||
assert_eq!(expect, vector);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use datafusion::field_util::FieldExt;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use function::{create_query_engine, get_numbers_from_table};
|
||||
use query::error::Result;
|
||||
use query::query_engine::Output;
|
||||
@@ -35,7 +35,7 @@ async fn test_argmax_success<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + PartialOrd + DataTypeBuilder,
|
||||
T: PrimitiveElement + PartialOrd,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let result = execute_argmax(column_name, table_name, engine.clone())
|
||||
|
||||
@@ -7,7 +7,7 @@ use datafusion::field_util::FieldExt;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use function::{create_query_engine, get_numbers_from_table};
|
||||
use query::error::Result;
|
||||
use query::query_engine::Output;
|
||||
@@ -36,7 +36,7 @@ async fn test_argmin_success<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + PartialOrd + DataTypeBuilder,
|
||||
T: PrimitiveElement + PartialOrd,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let result = execute_argmin(column_name, table_name, engine.clone())
|
||||
|
||||
@@ -8,7 +8,7 @@ use common_recordbatch::{util, RecordBatch};
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use query::query_engine::{Output, QueryEngineFactory};
|
||||
use query::QueryEngine;
|
||||
@@ -59,7 +59,7 @@ pub async fn get_numbers_from_table<'s, T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Vec<T>
|
||||
where
|
||||
T: Primitive + DataTypeBuilder,
|
||||
T: PrimitiveElement,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let sql = format!("SELECT {} FROM {}", column_name, table_name);
|
||||
|
||||
@@ -7,7 +7,7 @@ use datafusion::field_util::FieldExt;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use datatypes::value::OrderedFloat;
|
||||
use format_num::NumberFormat;
|
||||
use function::{create_query_engine, get_numbers_from_table};
|
||||
@@ -39,7 +39,7 @@ async fn test_mean_success<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + DataTypeBuilder,
|
||||
T: PrimitiveElement + AsPrimitive<f64>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let result = execute_mean(column_name, table_name, engine.clone())
|
||||
|
||||
@@ -18,7 +18,7 @@ use datafusion::arrow_print;
|
||||
use datafusion_common::record_batch::RecordBatch as DfRecordBatch;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use datatypes::types::PrimitiveType;
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use datatypes::with_match_primitive_type_id;
|
||||
@@ -211,7 +211,7 @@ async fn test_my_sum() -> Result<()> {
|
||||
|
||||
async fn test_my_sum_with<T>(numbers: Vec<T>, expected: Vec<&str>) -> Result<()>
|
||||
where
|
||||
T: Primitive + DataTypeBuilder,
|
||||
T: PrimitiveElement,
|
||||
{
|
||||
let table_name = format!("{}_numbers", std::any::type_name::<T>());
|
||||
let column_name = format!("{}_number", std::any::type_name::<T>());
|
||||
|
||||
@@ -11,7 +11,7 @@ use datafusion::field_util::SchemaExt;
|
||||
use datatypes::for_all_ordered_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use datatypes::vectors::PrimitiveVector;
|
||||
use function::{create_query_engine, get_numbers_from_table};
|
||||
use num_traits::AsPrimitive;
|
||||
@@ -65,7 +65,7 @@ async fn test_percentile_success<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + DataTypeBuilder,
|
||||
T: PrimitiveElement + AsPrimitive<f64>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let result = execute_percentile(column_name, table_name, engine.clone())
|
||||
@@ -120,7 +120,7 @@ async fn test_percentile_failed<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + DataTypeBuilder,
|
||||
T: PrimitiveElement,
|
||||
{
|
||||
let result = execute_percentile(column_name, table_name, engine).await;
|
||||
assert!(result.is_err());
|
||||
|
||||
@@ -7,7 +7,7 @@ use datafusion::field_util::FieldExt;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use function::{create_query_engine, get_numbers_from_table};
|
||||
use num_traits::AsPrimitive;
|
||||
use query::error::Result;
|
||||
@@ -37,7 +37,7 @@ async fn test_polyval_success<T, PolyT>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + AsPrimitive<PolyT> + DataTypeBuilder,
|
||||
T: Primitive + AsPrimitive<PolyT> + PrimitiveElement,
|
||||
PolyT: Primitive + std::ops::Mul<Output = PolyT> + std::iter::Sum,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
for<'a> PolyT: Scalar<RefType<'a> = PolyT>,
|
||||
|
||||
@@ -16,7 +16,7 @@ use datafusion::logical_plan::LogicalPlanBuilder;
|
||||
use datatypes::for_all_ordered_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema};
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use datatypes::vectors::{Float32Vector, Float64Vector, PrimitiveVector, UInt32Vector};
|
||||
use num::NumCast;
|
||||
use query::error::Result;
|
||||
@@ -236,7 +236,7 @@ async fn get_numbers_from_table<'s, T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Vec<T>
|
||||
where
|
||||
T: Primitive + DataTypeBuilder,
|
||||
T: PrimitiveElement,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let sql = format!("SELECT {} FROM {}", column_name, table_name);
|
||||
@@ -285,7 +285,7 @@ async fn test_median_success<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + Ord + DataTypeBuilder,
|
||||
T: PrimitiveElement + Ord,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let result = execute_median(column_name, table_name, engine.clone())
|
||||
@@ -324,7 +324,7 @@ async fn test_median_failed<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + DataTypeBuilder,
|
||||
T: PrimitiveElement,
|
||||
{
|
||||
let result = execute_median(column_name, table_name, engine).await;
|
||||
assert!(result.is_err());
|
||||
|
||||
@@ -7,7 +7,7 @@ use datafusion::field_util::FieldExt;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use function::{create_query_engine, get_numbers_from_table};
|
||||
use num_traits::AsPrimitive;
|
||||
use query::error::Result;
|
||||
@@ -39,7 +39,7 @@ async fn test_scipy_stats_norm_cdf_success<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + DataTypeBuilder,
|
||||
T: PrimitiveElement + AsPrimitive<f64>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let result = execute_scipy_stats_norm_cdf(column_name, table_name, engine.clone())
|
||||
|
||||
@@ -7,7 +7,7 @@ use datafusion::field_util::FieldExt;
|
||||
use datafusion::field_util::SchemaExt;
|
||||
use datatypes::for_all_primitive_types;
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::types::DataTypeBuilder;
|
||||
use datatypes::types::PrimitiveElement;
|
||||
use function::{create_query_engine, get_numbers_from_table};
|
||||
use num_traits::AsPrimitive;
|
||||
use query::error::Result;
|
||||
@@ -39,7 +39,7 @@ async fn test_scipy_stats_norm_pdf_success<T>(
|
||||
engine: Arc<dyn QueryEngine>,
|
||||
) -> Result<()>
|
||||
where
|
||||
T: Primitive + AsPrimitive<f64> + DataTypeBuilder,
|
||||
T: PrimitiveElement + AsPrimitive<f64>,
|
||||
for<'a> T: Scalar<RefType<'a> = T>,
|
||||
{
|
||||
let result = execute_scipy_stats_norm_pdf(column_name, table_name, engine.clone())
|
||||
|
||||
@@ -296,9 +296,7 @@ fn create_located<T>(node: T, loc: Location) -> Located<T> {
|
||||
}
|
||||
|
||||
/// cast a `dyn Array` of type unsigned/int/float into a `dyn Vector`
|
||||
fn try_into_vector<T: datatypes::types::Primitive + datatypes::types::DataTypeBuilder>(
|
||||
arg: Arc<dyn Array>,
|
||||
) -> Result<Arc<dyn Vector>> {
|
||||
fn try_into_vector<T: datatypes::types::Primitive>(arg: Arc<dyn Array>) -> Result<Arc<dyn Vector>> {
|
||||
// wrap try_into_vector in here to convert `datatypes::error::Error` to `python::error::Error`
|
||||
Helper::try_into_vector(arg).context(TypeCastSnafu)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user