diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 4cd9f6fd15..77174a43a3 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -1,5 +1,7 @@ use std::sync::Arc; +use arrow2::datatypes::DataType as ArrowDataType; + use crate::type_id::LogicalTypeId; use crate::value::Value; @@ -13,6 +15,9 @@ pub trait DataType: std::fmt::Debug + Send + Sync { /// Returns the default value of this type. fn default_value(&self) -> Value; + + /// Convert this type to an [arrow2::datatypes::DataType]. + fn as_arrow_type(&self) -> ArrowDataType; } pub type DataTypeRef = Arc; diff --git a/src/datatypes/src/types/binary_type.rs b/src/datatypes/src/types/binary_type.rs index c5d5a3ed18..18c2a0fa1b 100644 --- a/src/datatypes/src/types/binary_type.rs +++ b/src/datatypes/src/types/binary_type.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use arrow2::datatypes::DataType as ArrowDataType; use common::bytes::StringBytes; use crate::data_type::{DataType, DataTypeRef}; @@ -27,4 +28,8 @@ impl DataType for BinaryType { fn default_value(&self) -> Value { StringBytes::default().into() } + + fn as_arrow_type(&self) -> ArrowDataType { + ArrowDataType::LargeBinary + } } diff --git a/src/datatypes/src/types/primitive_type.rs b/src/datatypes/src/types/primitive_type.rs index ade799acfe..3bef260359 100644 --- a/src/datatypes/src/types/primitive_type.rs +++ b/src/datatypes/src/types/primitive_type.rs @@ -1,6 +1,8 @@ use std::marker::PhantomData; use std::sync::Arc; +use arrow2::datatypes::DataType as ArrowDataType; + use crate::data_type::{DataType, DataTypeRef}; use crate::type_id::LogicalTypeId; use crate::types::primitive_traits::Primitive; @@ -49,6 +51,10 @@ macro_rules! 
impl_numeric { fn default_value(&self) -> Value { $Type::default().into() } + + fn as_arrow_type(&self) -> ArrowDataType { + ArrowDataType::$TypeId + } } impl std::fmt::Debug for PrimitiveType<$Type> { diff --git a/src/datatypes/src/vectors.rs b/src/datatypes/src/vectors.rs index 9adac163b6..c90106c2ea 100644 --- a/src/datatypes/src/vectors.rs +++ b/src/datatypes/src/vectors.rs @@ -4,6 +4,8 @@ pub mod primitive; use std::any::Any; use std::sync::Arc; +use arrow2::array::ArrayRef; + use crate::data_type::DataTypeRef; /// Vector of data values. @@ -24,6 +26,9 @@ pub trait Vector: Send + Sync { fn is_empty(&self) -> bool { self.len() == 0 } + + /// Convert this vector to a new arrow [ArrayRef]. + fn to_arrow_array(&self) -> ArrayRef; } pub type VectorRef = Arc; diff --git a/src/datatypes/src/vectors/binary.rs b/src/datatypes/src/vectors/binary.rs index 27eb29ae6a..6b27c5a855 100644 --- a/src/datatypes/src/vectors/binary.rs +++ b/src/datatypes/src/vectors/binary.rs @@ -1,5 +1,7 @@ use std::any::Any; +use std::sync::Arc; +use arrow2::array::ArrayRef; use arrow2::array::BinaryValueIter; use arrow2::bitmap::utils::ZipValidity; @@ -27,6 +29,10 @@ impl Vector for BinaryVector { fn len(&self) -> usize { self.array.len() } + + fn to_arrow_array(&self) -> ArrayRef { + Arc::new(self.array.clone()) + } } impl ScalarVector for BinaryVector { diff --git a/src/datatypes/src/vectors/primitive.rs b/src/datatypes/src/vectors/primitive.rs index 73a1c14991..0caa466884 100644 --- a/src/datatypes/src/vectors/primitive.rs +++ b/src/datatypes/src/vectors/primitive.rs @@ -1,6 +1,8 @@ use std::any::Any; use std::slice::Iter; +use std::sync::Arc; +use arrow2::array::ArrayRef; use arrow2::array::{MutablePrimitiveArray, PrimitiveArray}; use arrow2::bitmap::utils::ZipValidity; @@ -33,6 +35,10 @@ impl Vector for PrimitiveVector { fn len(&self) -> usize { self.array.len() } + + fn to_arrow_array(&self) -> ArrayRef { + Arc::new(self.array.clone()) + } } impl ScalarVector for PrimitiveVector {