mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-08 14:22:58 +00:00
feat: adds ConcretDataType and more datatypes impl (#31)
* feat: adds ConcretDataType and impl binary/boolean/null types and vectors * feat: adds String to ConcretDataType * docs: ConcretDataType::from_arrow_type may panic
This commit is contained in:
@@ -11,6 +11,7 @@ features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc
|
||||
[dependencies]
|
||||
common-base = { path = "../common/base" }
|
||||
common-error = { path = "../common/error" }
|
||||
enum_dispatch = "0.3"
|
||||
paste = "1.0"
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_json = "1.0.79"
|
||||
|
||||
6
src/datatypes/src/arrow_array.rs
Normal file
6
src/datatypes/src/arrow_array.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
use arrow::array::{BinaryArray, MutableBinaryArray, MutableUtf8Array, Utf8Array};
|
||||
|
||||
pub type LargeBinaryArray = BinaryArray<i64>;
|
||||
pub type MutableLargeBinaryArray = MutableBinaryArray<i64>;
|
||||
pub type MutableStringArray = MutableUtf8Array<i32>;
|
||||
pub type StringArray = Utf8Array<i32>;
|
||||
@@ -3,8 +3,68 @@ use std::sync::Arc;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::{
|
||||
BinaryType, BooleanType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
|
||||
NullType, StringType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
use crate::value::Value;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[enum_dispatch::enum_dispatch(DataType)]
|
||||
pub enum ConcretDataType {
|
||||
Null(NullType),
|
||||
Boolean(BooleanType),
|
||||
|
||||
// Numeric types:
|
||||
Int8(Int8Type),
|
||||
Int16(Int16Type),
|
||||
Int32(Int32Type),
|
||||
Int64(Int64Type),
|
||||
UInt8(UInt8Type),
|
||||
UInt16(UInt16Type),
|
||||
UInt32(UInt32Type),
|
||||
UInt64(UInt64Type),
|
||||
Float32(Float32Type),
|
||||
Float64(Float64Type),
|
||||
|
||||
// String types
|
||||
Binary(BinaryType),
|
||||
String(StringType),
|
||||
}
|
||||
|
||||
impl ConcretDataType {
|
||||
/// Convert arrow data type to [ConcretDataType].
|
||||
///
|
||||
/// # Panics
|
||||
/// Panic if given arrow data type is not supported.
|
||||
pub fn from_arrow_type(dt: &ArrowDataType) -> Self {
|
||||
match dt {
|
||||
ArrowDataType::Null => ConcretDataType::Null(NullType::default()),
|
||||
ArrowDataType::Boolean => ConcretDataType::Boolean(BooleanType::default()),
|
||||
ArrowDataType::Binary | ArrowDataType::LargeBinary => {
|
||||
ConcretDataType::Binary(BinaryType::default())
|
||||
}
|
||||
ArrowDataType::UInt8 => ConcretDataType::UInt8(UInt8Type::default()),
|
||||
ArrowDataType::UInt16 => ConcretDataType::UInt16(UInt16Type::default()),
|
||||
ArrowDataType::UInt32 => ConcretDataType::UInt32(UInt32Type::default()),
|
||||
ArrowDataType::UInt64 => ConcretDataType::UInt64(UInt64Type::default()),
|
||||
ArrowDataType::Int8 => ConcretDataType::Int8(Int8Type::default()),
|
||||
ArrowDataType::Int16 => ConcretDataType::Int16(Int16Type::default()),
|
||||
ArrowDataType::Int32 => ConcretDataType::Int32(Int32Type::default()),
|
||||
ArrowDataType::Int64 => ConcretDataType::Int64(Int64Type::default()),
|
||||
ArrowDataType::Float32 => ConcretDataType::Float32(Float32Type::default()),
|
||||
ArrowDataType::Float64 => ConcretDataType::Float64(Float64Type::default()),
|
||||
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => {
|
||||
ConcretDataType::String(StringType::default())
|
||||
}
|
||||
|
||||
_ => {
|
||||
unimplemented!("arrow data_type: {:?}", dt)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Data type abstraction.
|
||||
pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
/// Name of this data type.
|
||||
@@ -21,3 +81,76 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
}
|
||||
|
||||
pub type DataTypeRef = Arc<dyn DataType>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the conversion under test on an owned arrow data type.
    fn convert(arrow_type: ArrowDataType) -> ConcretDataType {
        ConcretDataType::from_arrow_type(&arrow_type)
    }

    #[test]
    fn test_from_arrow_type() {
        // Null and boolean.
        assert!(matches!(
            convert(ArrowDataType::Null),
            ConcretDataType::Null(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::Boolean),
            ConcretDataType::Boolean(_)
        ));

        // Both binary flavours map to `Binary`.
        assert!(matches!(
            convert(ArrowDataType::Binary),
            ConcretDataType::Binary(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::LargeBinary),
            ConcretDataType::Binary(_)
        ));

        // Signed integers.
        assert!(matches!(
            convert(ArrowDataType::Int8),
            ConcretDataType::Int8(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::Int16),
            ConcretDataType::Int16(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::Int32),
            ConcretDataType::Int32(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::Int64),
            ConcretDataType::Int64(_)
        ));

        // Unsigned integers.
        assert!(matches!(
            convert(ArrowDataType::UInt8),
            ConcretDataType::UInt8(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::UInt16),
            ConcretDataType::UInt16(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::UInt32),
            ConcretDataType::UInt32(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::UInt64),
            ConcretDataType::UInt64(_)
        ));

        // Floats.
        assert!(matches!(
            convert(ArrowDataType::Float32),
            ConcretDataType::Float32(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::Float64),
            ConcretDataType::Float64(_)
        ));

        // Both UTF-8 flavours map to `String`.
        assert!(matches!(
            convert(ArrowDataType::Utf8),
            ConcretDataType::String(_)
        ));
        assert!(matches!(
            convert(ArrowDataType::LargeUtf8),
            ConcretDataType::String(_)
        ));
    }
}
|
||||
|
||||
@@ -1,25 +1,14 @@
|
||||
#![feature(generic_associated_types)]
|
||||
|
||||
use arrow::array;
|
||||
use arrow::array::{BinaryArray, MutableBinaryArray, Utf8Array};
|
||||
|
||||
pub mod arrow_array;
|
||||
mod data_type;
|
||||
pub mod deserialize;
|
||||
pub mod error;
|
||||
pub mod prelude;
|
||||
mod scalars;
|
||||
pub mod schema;
|
||||
pub mod serialize;
|
||||
pub mod type_id;
|
||||
mod types;
|
||||
pub mod value;
|
||||
pub mod vectors;
|
||||
|
||||
pub type LargeBinaryArray = BinaryArray<i64>;
|
||||
pub type MutableLargeBinaryArray = MutableBinaryArray<i64>;
|
||||
|
||||
pub type StringArray = Utf8Array<i32>;
|
||||
pub type MutableStringArray = array::MutableUtf8Array<i32>;
|
||||
|
||||
pub mod schema;
|
||||
|
||||
pub mod deserialize;
|
||||
pub mod serialize;
|
||||
|
||||
pub mod error;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
pub use crate::data_type::{DataType, DataTypeRef};
|
||||
pub use crate::data_type::{ConcretDataType, DataType, DataTypeRef};
|
||||
pub use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
pub use crate::type_id::LogicalTypeId;
|
||||
pub use crate::value::Value;
|
||||
|
||||
@@ -1,9 +1,16 @@
|
||||
mod binary_type;
|
||||
mod boolean_type;
|
||||
mod null_type;
|
||||
mod primitive_traits;
|
||||
mod primitive_type;
|
||||
mod string_type;
|
||||
|
||||
pub use binary_type::BinaryType;
|
||||
pub use boolean_type::BooleanType;
|
||||
pub use null_type::NullType;
|
||||
pub use primitive_traits::Primitive;
|
||||
pub use primitive_type::{DataTypeBuilder, PrimitiveType};
|
||||
pub use primitive_type::{
|
||||
DataTypeBuilder, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
|
||||
PrimitiveType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
|
||||
};
|
||||
pub use string_type::StringType;
|
||||
|
||||
@@ -7,7 +7,7 @@ use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct BinaryType;
|
||||
|
||||
impl BinaryType {
|
||||
|
||||
34
src/datatypes/src/types/boolean_type.rs
Normal file
34
src/datatypes/src/types/boolean_type.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
|
||||
/// Type descriptor for boolean values.
#[derive(Clone, Debug, Default)]
pub struct BooleanType;
|
||||
|
||||
impl BooleanType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for BooleanType {
|
||||
fn name(&self) -> &str {
|
||||
"Boolean"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Boolean
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
bool::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Boolean
|
||||
}
|
||||
}
|
||||
34
src/datatypes/src/types/null_type.rs
Normal file
34
src/datatypes/src/types/null_type.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
|
||||
/// Type descriptor for the null type.
#[derive(Clone, Debug, Default)]
pub struct NullType;
|
||||
|
||||
impl NullType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for NullType {
|
||||
fn name(&self) -> &str {
|
||||
"Null"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::Null
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
Value::Null
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::Null
|
||||
}
|
||||
}
|
||||
@@ -2,24 +2,18 @@ use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use paste::paste;
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::primitive_traits::Primitive;
|
||||
use crate::value::Value;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct PrimitiveType<T: Primitive> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Primitive> PrimitiveType<T> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new [DataTypeRef] from a primitive type.
|
||||
pub trait DataTypeBuilder {
|
||||
fn build_data_type() -> DataTypeRef;
|
||||
@@ -30,7 +24,7 @@ macro_rules! impl_build_data_type {
|
||||
paste::paste! {
|
||||
impl DataTypeBuilder for $Type {
|
||||
fn build_data_type() -> DataTypeRef {
|
||||
Arc::new(PrimitiveType::<$Type>::new())
|
||||
Arc::new(PrimitiveType::<$Type>::default())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -63,7 +57,19 @@ macro_rules! impl_numeric {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PrimitiveType<$Type> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_build_data_type!($Type);
|
||||
|
||||
paste! {
|
||||
pub type [<$TypeId Type>]=PrimitiveType<$Type>;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use common_base::bytes::StringBytes;
|
||||
use crate::data_type::DataType;
|
||||
use crate::prelude::{DataTypeRef, LogicalTypeId, Value};
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct StringType;
|
||||
|
||||
impl StringType {
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
pub mod binary;
|
||||
pub mod boolean;
|
||||
pub mod null;
|
||||
pub mod primitive;
|
||||
mod string;
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ use arrow::array::BinaryValueIter;
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::arrow_array::{LargeBinaryArray, MutableLargeBinaryArray};
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::error::Result;
|
||||
use crate::error::SerializeSnafu;
|
||||
@@ -13,7 +14,6 @@ use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::BinaryType;
|
||||
use crate::vectors::Vector;
|
||||
use crate::{LargeBinaryArray, MutableLargeBinaryArray};
|
||||
|
||||
/// Vector of binary strings.
|
||||
#[derive(Debug)]
|
||||
@@ -99,8 +99,8 @@ mod tests {
|
||||
use serde::*;
|
||||
|
||||
use super::BinaryVector;
|
||||
use crate::arrow_array::LargeBinaryArray;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::LargeBinaryArray;
|
||||
|
||||
#[test]
|
||||
pub fn test_serialize_binary_vector_to_json() {
|
||||
|
||||
207
src/datatypes/src/vectors/boolean.rs
Normal file
207
src/datatypes/src/vectors/boolean.rs
Normal file
@@ -0,0 +1,207 @@
|
||||
use std::any::Any;
|
||||
use std::borrow::Borrow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{ArrayRef, BooleanArray, MutableBooleanArray};
|
||||
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::error::Result;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::BooleanType;
|
||||
use crate::vectors::Vector;
|
||||
|
||||
/// Vector of boolean values, stored as an arrow [`BooleanArray`].
|
||||
#[derive(Debug)]
|
||||
pub struct BooleanVector {
|
||||
// Underlying arrow array holding the values of this vector.
array: BooleanArray,
|
||||
}
|
||||
|
||||
impl From<Vec<bool>> for BooleanVector {
|
||||
fn from(data: Vec<bool>) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from_slice(&data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BooleanArray> for BooleanVector {
|
||||
fn from(array: BooleanArray) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Option<bool>>> for BooleanVector {
|
||||
fn from(data: Vec<Option<bool>>) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from(data),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanVector {
|
||||
fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
|
||||
BooleanVector {
|
||||
array: BooleanArray::from_iter(iter),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for BooleanVector {
|
||||
fn data_type(&self) -> DataTypeRef {
|
||||
BooleanType::arc()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BooleanVector {
|
||||
type RefItem<'a> = bool;
|
||||
type Iter<'a> = ZipValidity<'a, bool, BitmapIter<'a>>;
|
||||
type Builder = BooleanVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
self.array.iter()
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for [`BooleanVector`], backed by a mutable arrow boolean array.
pub struct BooleanVectorBuilder {
|
||||
// Mutable arrow array that pushed values are appended to.
mutable_array: MutableBooleanArray,
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for BooleanVectorBuilder {
|
||||
type VectorType = BooleanVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableBooleanArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(self) -> Self::VectorType {
|
||||
BooleanVector {
|
||||
array: self.mutable_array.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serializable for BooleanVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
self.iter_data()
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(crate::error::SerializeSnafu)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use serde::*;

    use super::*;
    use crate::serialize::Serializable;

    /// Asserts that `vector` has exactly four slots and that `get_data`
    /// returns `[false, true, false, true]` for them, in that order.
    fn assert_alternating(vector: &BooleanVector) {
        assert_eq!(4, vector.len());
        // Check every index, including the last one.
        for i in 0..4 {
            assert_eq!(
                i == 1 || i == 3,
                vector.get_data(i).unwrap(),
                "failed at {}",
                i
            )
        }
    }

    #[test]
    pub fn test_serialize_boolean_vector_to_json() {
        let vector = BooleanVector {
            array: BooleanArray::from_slice(&vec![true, false, true, true, false, false]),
        };

        let json_value = vector.serialize_to_json().unwrap();
        let mut output = vec![];
        let mut serializer = serde_json::ser::Serializer::new(&mut output);
        json_value.serialize(&mut serializer).unwrap();
        assert_eq!(
            "[true,false,true,true,false,false]",
            String::from_utf8_lossy(&output)
        );
    }

    #[test]
    fn test_boolean_vector_from_vec() {
        let vec = BooleanVector::from(vec![false, true, false, true]);
        assert_alternating(&vec);
    }

    #[test]
    fn test_boolean_vector_from_iter() {
        let v = vec![Some(false), Some(true), Some(false), Some(true)];
        let vec = v.into_iter().collect::<BooleanVector>();
        // Previously this test only looped over `0..3`, so the last element
        // was never verified.
        assert_alternating(&vec);
    }

    #[test]
    fn test_boolean_vector_from_vec_option() {
        // Index 2 is `None`; `get_data` only checks bounds, so it still
        // returns `Some` for that slot and the test expects `false` there.
        let vec = BooleanVector::from(vec![Some(false), Some(true), None, Some(true)]);
        assert_alternating(&vec);
    }

    #[test]
    fn test_boolean_vector_builder() {
        let mut builder = BooleanVectorBuilder::with_capacity(4);
        builder.push(Some(false));
        builder.push(Some(true));
        builder.push(Some(false));
        builder.push(Some(true));

        let vec = builder.finish();

        assert_alternating(&vec);
    }
}
|
||||
77
src/datatypes/src/vectors/null.rs
Normal file
77
src/datatypes/src/vectors/null.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
use std::any::Any;
|
||||
use std::fmt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::ArrayRef;
|
||||
use arrow::array::{Array, NullArray};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::types::NullType;
|
||||
use crate::vectors::Vector;
|
||||
|
||||
/// Vector backed by an arrow [`NullArray`].
pub struct NullVector {
|
||||
// Underlying arrow null array.
array: NullArray,
|
||||
}
|
||||
|
||||
impl NullVector {
|
||||
pub fn new(n: usize) -> Self {
|
||||
Self {
|
||||
array: NullArray::new(ArrowDataType::Null, n),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NullArray> for NullVector {
|
||||
fn from(array: NullArray) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl Vector for NullVector {
|
||||
fn data_type(&self) -> DataTypeRef {
|
||||
NullType::arc()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for NullVector {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "NullVector({})", self.len())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_null_array() {
        let vector = NullVector::new(32);
        assert_eq!(vector.len(), 32);

        // Every slot of the converted arrow array counts as null.
        let arrow_arr = vector.to_arrow_array();
        assert_eq!(arrow_arr.null_count(), 32);

        // Slicing keeps the requested length, and all of it is still null.
        let sliced = arrow_arr.slice(8, 16);
        assert_eq!(sliced.len(), 16);
        assert_eq!(sliced.null_count(), 16);
    }

    #[test]
    fn test_debug_null_array() {
        let vector = NullVector::new(1024 * 1024);
        let rendered = format!("{:?}", vector);
        assert_eq!(rendered, "NullVector(1048576)");
    }
}
|
||||
@@ -6,13 +6,13 @@ use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::arrow_array::{MutableStringArray, StringArray};
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::error::SerializeSnafu;
|
||||
use crate::prelude::{ScalarVectorBuilder, Vector};
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::StringType;
|
||||
use crate::{MutableStringArray, StringArray};
|
||||
|
||||
/// String array wrapper
|
||||
#[derive(Clone)]
|
||||
|
||||
Reference in New Issue
Block a user