feat: Implements validity() and null_count() for Vector (#38)

* feat: Add validity() to Vector

* test(datatypes): Add more tests and fix get_data() not returning None for null values
This commit is contained in:
evenyag
2022-06-01 20:55:58 +08:00
committed by GitHub
parent fb0585229e
commit 23f235524d
10 changed files with 329 additions and 153 deletions

View File

@@ -24,6 +24,6 @@ version="0.10"
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
[dev-dependencies]
serde_json = "1.0.81"
serde_json = "1.0"
tokio = { version = "1.18", features = ["full"] }

View File

@@ -14,5 +14,5 @@ common-error = { path = "../common/error" }
enum_dispatch = "0.3"
paste = "1.0"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.79"
serde_json = "1.0"
snafu = { version = "0.7", features = ["backtraces"] }

View File

@@ -2,4 +2,4 @@ pub use crate::data_type::{ConcreteDataType, DataType, DataTypeRef};
pub use crate::scalars::{ScalarVector, ScalarVectorBuilder};
pub use crate::type_id::LogicalTypeId;
pub use crate::value::Value;
pub use crate::vectors::{Vector, VectorRef};
pub use crate::vectors::{Validity, Vector, VectorRef};

View File

@@ -20,6 +20,9 @@ pub trait ScalarVector: Vector {
/// Returns a reference to the element at position `idx`, or `None` if the
/// slot at `idx` is null.
///
/// Note: `get_data()` performs poorly when called repeatedly; avoid calling
/// it inside a loop.
///
/// # Panics
/// Panics if `idx >= self.len()`.
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
/// Returns iterator of current vector.

View File

@@ -8,6 +8,7 @@ use std::any::Any;
use std::sync::Arc;
use arrow::array::ArrayRef;
use arrow::bitmap::Bitmap;
use arrow::datatypes::DataType as ArrowDataType;
pub use binary::*;
pub use boolean::*;
@@ -24,6 +25,25 @@ pub use crate::vectors::{
UInt8Vector,
};
#[derive(Debug, PartialEq)]
pub enum Validity<'a> {
/// Whether the array slot is valid or not (null).
Slots(&'a Bitmap),
/// All slots are valid.
AllValid,
/// All slots are null.
AllNull,
}
impl<'a> Validity<'a> {
    /// Returns the underlying validity bitmap when this is
    /// [`Validity::Slots`], and `None` for the all-valid / all-null cases.
    ///
    /// The returned reference carries the bitmap's own lifetime `'a` rather
    /// than the lifetime of the `&self` borrow, so it stays usable after a
    /// temporary `Validity` value has been dropped.
    pub fn slots(&self) -> Option<&'a Bitmap> {
        // Match on `*self` so `bitmap` is copied out as `&'a Bitmap` instead
        // of being re-borrowed through `&self`.
        match *self {
            Validity::Slots(bitmap) => Some(bitmap),
            // Listed explicitly so that adding a variant forces a decision here.
            Validity::AllValid | Validity::AllNull => None,
        }
    }
}
/// Vector of data values.
pub trait Vector: Send + Sync + Serializable {
/// Returns the data type of the vector.
@@ -45,6 +65,20 @@ pub trait Vector: Send + Sync + Serializable {
/// Convert this vector to a new arrow [ArrayRef].
fn to_arrow_array(&self) -> ArrayRef;
/// Returns the validity of this vector, i.e. which of its slots are valid
/// and which are null.
fn validity(&self) -> Validity;
/// Returns how many slots of this [`Vector`] are null.
///
/// # Implementation
/// Intended to be `O(1)`: the count is taken from [`Vector::validity`]
/// (this relies on the bitmap's `null_count()` being constant-time).
fn null_count(&self) -> usize {
    match self.validity() {
        // Nothing is null.
        Validity::AllValid => 0,
        // Every slot is null.
        Validity::AllNull => self.len(),
        // Mixed: ask the bitmap.
        Validity::Slots(bitmap) => bitmap.null_count(),
    }
}
}
pub type VectorRef = Arc<dyn Vector>;
@@ -101,16 +135,16 @@ macro_rules! impl_try_from_arrow_array_for_vector {
pub(crate) use impl_try_from_arrow_array_for_vector;
#[cfg(test)]
mod tests {
pub mod tests {
use arrow::array::{Array, PrimitiveArray};
use serde::Serialize;
use serde_json;
use super::*;
use crate::data_type::DataType;
use crate::types::DataTypeBuilder;
#[test]
pub fn test_df_columns_to_vector() {
fn test_df_columns_to_vector() {
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1, 2, 3]));
let vector = try_into_vector(df_column).unwrap();
assert_eq!(
@@ -120,28 +154,22 @@ mod tests {
}
#[test]
pub fn test_serialize_i32_vector() {
fn test_serialize_i32_vector() {
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::<i32>::from_slice(vec![1, 2, 3]));
let json_value = try_into_vector(df_column)
.unwrap()
.serialize_to_json()
.unwrap();
let mut output = vec![];
let mut serializer = serde_json::ser::Serializer::new(&mut output);
json_value.serialize(&mut serializer).unwrap();
assert_eq!(b"[1,2,3]", output.as_slice());
assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
}
#[test]
pub fn test_serialize_i8_vector() {
fn test_serialize_i8_vector() {
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1u8, 2u8, 3u8]));
let json_value = try_into_vector(df_column)
.unwrap()
.serialize_to_json()
.unwrap();
let mut output = vec![];
let mut serializer = serde_json::ser::Serializer::new(&mut output);
json_value.serialize(&mut serializer).unwrap();
assert_eq!(b"[1,2,3]", output.as_slice());
assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
}
}

View File

@@ -2,7 +2,7 @@ use std::any::Any;
use std::sync::Arc;
use arrow::array::BinaryValueIter;
use arrow::array::{ArrayRef, BinaryArray};
use arrow::array::{Array, ArrayRef, BinaryArray};
use arrow::bitmap::utils::ZipValidity;
use snafu::OptionExt;
use snafu::ResultExt;
@@ -15,7 +15,7 @@ use crate::scalars::{ScalarVector, ScalarVectorBuilder};
use crate::serialize::Serializable;
use crate::types::BinaryType;
use crate::vectors::impl_try_from_arrow_array_for_vector;
use crate::vectors::Vector;
use crate::vectors::{Validity, Vector};
/// Vector of binary strings.
#[derive(Debug)]
@@ -45,6 +45,13 @@ impl Vector for BinaryVector {
fn to_arrow_array(&self) -> ArrayRef {
Arc::new(self.array.clone())
}
fn validity(&self) -> Validity {
    // The arrow array carries no bitmap when every slot is valid.
    self.array
        .validity()
        .map_or(Validity::AllValid, Validity::Slots)
}
}
impl ScalarVector for BinaryVector {
@@ -53,7 +60,7 @@ impl ScalarVector for BinaryVector {
type Builder = BinaryVectorBuilder;
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
if idx < self.len() {
if self.array.is_valid(idx) {
Some(self.array.value(idx))
} else {
None
@@ -91,8 +98,7 @@ impl ScalarVectorBuilder for BinaryVectorBuilder {
impl Serializable for BinaryVector {
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
self.array
.iter()
self.iter_data()
.map(|v| match v {
None => Ok(serde_json::Value::Null), // if binary vector not present, map to NULL
Some(vec) => serde_json::to_value(vec),
@@ -106,30 +112,87 @@ impl_try_from_arrow_array_for_vector!(LargeBinaryArray, BinaryVector);
#[cfg(test)]
mod tests {
use serde::*;
use serde_json;
use super::BinaryVector;
use super::*;
use crate::arrow_array::LargeBinaryArray;
use crate::serialize::Serializable;
#[test]
pub fn test_serialize_binary_vector_to_json() {
let vector = BinaryVector {
array: LargeBinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]),
};
fn test_serialize_binary_vector_to_json() {
let vector = BinaryVector::from(LargeBinaryArray::from_slice(&vec![
vec![1, 2, 3],
vec![1, 2, 3],
]));
let json_value = vector.serialize_to_json().unwrap();
let mut output = vec![];
let mut serializer = serde_json::ser::Serializer::new(&mut output);
json_value.serialize(&mut serializer).unwrap();
assert_eq!("[[1,2,3],[1,2,3]]", String::from_utf8_lossy(&output));
assert_eq!(
"[[1,2,3],[1,2,3]]",
serde_json::to_string(&json_value).unwrap()
);
}
#[test]
pub fn test_from_arrow_array() {
fn test_serialize_binary_vector_with_null_to_json() {
let mut builder = BinaryVectorBuilder::with_capacity(4);
builder.push(Some(&[1, 2, 3]));
builder.push(None);
builder.push(Some(&[4, 5, 6]));
let vector = builder.finish();
let json_value = vector.serialize_to_json().unwrap();
assert_eq!(
"[[1,2,3],null,[4,5,6]]",
serde_json::to_string(&json_value).unwrap()
);
}
#[test]
fn test_from_arrow_array() {
    // `BinaryVector::from` consumes its argument, so convert a clone and
    // compare against the source array afterwards.
    let expected = LargeBinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]);
    let vector = BinaryVector::from(expected.clone());
    assert_eq!(expected, vector.array);
}
#[test]
fn test_binary_vector_build_get() {
    // Three values followed by a trailing null.
    let mut builder = BinaryVectorBuilder::with_capacity(4);
    builder.push(Some(b"hello"));
    builder.push(Some(b"happy"));
    builder.push(Some(b"world"));
    builder.push(None);
    let vector = builder.finish();

    // Random access: a valid slot yields its bytes, a null slot yields `None`.
    assert_eq!(b"hello", vector.get_data(0).unwrap());
    assert_eq!(None, vector.get_data(3));

    // Iteration yields every slot in order and then stops.
    let collected: Vec<_> = vector.iter_data().collect();
    assert_eq!(
        vec![
            Some(&b"hello"[..]),
            Some(&b"happy"[..]),
            Some(&b"world"[..]),
            None,
        ],
        collected
    );
}
#[test]
fn test_binary_vector_validity() {
    // Without nulls the vector reports `AllValid` and a zero null count.
    {
        let mut builder = BinaryVectorBuilder::with_capacity(4);
        builder.push(Some(b"hello"));
        builder.push(Some(b"world"));
        let all_valid = builder.finish();
        assert_eq!(0, all_valid.null_count());
        assert_eq!(Validity::AllValid, all_valid.validity());
    }

    // With a null in the middle the validity is a bitmap whose bit 1 is unset.
    {
        let mut builder = BinaryVectorBuilder::with_capacity(3);
        builder.push(Some(b"hello"));
        builder.push(None);
        builder.push(Some(b"world"));
        let with_null = builder.finish();
        assert_eq!(1, with_null.null_count());

        let validity = with_null.validity();
        let bitmap = validity.slots().unwrap();
        assert_eq!(1, bitmap.null_count());
        assert!(!bitmap.get_bit(1));
    }
}
}

View File

@@ -2,7 +2,7 @@ use std::any::Any;
use std::borrow::Borrow;
use std::sync::Arc;
use arrow::array::{ArrayRef, BooleanArray, MutableBooleanArray};
use arrow::array::{Array, ArrayRef, BooleanArray, MutableBooleanArray};
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
use snafu::OptionExt;
use snafu::ResultExt;
@@ -13,7 +13,7 @@ use crate::scalars::{ScalarVector, ScalarVectorBuilder};
use crate::serialize::Serializable;
use crate::types::BooleanType;
use crate::vectors::impl_try_from_arrow_array_for_vector;
use crate::vectors::Vector;
use crate::vectors::{Validity, Vector};
/// Vector of boolean.
#[derive(Debug)]
@@ -67,6 +67,13 @@ impl Vector for BooleanVector {
fn to_arrow_array(&self) -> ArrayRef {
Arc::new(self.array.clone())
}
fn validity(&self) -> Validity {
    // A missing bitmap on the arrow array means every slot is valid.
    self.array
        .validity()
        .map_or(Validity::AllValid, Validity::Slots)
}
}
impl ScalarVector for BooleanVector {
@@ -75,7 +82,7 @@ impl ScalarVector for BooleanVector {
type Builder = BooleanVectorBuilder;
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
if idx < self.len() {
if self.array.is_valid(idx) {
Some(self.array.value(idx))
} else {
None
@@ -124,88 +131,92 @@ impl_try_from_arrow_array_for_vector!(BooleanArray, BooleanVector);
#[cfg(test)]
mod tests {
use serde::*;
use serde_json;
use super::*;
use crate::serialize::Serializable;
#[test]
pub fn test_serialize_boolean_vector_to_json() {
let vector = BooleanVector {
array: BooleanArray::from_slice(&vec![true, false, true, true, false, false]),
};
fn test_serialize_boolean_vector_to_json() {
let vector = BooleanVector::from(vec![true, false, true, true, false, false]);
let json_value = vector.serialize_to_json().unwrap();
let mut output = vec![];
let mut serializer = serde_json::ser::Serializer::new(&mut output);
json_value.serialize(&mut serializer).unwrap();
assert_eq!(
"[true,false,true,true,false,false]",
String::from_utf8_lossy(&output)
serde_json::to_string(&json_value).unwrap(),
);
}
#[test]
fn test_serialize_boolean_vector_with_null_to_json() {
    // A null slot must be serialized as JSON `null`.
    let input = vec![Some(true), None, Some(false)];
    let serialized = BooleanVector::from(input).serialize_to_json().unwrap();
    let json = serde_json::to_string(&serialized).unwrap();
    assert_eq!("[true,null,false]", json);
}
#[test]
fn test_boolean_vector_from_vec() {
let vec = BooleanVector::from(vec![false, true, false, true]);
let input = vec![false, true, false, true];
let vec = BooleanVector::from(input.clone());
assert_eq!(4, vec.len());
for i in 0..4 {
assert_eq!(
i == 1 || i == 3,
vec.get_data(i).unwrap(),
"failed at {}",
i
)
for (i, v) in input.into_iter().enumerate() {
assert_eq!(Some(v), vec.get_data(i), "failed at {}", i)
}
}
#[test]
fn test_boolean_vector_from_iter() {
let v = vec![Some(false), Some(true), Some(false), Some(true)];
let vec = v.into_iter().collect::<BooleanVector>();
let input = vec![Some(false), Some(true), Some(false), Some(true)];
let vec = input.iter().collect::<BooleanVector>();
assert_eq!(4, vec.len());
for i in 0..3 {
assert_eq!(
i == 1 || i == 3,
vec.get_data(i).unwrap(),
"failed at {}",
i
)
for (i, v) in input.into_iter().enumerate() {
assert_eq!(v, vec.get_data(i), "failed at {}", i)
}
}
#[test]
fn test_boolean_vector_from_vec_option() {
let vec = BooleanVector::from(vec![Some(false), Some(true), None, Some(true)]);
let input = vec![Some(false), Some(true), None, Some(true)];
let vec = BooleanVector::from(input.clone());
assert_eq!(4, vec.len());
for i in 0..4 {
assert_eq!(
i == 1 || i == 3,
vec.get_data(i).unwrap(),
"failed at {}",
i
)
for (i, v) in input.into_iter().enumerate() {
assert_eq!(v, vec.get_data(i), "failed at {}", i)
}
}
#[test]
fn test_boolean_vector_builder() {
let mut builder = BooleanVectorBuilder::with_capacity(4);
builder.push(Some(false));
builder.push(Some(true));
builder.push(Some(false));
builder.push(Some(true));
fn test_boolean_vector_build_get() {
let input = [Some(true), None, Some(false)];
let mut builder = BooleanVectorBuilder::with_capacity(3);
for v in input {
builder.push(v);
}
let vector = builder.finish();
assert_eq!(input.len(), vector.len());
let vec = builder.finish();
let res: Vec<_> = vector.iter_data().collect();
assert_eq!(input, &res[..]);
assert_eq!(4, vec.len());
for i in 0..4 {
assert_eq!(
i == 1 || i == 3,
vec.get_data(i).unwrap(),
"failed at {}",
i
)
for (i, v) in input.into_iter().enumerate() {
assert_eq!(v, vector.get_data(i));
}
}
#[test]
fn test_boolean_vector_validity() {
    // One null at index 1: validity is a bitmap with that bit unset.
    let with_null = BooleanVector::from(vec![Some(true), None, Some(false)]);
    assert_eq!(1, with_null.null_count());
    let validity = with_null.validity();
    let bitmap = validity.slots().unwrap();
    assert_eq!(1, bitmap.null_count());
    assert!(!bitmap.get_bit(1));

    // No nulls at all: validity collapses to `AllValid`.
    let all_valid = BooleanVector::from(vec![true, false, false]);
    assert_eq!(0, all_valid.null_count());
    assert_eq!(Validity::AllValid, all_valid.validity());
}
}

View File

@@ -6,14 +6,13 @@ use arrow::array::ArrayRef;
use arrow::array::{Array, NullArray};
use arrow::datatypes::DataType as ArrowDataType;
use snafu::OptionExt;
use snafu::ResultExt;
use crate::data_type::ConcreteDataType;
use crate::error::{Result, SerializeSnafu};
use crate::error::Result;
use crate::serialize::Serializable;
use crate::types::NullType;
use crate::vectors::impl_try_from_arrow_array_for_vector;
use crate::vectors::Vector;
use crate::vectors::{Validity, Vector};
pub struct NullVector {
array: NullArray,
@@ -49,6 +48,10 @@ impl Vector for NullVector {
fn to_arrow_array(&self) -> ArrayRef {
Arc::new(self.array.clone())
}
// A null vector has no valid slots, so it always reports `AllNull` and
// needs no bitmap.
fn validity(&self) -> Validity {
Validity::AllNull
}
}
impl fmt::Debug for NullVector {
@@ -57,14 +60,11 @@ impl fmt::Debug for NullVector {
}
}
const NULL_STR: &str = "NULL";
impl Serializable for NullVector {
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
vec![NULL_STR.to_owned(); self.len()]
.into_iter()
.map(serde_json::to_value)
.collect::<serde_json::Result<_>>()
.context(SerializeSnafu)
Ok(std::iter::repeat(serde_json::Value::Null)
.take(self.len())
.collect())
}
}
@@ -72,16 +72,16 @@ impl_try_from_arrow_array_for_vector!(NullArray, NullVector);
#[cfg(test)]
mod tests {
use serde_json::Value as JsonValue;
use serde_json;
use super::*;
#[test]
fn test_null_array() {
let null_arr = NullVector::new(32);
fn test_null_vector() {
let vector = NullVector::new(32);
assert_eq!(null_arr.len(), 32);
let arrow_arr = null_arr.to_arrow_array();
assert_eq!(vector.len(), 32);
let arrow_arr = vector.to_arrow_array();
assert_eq!(arrow_arr.null_count(), 32);
let array2 = arrow_arr.slice(8, 16);
@@ -90,21 +90,25 @@ mod tests {
}
#[test]
fn test_debug_null_array() {
fn test_debug_null_vector() {
let array = NullVector::new(1024 * 1024);
assert_eq!(format!("{:?}", array), "NullVector(1048576)");
}
#[test]
fn test_serialize_json() {
let null_vec = NullVector::new(3);
let vector = NullVector::new(3);
let json_value = vector.serialize_to_json().unwrap();
assert_eq!(
vec![
JsonValue::from(NULL_STR),
JsonValue::from(NULL_STR),
JsonValue::from(NULL_STR),
],
null_vec.serialize_to_json().unwrap()
"[null,null,null]",
serde_json::to_string(&json_value).unwrap()
);
}
#[test]
fn test_null_vector_validity() {
    // Every slot of a null vector counts as null.
    let len = 5;
    let vector = NullVector::new(len);
    assert_eq!(Validity::AllNull, vector.validity());
    assert_eq!(len, vector.null_count());
}
}

View File

@@ -3,7 +3,7 @@ use std::iter::FromIterator;
use std::slice::Iter;
use std::sync::Arc;
use arrow::array::{ArrayRef, MutablePrimitiveArray, PrimitiveArray};
use arrow::array::{Array, ArrayRef, MutablePrimitiveArray, PrimitiveArray};
use arrow::bitmap::utils::ZipValidity;
use serde_json::Value as JsonValue;
use snafu::{OptionExt, ResultExt};
@@ -14,7 +14,7 @@ use crate::error::{Result, SerializeSnafu};
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
use crate::serialize::Serializable;
use crate::types::{DataTypeBuilder, Primitive};
use crate::vectors::Vector;
use crate::vectors::{Validity, Vector};
/// Vector for primitive data types.
#[derive(Debug)]
@@ -73,6 +73,13 @@ impl<T: Primitive + DataTypeBuilder> Vector for PrimitiveVector<T> {
fn to_arrow_array(&self) -> ArrayRef {
Arc::new(self.array.clone())
}
fn validity(&self) -> Validity {
    // No bitmap on the arrow array means there are no null slots.
    self.array
        .validity()
        .map_or(Validity::AllValid, Validity::Slots)
}
}
impl<T: Primitive> From<PrimitiveArray<T>> for PrimitiveVector<T> {
@@ -89,21 +96,13 @@ impl<T: Primitive, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr> for Pr
}
}
impl<T: Primitive> PrimitiveVector<T> {
    /// Iterates over the raw values buffer of the underlying arrow array.
    ///
    /// This does not consult the validity bitmap, so null slots are yielded
    /// as whatever value the buffer holds for them.
    #[inline]
    pub fn iter(&self) -> std::slice::Iter<'_, T> {
        let values = self.array.values();
        values.iter()
    }
}
impl<T: Primitive + DataTypeBuilder> ScalarVector for PrimitiveVector<T> {
type RefItem<'a> = T;
type Iter<'a> = PrimitiveIter<'a, T>;
type Builder = PrimitiveVectorBuilder<T>;
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
if idx < self.len() {
if self.array.is_valid(idx) {
Some(self.array.value(idx))
} else {
None
@@ -168,7 +167,7 @@ impl<T: Primitive + DataTypeBuilder> ScalarVectorBuilder for PrimitiveVectorBuil
impl<T: Primitive + DataTypeBuilder> Serializable for PrimitiveVector<T> {
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
self.iter()
self.iter_data()
.map(serde_json::to_value)
.collect::<serde_json::Result<_>>()
.context(SerializeSnafu)
@@ -177,43 +176,92 @@ impl<T: Primitive + DataTypeBuilder> Serializable for PrimitiveVector<T> {
#[cfg(test)]
mod tests {
use serde_json;
use super::*;
use crate::serialize::Serializable;
fn assert_vec_eq(v: PrimitiveVector<i32>) {
assert_eq!(
vec![
JsonValue::from(1i32),
JsonValue::from(2i32),
JsonValue::from(3i32),
JsonValue::from(4i32)
],
v.serialize_to_json().unwrap()
);
fn check_vec(v: PrimitiveVector<i32>) {
let json_value = v.serialize_to_json().unwrap();
assert_eq!("[1,2,3,4]", serde_json::to_string(&json_value).unwrap(),);
}
#[test]
fn test_from_values() {
let v = PrimitiveVector::<i32>::from_values(vec![1, 2, 3, 4]);
assert_vec_eq(v);
check_vec(v);
}
#[test]
fn test_from_vec() {
let v = PrimitiveVector::<i32>::from_vec(vec![1, 2, 3, 4]);
assert_vec_eq(v);
check_vec(v);
}
#[test]
fn test_from_slice() {
let v = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
assert_vec_eq(v);
check_vec(v);
}
#[test]
fn test_serialize_primitive_vector_with_null_to_json() {
    // Null slots must come out as JSON `null`, not as a placeholder value.
    let values = [Some(1i32), Some(2i32), None, Some(4i32), None];
    let mut builder = PrimitiveVectorBuilder::with_capacity(values.len());
    for item in values.iter().copied() {
        builder.push(item);
    }
    let serialized = builder.finish().serialize_to_json().unwrap();
    assert_eq!(
        "[1,2,null,4,null]",
        serde_json::to_string(&serialized).unwrap(),
    );
}
#[test]
fn test_from_arrow_array() {
let arrow_array = PrimitiveArray::from_slice(vec![1, 2, 3, 4]);
let v = PrimitiveVector::from(arrow_array);
assert_vec_eq(v);
check_vec(v);
}
#[test]
fn test_primitive_vector_build_get() {
    let expected = [Some(1i32), Some(2i32), None, Some(4i32), None];
    let mut builder = PrimitiveVectorBuilder::with_capacity(expected.len());
    for item in expected.iter().copied() {
        builder.push(item);
    }
    let vector = builder.finish();
    assert_eq!(expected.len(), vector.len());

    // Random access must agree with the pushed values, nulls included.
    for (idx, item) in expected.iter().enumerate() {
        assert_eq!(*item, vector.get_data(idx));
    }

    // Iteration must yield the same sequence.
    let collected: Vec<_> = vector.iter_data().collect();
    assert_eq!(expected.to_vec(), collected);
}
#[test]
fn test_primitive_vector_validity() {
    // Two trailing nulls: validity is a bitmap with bits 2 and 3 unset.
    let values = [Some(1i32), Some(2i32), None, None];
    let mut builder = PrimitiveVectorBuilder::with_capacity(values.len());
    for item in values.iter().copied() {
        builder.push(item);
    }
    let with_nulls = builder.finish();
    assert_eq!(2, with_nulls.null_count());
    let validity = with_nulls.validity();
    let bitmap = validity.slots().unwrap();
    assert_eq!(2, bitmap.null_count());
    for null_idx in [2, 3] {
        assert!(!bitmap.get_bit(null_idx));
    }

    // Built from plain values: no bitmap, everything is valid.
    let all_valid = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
    assert_eq!(0, all_valid.null_count());
    assert_eq!(Validity::AllValid, all_valid.validity());
}
}

View File

@@ -1,7 +1,7 @@
use std::any::Any;
use std::sync::Arc;
use arrow::array::{ArrayRef, Utf8ValuesIter};
use arrow::array::{Array, ArrayRef, Utf8ValuesIter};
use arrow::bitmap::utils::ZipValidity;
use serde_json::Value;
use snafu::OptionExt;
@@ -10,7 +10,7 @@ use snafu::ResultExt;
use crate::arrow_array::{MutableStringArray, StringArray};
use crate::data_type::ConcreteDataType;
use crate::error::SerializeSnafu;
use crate::prelude::{ScalarVectorBuilder, Vector};
use crate::prelude::{ScalarVectorBuilder, Validity, Vector};
use crate::scalars::ScalarVector;
use crate::serialize::Serializable;
use crate::types::StringType;
@@ -44,6 +44,13 @@ impl Vector for StringVector {
fn to_arrow_array(&self) -> ArrayRef {
Arc::new(self.array.clone())
}
fn validity(&self) -> Validity {
    // The arrow array omits the bitmap when it contains no nulls.
    self.array
        .validity()
        .map_or(Validity::AllValid, Validity::Slots)
}
}
impl ScalarVector for StringVector {
@@ -52,9 +59,10 @@ impl ScalarVector for StringVector {
type Builder = StringVectorBuilder;
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
match idx < self.array.len() {
true => Some(self.array.value(idx)),
false => None,
if self.array.is_valid(idx) {
Some(self.array.value(idx))
} else {
None
}
}
@@ -89,8 +97,7 @@ impl ScalarVectorBuilder for StringVectorBuilder {
impl Serializable for StringVector {
fn serialize_to_json(&self) -> crate::error::Result<Vec<Value>> {
self.array
.iter()
self.iter_data()
.map(|v| match v {
None => Ok(serde_json::Value::Null),
Some(s) => serde_json::to_value(s),
@@ -104,31 +111,24 @@ impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
#[cfg(test)]
mod tests {
use serde_json;
use super::*;
#[test]
pub fn test_serialize_string_vector() {
fn test_serialize_string_vector() {
let mut builder = StringVectorBuilder::with_capacity(3);
builder.push(Some("hello"));
builder.push(None);
builder.push(Some("world"));
let string_vector = builder.finish();
let serialized = serialize_to_json_string(string_vector.serialize_to_json().unwrap());
let serialized =
serde_json::to_string(&string_vector.serialize_to_json().unwrap()).unwrap();
assert_eq!(r#"["hello",null,"world"]"#, serialized);
}
/// Serializes `val` to a compact JSON string.
///
/// # Panics
/// Panics if `val` fails to serialize.
pub fn serialize_to_json_string<T>(val: T) -> String
where
    T: serde::Serialize,
{
    serde_json::to_string(&val).unwrap()
}
#[test]
pub fn test_from_arrow_array() {
fn test_from_arrow_array() {
let mut builder = MutableStringArray::new();
builder.push(Some("A"));
builder.push(Some("B"));
@@ -138,7 +138,26 @@ mod tests {
let vector = StringVector::from(string_array);
assert_eq!(
r#"["A","B",null,"D"]"#,
serialize_to_json_string(vector.serialize_to_json().unwrap())
serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap(),
);
}
#[test]
fn test_string_vector_build_get() {
    let expected = [Some("hello"), None, Some("world")];

    let mut builder = StringVectorBuilder::with_capacity(4);
    builder.push(Some("hello"));
    builder.push(None);
    builder.push(Some("world"));
    let vector = builder.finish();

    // Random access: the null slot in the middle yields `None`.
    for (idx, item) in expected.iter().enumerate() {
        assert_eq!(*item, vector.get_data(idx));
    }

    // Iteration yields every slot in order and then stops.
    let collected: Vec<_> = vector.iter_data().collect();
    assert_eq!(expected.to_vec(), collected);
}
}