mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-21 23:40:38 +00:00
feat: Implements validity() and null_count() for Vector (#38)
* feat: Add validity() to Vector * test(datatypes): Add more tests and fix get_data() not returning None for null
This commit is contained in:
@@ -24,6 +24,6 @@ version="0.10"
|
||||
features = ["io_csv", "io_json", "io_parquet", "io_parquet_compression", "io_ipc", "ahash", "compute", "serde_types"]
|
||||
|
||||
[dev-dependencies]
|
||||
serde_json = "1.0.81"
|
||||
serde_json = "1.0"
|
||||
tokio = { version = "1.18", features = ["full"] }
|
||||
|
||||
|
||||
@@ -14,5 +14,5 @@ common-error = { path = "../common/error" }
|
||||
enum_dispatch = "0.3"
|
||||
paste = "1.0"
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_json = "1.0.79"
|
||||
serde_json = "1.0"
|
||||
snafu = { version = "0.7", features = ["backtraces"] }
|
||||
|
||||
@@ -2,4 +2,4 @@ pub use crate::data_type::{ConcreteDataType, DataType, DataTypeRef};
|
||||
pub use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
pub use crate::type_id::LogicalTypeId;
|
||||
pub use crate::value::Value;
|
||||
pub use crate::vectors::{Vector, VectorRef};
|
||||
pub use crate::vectors::{Validity, Vector, VectorRef};
|
||||
|
||||
@@ -20,6 +20,9 @@ pub trait ScalarVector: Vector {
|
||||
/// Returns the reference to an element at given position.
|
||||
///
|
||||
/// Note: `get_data()` has bad performance; avoid calling this function inside a loop.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if `idx >= self.len()`.
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
|
||||
|
||||
/// Returns iterator of current vector.
|
||||
|
||||
@@ -8,6 +8,7 @@ use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::ArrayRef;
|
||||
use arrow::bitmap::Bitmap;
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
pub use binary::*;
|
||||
pub use boolean::*;
|
||||
@@ -24,6 +25,25 @@ pub use crate::vectors::{
|
||||
UInt8Vector,
|
||||
};
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Validity<'a> {
|
||||
/// Per-slot validity bitmap: a set bit means the slot is valid, an unset bit means it is null.
|
||||
Slots(&'a Bitmap),
|
||||
/// All slots are valid.
|
||||
AllValid,
|
||||
/// All slots are null.
|
||||
AllNull,
|
||||
}
|
||||
|
||||
impl<'a> Validity<'a> {
|
||||
pub fn slots(&self) -> Option<&Bitmap> {
|
||||
match self {
|
||||
Validity::Slots(bitmap) => Some(bitmap),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Vector of data values.
|
||||
pub trait Vector: Send + Sync + Serializable {
|
||||
/// Returns the data type of the vector.
|
||||
@@ -45,6 +65,20 @@ pub trait Vector: Send + Sync + Serializable {
|
||||
|
||||
/// Convert this vector to a new arrow [ArrayRef].
|
||||
fn to_arrow_array(&self) -> ArrayRef;
|
||||
|
||||
/// Returns the validity of the vector.
|
||||
fn validity(&self) -> Validity;
|
||||
|
||||
/// The number of null slots on this [`Vector`].
|
||||
/// # Implementation
|
||||
/// This is `O(1)`.
|
||||
fn null_count(&self) -> usize {
|
||||
match self.validity() {
|
||||
Validity::Slots(bitmap) => bitmap.null_count(),
|
||||
Validity::AllValid => 0,
|
||||
Validity::AllNull => self.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type VectorRef = Arc<dyn Vector>;
|
||||
@@ -101,16 +135,16 @@ macro_rules! impl_try_from_arrow_array_for_vector {
|
||||
pub(crate) use impl_try_from_arrow_array_for_vector;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
pub mod tests {
|
||||
use arrow::array::{Array, PrimitiveArray};
|
||||
use serde::Serialize;
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::data_type::DataType;
|
||||
use crate::types::DataTypeBuilder;
|
||||
|
||||
#[test]
|
||||
pub fn test_df_columns_to_vector() {
|
||||
fn test_df_columns_to_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1, 2, 3]));
|
||||
let vector = try_into_vector(df_column).unwrap();
|
||||
assert_eq!(
|
||||
@@ -120,28 +154,22 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_serialize_i32_vector() {
|
||||
fn test_serialize_i32_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::<i32>::from_slice(vec![1, 2, 3]));
|
||||
let json_value = try_into_vector(df_column)
|
||||
.unwrap()
|
||||
.serialize_to_json()
|
||||
.unwrap();
|
||||
let mut output = vec![];
|
||||
let mut serializer = serde_json::ser::Serializer::new(&mut output);
|
||||
json_value.serialize(&mut serializer).unwrap();
|
||||
assert_eq!(b"[1,2,3]", output.as_slice());
|
||||
assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_serialize_i8_vector() {
|
||||
fn test_serialize_i8_vector() {
|
||||
let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1u8, 2u8, 3u8]));
|
||||
let json_value = try_into_vector(df_column)
|
||||
.unwrap()
|
||||
.serialize_to_json()
|
||||
.unwrap();
|
||||
let mut output = vec![];
|
||||
let mut serializer = serde_json::ser::Serializer::new(&mut output);
|
||||
json_value.serialize(&mut serializer).unwrap();
|
||||
assert_eq!(b"[1,2,3]", output.as_slice());
|
||||
assert_eq!("[1,2,3]", serde_json::to_string(&json_value).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::BinaryValueIter;
|
||||
use arrow::array::{ArrayRef, BinaryArray};
|
||||
use arrow::array::{Array, ArrayRef, BinaryArray};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use snafu::OptionExt;
|
||||
use snafu::ResultExt;
|
||||
@@ -15,7 +15,7 @@ use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::BinaryType;
|
||||
use crate::vectors::impl_try_from_arrow_array_for_vector;
|
||||
use crate::vectors::Vector;
|
||||
use crate::vectors::{Validity, Vector};
|
||||
|
||||
/// Vector of binary strings.
|
||||
#[derive(Debug)]
|
||||
@@ -45,6 +45,13 @@ impl Vector for BinaryVector {
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
match self.array.validity() {
|
||||
Some(bitmap) => Validity::Slots(bitmap),
|
||||
None => Validity::AllValid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BinaryVector {
|
||||
@@ -53,7 +60,7 @@ impl ScalarVector for BinaryVector {
|
||||
type Builder = BinaryVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
@@ -91,8 +98,7 @@ impl ScalarVectorBuilder for BinaryVectorBuilder {
|
||||
|
||||
impl Serializable for BinaryVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
self.array
|
||||
.iter()
|
||||
self.iter_data()
|
||||
.map(|v| match v {
|
||||
None => Ok(serde_json::Value::Null), // if the binary value is not present, map it to NULL
|
||||
Some(vec) => serde_json::to_value(vec),
|
||||
@@ -106,30 +112,87 @@ impl_try_from_arrow_array_for_vector!(LargeBinaryArray, BinaryVector);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde::*;
|
||||
use serde_json;
|
||||
|
||||
use super::BinaryVector;
|
||||
use super::*;
|
||||
use crate::arrow_array::LargeBinaryArray;
|
||||
use crate::serialize::Serializable;
|
||||
|
||||
#[test]
|
||||
pub fn test_serialize_binary_vector_to_json() {
|
||||
let vector = BinaryVector {
|
||||
array: LargeBinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]),
|
||||
};
|
||||
fn test_serialize_binary_vector_to_json() {
|
||||
let vector = BinaryVector::from(LargeBinaryArray::from_slice(&vec![
|
||||
vec![1, 2, 3],
|
||||
vec![1, 2, 3],
|
||||
]));
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
let mut output = vec![];
|
||||
let mut serializer = serde_json::ser::Serializer::new(&mut output);
|
||||
json_value.serialize(&mut serializer).unwrap();
|
||||
assert_eq!("[[1,2,3],[1,2,3]]", String::from_utf8_lossy(&output));
|
||||
assert_eq!(
|
||||
"[[1,2,3],[1,2,3]]",
|
||||
serde_json::to_string(&json_value).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_from_arrow_array() {
|
||||
fn test_serialize_binary_vector_with_null_to_json() {
|
||||
let mut builder = BinaryVectorBuilder::with_capacity(4);
|
||||
builder.push(Some(&[1, 2, 3]));
|
||||
builder.push(None);
|
||||
builder.push(Some(&[4, 5, 6]));
|
||||
let vector = builder.finish();
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
assert_eq!(
|
||||
"[[1,2,3],null,[4,5,6]]",
|
||||
serde_json::to_string(&json_value).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let arrow_array = LargeBinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]);
|
||||
let original = arrow_array.clone();
|
||||
let vector = BinaryVector::from(arrow_array);
|
||||
assert_eq!(original, vector.array);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_binary_vector_build_get() {
|
||||
let mut builder = BinaryVectorBuilder::with_capacity(4);
|
||||
builder.push(Some(b"hello"));
|
||||
builder.push(Some(b"happy"));
|
||||
builder.push(Some(b"world"));
|
||||
builder.push(None);
|
||||
|
||||
let vector = builder.finish();
|
||||
assert_eq!(b"hello", vector.get_data(0).unwrap());
|
||||
assert_eq!(None, vector.get_data(3));
|
||||
|
||||
let mut iter = vector.iter_data();
|
||||
assert_eq!(b"hello", iter.next().unwrap().unwrap());
|
||||
assert_eq!(b"happy", iter.next().unwrap().unwrap());
|
||||
assert_eq!(b"world", iter.next().unwrap().unwrap());
|
||||
assert_eq!(None, iter.next().unwrap());
|
||||
assert_eq!(None, iter.next());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_binary_vector_validity() {
|
||||
let mut builder = BinaryVectorBuilder::with_capacity(4);
|
||||
builder.push(Some(b"hello"));
|
||||
builder.push(Some(b"world"));
|
||||
let vector = builder.finish();
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
|
||||
let mut builder = BinaryVectorBuilder::with_capacity(3);
|
||||
builder.push(Some(b"hello"));
|
||||
builder.push(None);
|
||||
builder.push(Some(b"world"));
|
||||
let vector = builder.finish();
|
||||
assert_eq!(1, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(1, slots.null_count());
|
||||
assert!(!slots.get_bit(1));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::any::Any;
|
||||
use std::borrow::Borrow;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{ArrayRef, BooleanArray, MutableBooleanArray};
|
||||
use arrow::array::{Array, ArrayRef, BooleanArray, MutableBooleanArray};
|
||||
use arrow::bitmap::utils::{BitmapIter, ZipValidity};
|
||||
use snafu::OptionExt;
|
||||
use snafu::ResultExt;
|
||||
@@ -13,7 +13,7 @@ use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::BooleanType;
|
||||
use crate::vectors::impl_try_from_arrow_array_for_vector;
|
||||
use crate::vectors::Vector;
|
||||
use crate::vectors::{Validity, Vector};
|
||||
|
||||
/// Vector of boolean.
|
||||
#[derive(Debug)]
|
||||
@@ -67,6 +67,13 @@ impl Vector for BooleanVector {
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
match self.array.validity() {
|
||||
Some(bitmap) => Validity::Slots(bitmap),
|
||||
None => Validity::AllValid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BooleanVector {
|
||||
@@ -75,7 +82,7 @@ impl ScalarVector for BooleanVector {
|
||||
type Builder = BooleanVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
@@ -124,88 +131,92 @@ impl_try_from_arrow_array_for_vector!(BooleanArray, BooleanVector);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde::*;
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::serialize::Serializable;
|
||||
|
||||
#[test]
|
||||
pub fn test_serialize_boolean_vector_to_json() {
|
||||
let vector = BooleanVector {
|
||||
array: BooleanArray::from_slice(&vec![true, false, true, true, false, false]),
|
||||
};
|
||||
fn test_serialize_boolean_vector_to_json() {
|
||||
let vector = BooleanVector::from(vec![true, false, true, true, false, false]);
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
let mut output = vec![];
|
||||
let mut serializer = serde_json::ser::Serializer::new(&mut output);
|
||||
json_value.serialize(&mut serializer).unwrap();
|
||||
assert_eq!(
|
||||
"[true,false,true,true,false,false]",
|
||||
String::from_utf8_lossy(&output)
|
||||
serde_json::to_string(&json_value).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_boolean_vector_with_null_to_json() {
|
||||
let vector = BooleanVector::from(vec![Some(true), None, Some(false)]);
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
assert_eq!(
|
||||
"[true,null,false]",
|
||||
serde_json::to_string(&json_value).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boolean_vector_from_vec() {
|
||||
let vec = BooleanVector::from(vec![false, true, false, true]);
|
||||
let input = vec![false, true, false, true];
|
||||
let vec = BooleanVector::from(input.clone());
|
||||
assert_eq!(4, vec.len());
|
||||
for i in 0..4 {
|
||||
assert_eq!(
|
||||
i == 1 || i == 3,
|
||||
vec.get_data(i).unwrap(),
|
||||
"failed at {}",
|
||||
i
|
||||
)
|
||||
for (i, v) in input.into_iter().enumerate() {
|
||||
assert_eq!(Some(v), vec.get_data(i), "failed at {}", i)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boolean_vector_from_iter() {
|
||||
let v = vec![Some(false), Some(true), Some(false), Some(true)];
|
||||
let vec = v.into_iter().collect::<BooleanVector>();
|
||||
let input = vec![Some(false), Some(true), Some(false), Some(true)];
|
||||
let vec = input.iter().collect::<BooleanVector>();
|
||||
assert_eq!(4, vec.len());
|
||||
for i in 0..3 {
|
||||
assert_eq!(
|
||||
i == 1 || i == 3,
|
||||
vec.get_data(i).unwrap(),
|
||||
"failed at {}",
|
||||
i
|
||||
)
|
||||
for (i, v) in input.into_iter().enumerate() {
|
||||
assert_eq!(v, vec.get_data(i), "failed at {}", i)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boolean_vector_from_vec_option() {
|
||||
let vec = BooleanVector::from(vec![Some(false), Some(true), None, Some(true)]);
|
||||
let input = vec![Some(false), Some(true), None, Some(true)];
|
||||
let vec = BooleanVector::from(input.clone());
|
||||
assert_eq!(4, vec.len());
|
||||
for i in 0..4 {
|
||||
assert_eq!(
|
||||
i == 1 || i == 3,
|
||||
vec.get_data(i).unwrap(),
|
||||
"failed at {}",
|
||||
i
|
||||
)
|
||||
for (i, v) in input.into_iter().enumerate() {
|
||||
assert_eq!(v, vec.get_data(i), "failed at {}", i)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boolean_vector_builder() {
|
||||
let mut builder = BooleanVectorBuilder::with_capacity(4);
|
||||
builder.push(Some(false));
|
||||
builder.push(Some(true));
|
||||
builder.push(Some(false));
|
||||
builder.push(Some(true));
|
||||
fn test_boolean_vector_build_get() {
|
||||
let input = [Some(true), None, Some(false)];
|
||||
let mut builder = BooleanVectorBuilder::with_capacity(3);
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let vector = builder.finish();
|
||||
assert_eq!(input.len(), vector.len());
|
||||
|
||||
let vec = builder.finish();
|
||||
let res: Vec<_> = vector.iter_data().collect();
|
||||
assert_eq!(input, &res[..]);
|
||||
|
||||
assert_eq!(4, vec.len());
|
||||
for i in 0..4 {
|
||||
assert_eq!(
|
||||
i == 1 || i == 3,
|
||||
vec.get_data(i).unwrap(),
|
||||
"failed at {}",
|
||||
i
|
||||
)
|
||||
for (i, v) in input.into_iter().enumerate() {
|
||||
assert_eq!(v, vector.get_data(i));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boolean_vector_validity() {
|
||||
let vector = BooleanVector::from(vec![Some(true), None, Some(false)]);
|
||||
assert_eq!(1, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(1, slots.null_count());
|
||||
assert!(!slots.get_bit(1));
|
||||
|
||||
let vector = BooleanVector::from(vec![true, false, false]);
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,14 +6,13 @@ use arrow::array::ArrayRef;
|
||||
use arrow::array::{Array, NullArray};
|
||||
use arrow::datatypes::DataType as ArrowDataType;
|
||||
use snafu::OptionExt;
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::{Result, SerializeSnafu};
|
||||
use crate::error::Result;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::NullType;
|
||||
use crate::vectors::impl_try_from_arrow_array_for_vector;
|
||||
use crate::vectors::Vector;
|
||||
use crate::vectors::{Validity, Vector};
|
||||
|
||||
pub struct NullVector {
|
||||
array: NullArray,
|
||||
@@ -49,6 +48,10 @@ impl Vector for NullVector {
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
Validity::AllNull
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for NullVector {
|
||||
@@ -57,14 +60,11 @@ impl fmt::Debug for NullVector {
|
||||
}
|
||||
}
|
||||
|
||||
const NULL_STR: &str = "NULL";
|
||||
impl Serializable for NullVector {
|
||||
fn serialize_to_json(&self) -> Result<Vec<serde_json::Value>> {
|
||||
vec![NULL_STR.to_owned(); self.len()]
|
||||
.into_iter()
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
Ok(std::iter::repeat(serde_json::Value::Null)
|
||||
.take(self.len())
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,16 +72,16 @@ impl_try_from_arrow_array_for_vector!(NullArray, NullVector);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json::Value as JsonValue;
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_null_array() {
|
||||
let null_arr = NullVector::new(32);
|
||||
fn test_null_vector() {
|
||||
let vector = NullVector::new(32);
|
||||
|
||||
assert_eq!(null_arr.len(), 32);
|
||||
let arrow_arr = null_arr.to_arrow_array();
|
||||
assert_eq!(vector.len(), 32);
|
||||
let arrow_arr = vector.to_arrow_array();
|
||||
assert_eq!(arrow_arr.null_count(), 32);
|
||||
|
||||
let array2 = arrow_arr.slice(8, 16);
|
||||
@@ -90,21 +90,25 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_debug_null_array() {
|
||||
fn test_debug_null_vector() {
|
||||
let array = NullVector::new(1024 * 1024);
|
||||
assert_eq!(format!("{:?}", array), "NullVector(1048576)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_json() {
|
||||
let null_vec = NullVector::new(3);
|
||||
let vector = NullVector::new(3);
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
assert_eq!(
|
||||
vec![
|
||||
JsonValue::from(NULL_STR),
|
||||
JsonValue::from(NULL_STR),
|
||||
JsonValue::from(NULL_STR),
|
||||
],
|
||||
null_vec.serialize_to_json().unwrap()
|
||||
"[null,null,null]",
|
||||
serde_json::to_string(&json_value).unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_null_vector_validity() {
|
||||
let vector = NullVector::new(5);
|
||||
assert_eq!(Validity::AllNull, vector.validity());
|
||||
assert_eq!(5, vector.null_count());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::iter::FromIterator;
|
||||
use std::slice::Iter;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{ArrayRef, MutablePrimitiveArray, PrimitiveArray};
|
||||
use arrow::array::{Array, ArrayRef, MutablePrimitiveArray, PrimitiveArray};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value as JsonValue;
|
||||
use snafu::{OptionExt, ResultExt};
|
||||
@@ -14,7 +14,7 @@ use crate::error::{Result, SerializeSnafu};
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::{DataTypeBuilder, Primitive};
|
||||
use crate::vectors::Vector;
|
||||
use crate::vectors::{Validity, Vector};
|
||||
|
||||
/// Vector for primitive data types.
|
||||
#[derive(Debug)]
|
||||
@@ -73,6 +73,13 @@ impl<T: Primitive + DataTypeBuilder> Vector for PrimitiveVector<T> {
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
match self.array.validity() {
|
||||
Some(bitmap) => Validity::Slots(bitmap),
|
||||
None => Validity::AllValid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive> From<PrimitiveArray<T>> for PrimitiveVector<T> {
|
||||
@@ -89,21 +96,13 @@ impl<T: Primitive, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr> for Pr
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Primitive> PrimitiveVector<T> {
|
||||
/// implement iter for PrimitiveVector
|
||||
#[inline]
|
||||
pub fn iter(&'a self) -> std::slice::Iter<'a, T> {
|
||||
self.array.values().iter()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> ScalarVector for PrimitiveVector<T> {
|
||||
type RefItem<'a> = T;
|
||||
type Iter<'a> = PrimitiveIter<'a, T>;
|
||||
type Builder = PrimitiveVectorBuilder<T>;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
@@ -168,7 +167,7 @@ impl<T: Primitive + DataTypeBuilder> ScalarVectorBuilder for PrimitiveVectorBuil
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> Serializable for PrimitiveVector<T> {
|
||||
fn serialize_to_json(&self) -> Result<Vec<JsonValue>> {
|
||||
self.iter()
|
||||
self.iter_data()
|
||||
.map(serde_json::to_value)
|
||||
.collect::<serde_json::Result<_>>()
|
||||
.context(SerializeSnafu)
|
||||
@@ -177,43 +176,92 @@ impl<T: Primitive + DataTypeBuilder> Serializable for PrimitiveVector<T> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
use crate::serialize::Serializable;
|
||||
|
||||
fn assert_vec_eq(v: PrimitiveVector<i32>) {
|
||||
assert_eq!(
|
||||
vec![
|
||||
JsonValue::from(1i32),
|
||||
JsonValue::from(2i32),
|
||||
JsonValue::from(3i32),
|
||||
JsonValue::from(4i32)
|
||||
],
|
||||
v.serialize_to_json().unwrap()
|
||||
);
|
||||
fn check_vec(v: PrimitiveVector<i32>) {
|
||||
let json_value = v.serialize_to_json().unwrap();
|
||||
assert_eq!("[1,2,3,4]", serde_json::to_string(&json_value).unwrap(),);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_values() {
|
||||
let v = PrimitiveVector::<i32>::from_values(vec![1, 2, 3, 4]);
|
||||
assert_vec_eq(v);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_vec() {
|
||||
let v = PrimitiveVector::<i32>::from_vec(vec![1, 2, 3, 4]);
|
||||
assert_vec_eq(v);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_slice() {
|
||||
let v = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
|
||||
assert_vec_eq(v);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_serialize_primitive_vector_with_null_to_json() {
|
||||
let input = [Some(1i32), Some(2i32), None, Some(4i32), None];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let vector = builder.finish();
|
||||
|
||||
let json_value = vector.serialize_to_json().unwrap();
|
||||
assert_eq!(
|
||||
"[1,2,null,4,null]",
|
||||
serde_json::to_string(&json_value).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_arrow_array() {
|
||||
let arrow_array = PrimitiveArray::from_slice(vec![1, 2, 3, 4]);
|
||||
let v = PrimitiveVector::from(arrow_array);
|
||||
assert_vec_eq(v);
|
||||
check_vec(v);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_primitive_vector_build_get() {
|
||||
let input = [Some(1i32), Some(2i32), None, Some(4i32), None];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let vector = builder.finish();
|
||||
assert_eq!(input.len(), vector.len());
|
||||
|
||||
for (i, v) in input.into_iter().enumerate() {
|
||||
assert_eq!(v, vector.get_data(i));
|
||||
}
|
||||
|
||||
let res: Vec<_> = vector.iter_data().collect();
|
||||
assert_eq!(input, &res[..]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_primitive_vector_validity() {
|
||||
let input = [Some(1i32), Some(2i32), None, None];
|
||||
let mut builder = PrimitiveVectorBuilder::with_capacity(input.len());
|
||||
for v in input {
|
||||
builder.push(v);
|
||||
}
|
||||
let vector = builder.finish();
|
||||
assert_eq!(2, vector.null_count());
|
||||
let validity = vector.validity();
|
||||
let slots = validity.slots().unwrap();
|
||||
assert_eq!(2, slots.null_count());
|
||||
assert!(!slots.get_bit(2));
|
||||
assert!(!slots.get_bit(3));
|
||||
|
||||
let vector = PrimitiveVector::<i32>::from_slice(vec![1, 2, 3, 4]);
|
||||
assert_eq!(0, vector.null_count());
|
||||
assert_eq!(Validity::AllValid, vector.validity());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::array::{ArrayRef, Utf8ValuesIter};
|
||||
use arrow::array::{Array, ArrayRef, Utf8ValuesIter};
|
||||
use arrow::bitmap::utils::ZipValidity;
|
||||
use serde_json::Value;
|
||||
use snafu::OptionExt;
|
||||
@@ -10,7 +10,7 @@ use snafu::ResultExt;
|
||||
use crate::arrow_array::{MutableStringArray, StringArray};
|
||||
use crate::data_type::ConcreteDataType;
|
||||
use crate::error::SerializeSnafu;
|
||||
use crate::prelude::{ScalarVectorBuilder, Vector};
|
||||
use crate::prelude::{ScalarVectorBuilder, Validity, Vector};
|
||||
use crate::scalars::ScalarVector;
|
||||
use crate::serialize::Serializable;
|
||||
use crate::types::StringType;
|
||||
@@ -44,6 +44,13 @@ impl Vector for StringVector {
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
|
||||
fn validity(&self) -> Validity {
|
||||
match self.array.validity() {
|
||||
Some(bitmap) => Validity::Slots(bitmap),
|
||||
None => Validity::AllValid,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for StringVector {
|
||||
@@ -52,9 +59,10 @@ impl ScalarVector for StringVector {
|
||||
type Builder = StringVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
match idx < self.array.len() {
|
||||
true => Some(self.array.value(idx)),
|
||||
false => None,
|
||||
if self.array.is_valid(idx) {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,8 +97,7 @@ impl ScalarVectorBuilder for StringVectorBuilder {
|
||||
|
||||
impl Serializable for StringVector {
|
||||
fn serialize_to_json(&self) -> crate::error::Result<Vec<Value>> {
|
||||
self.array
|
||||
.iter()
|
||||
self.iter_data()
|
||||
.map(|v| match v {
|
||||
None => Ok(serde_json::Value::Null),
|
||||
Some(s) => serde_json::to_value(s),
|
||||
@@ -104,31 +111,24 @@ impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_json;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
pub fn test_serialize_string_vector() {
|
||||
fn test_serialize_string_vector() {
|
||||
let mut builder = StringVectorBuilder::with_capacity(3);
|
||||
builder.push(Some("hello"));
|
||||
builder.push(None);
|
||||
builder.push(Some("world"));
|
||||
let string_vector = builder.finish();
|
||||
let serialized = serialize_to_json_string(string_vector.serialize_to_json().unwrap());
|
||||
let serialized =
|
||||
serde_json::to_string(&string_vector.serialize_to_json().unwrap()).unwrap();
|
||||
assert_eq!(r#"["hello",null,"world"]"#, serialized);
|
||||
}
|
||||
|
||||
pub fn serialize_to_json_string<T>(val: T) -> String
|
||||
where
|
||||
T: serde::Serialize,
|
||||
{
|
||||
let mut output = vec![];
|
||||
let mut serializer = serde_json::Serializer::new(&mut output);
|
||||
val.serialize(&mut serializer).unwrap();
|
||||
String::from_utf8_lossy(&output).into()
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_from_arrow_array() {
|
||||
fn test_from_arrow_array() {
|
||||
let mut builder = MutableStringArray::new();
|
||||
builder.push(Some("A"));
|
||||
builder.push(Some("B"));
|
||||
@@ -138,7 +138,26 @@ mod tests {
|
||||
let vector = StringVector::from(string_array);
|
||||
assert_eq!(
|
||||
r#"["A","B",null,"D"]"#,
|
||||
serialize_to_json_string(vector.serialize_to_json().unwrap())
|
||||
serde_json::to_string(&vector.serialize_to_json().unwrap()).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_string_vector_build_get() {
|
||||
let mut builder = StringVectorBuilder::with_capacity(4);
|
||||
builder.push(Some("hello"));
|
||||
builder.push(None);
|
||||
builder.push(Some("world"));
|
||||
let vector = builder.finish();
|
||||
|
||||
assert_eq!(Some("hello"), vector.get_data(0));
|
||||
assert_eq!(None, vector.get_data(1));
|
||||
assert_eq!(Some("world"), vector.get_data(2));
|
||||
|
||||
let mut iter = vector.iter_data();
|
||||
assert_eq!("hello", iter.next().unwrap().unwrap());
|
||||
assert_eq!(None, iter.next().unwrap());
|
||||
assert_eq!("world", iter.next().unwrap().unwrap());
|
||||
assert_eq!(None, iter.next());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user