feat: RecordBatch serialization (#26)

This commit is contained in:
Lei, Huang
2022-05-17 17:01:00 +08:00
committed by GitHub
parent 3d374cce68
commit bd4fe1f5bc
7 changed files with 177 additions and 57 deletions

View File

@@ -11,6 +11,9 @@ pub enum Error {
source: serde_json::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to convert datafusion type: {}", from))]
Conversion { from: String, backtrace: Backtrace },
}
impl ErrorExt for Error {

View File

@@ -4,11 +4,20 @@ pub mod primitive;
use std::any::Any;
use std::sync::Arc;
use arrow::array::Array;
use arrow::array::ArrayRef;
use arrow::datatypes::DataType;
pub use binary::*;
use paste::paste;
pub use primitive::*;
use serde_json::Value;
use crate::data_type::DataTypeRef;
use crate::serialize::Serializable;
use crate::vectors::{
Float32Vector, Float64Vector, Int16Vector, Int32Vector, Int64Vector, Int8Vector, UInt16Vector,
UInt32Vector, UInt64Vector, UInt8Vector,
};
/// Vector of data values.
pub trait Vector: Send + Sync {
@@ -34,3 +43,99 @@ pub trait Vector: Send + Sync {
}
/// Shared, reference-counted handle to a type-erased [`Vector`].
pub type VectorRef = Arc<dyn Vector>;
/// Fallible, consuming conversion of a value into a [`VectorRef`].
pub trait TryIntoVector {
/// Consumes `self` and converts it into a [`VectorRef`], or returns the crate error
/// describing why the conversion failed.
fn try_into_vector(self) -> crate::error::Result<VectorRef>;
}
/// Generates a blanket [`TryIntoVector`] implementation for every type that can be viewed
/// as an Arrow array (`A: AsRef<dyn Array>`).
///
/// Each identifier passed to the macro must name a `DataType` variant and, with the
/// `Vector` suffix appended, a vector type in scope that provides `try_from_arrow_array`
/// (e.g. `Int32` maps to `DataType::Int32` and `Int32Vector`).
///
/// The generated `try_into_vector` returns a `Conversion` error, carrying the debug
/// representation of the offending data type, when the array's data type is not in the
/// list or the downcast to the concrete array type fails.
macro_rules! impl_try_into_vector_for_arrow_array {
    // `ident` (not `expr`): the token is used as an enum variant name and pasted into a
    // type name, so anything other than a plain identifier is rejected at the call site.
    ( $($ty:ident),+ $(,)? ) => {
        paste! {
            impl<A> TryIntoVector for A
            where
                A: AsRef<dyn Array>,
            {
                fn try_into_vector(self) -> crate::error::Result<VectorRef> {
                    match self.as_ref().data_type() {
                        $(
                            DataType::$ty => Ok(Arc::new(
                                <[<$ty Vector>]>::try_from_arrow_array(self.as_ref())?,
                            )),
                        )+
                        // This is a fallible API: report unsupported types to the caller
                        // instead of panicking.
                        unsupported => crate::error::ConversionSnafu {
                            from: format!("{:?}", unsupported),
                        }
                        .fail(),
                    }
                }
            }
        }
    };
}
/// Generates a blanket [`Serializable`] implementation for every thread-safe type that can
/// be viewed as an Arrow array (`A: AsRef<dyn Array> + Send + Sync`).
///
/// Each identifier passed to the macro must name a `DataType` variant and, with the
/// `Vector` suffix appended, a vector type in scope that provides `try_from_arrow_array`
/// and `serialize_to_json` (e.g. `Int32` maps to `DataType::Int32` and `Int32Vector`).
///
/// The generated `serialize_to_json` first converts the array into the matching vector
/// and delegates to that vector's serialization. It returns a `Conversion` error, carrying
/// the debug representation of the offending data type, when the array's data type is not
/// in the list or the downcast to the concrete array type fails.
macro_rules! impl_arrow_array_serialize {
    // `ident` (not `expr`): the token is used as an enum variant name and pasted into a
    // type name, so anything other than a plain identifier is rejected at the call site.
    ( $($ty:ident),+ $(,)? ) => {
        paste! {
            impl<A> Serializable for A
            where
                A: AsRef<dyn Array> + Send + Sync,
            {
                fn serialize_to_json(&self) -> crate::error::Result<Vec<Value>> {
                    match self.as_ref().data_type() {
                        $(
                            DataType::$ty => {
                                <[<$ty Vector>]>::try_from_arrow_array(self.as_ref())?
                                    .serialize_to_json()
                            }
                        )+
                        // This is a fallible API: report unsupported types to the caller
                        // instead of panicking.
                        unsupported => crate::error::ConversionSnafu {
                            from: format!("{:?}", unsupported),
                        }
                        .fail(),
                    }
                }
            }
        }
    };
}
// Instantiates the blanket `TryIntoVector` impl for Arrow arrays; only the primitive
// numeric data types listed here are converted to vectors.
// todo(hl): implement more type to vector conversion
impl_try_into_vector_for_arrow_array![
Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64
];
// Instantiates the blanket `Serializable` impl for Arrow arrays; only the primitive
// numeric data types listed here are serialized.
// todo(hl): implement serializations for more types
impl_arrow_array_serialize![
Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64
];
#[cfg(test)]
mod tests {
    use arrow::array::{Array, PrimitiveArray};

    use super::*;
    use crate::types::DataTypeBuilder;

    /// Serializes `column` through its `Serializable` impl and renders the resulting JSON
    /// values as compact JSON text.
    fn to_json_string(column: Arc<dyn Array>) -> String {
        let values = column.serialize_to_json().unwrap();
        serde_json::to_string(&values).unwrap()
    }

    #[test]
    fn test_df_columns_to_vector() {
        let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1, 2, 3]));
        let vector = df_column.try_into_vector().unwrap();
        assert_eq!(
            i32::build_data_type().as_arrow_type(),
            vector.data_type().as_arrow_type()
        );
    }

    #[test]
    fn test_serialize_i32_vector() {
        let df_column: Arc<dyn Array> = Arc::new(PrimitiveArray::from_slice(vec![1i32, 2, 3]));
        assert_eq!("[1,2,3]", to_json_string(df_column));
    }

    // Previously named `test_serialize_i8_vector` although it builds a `u8` array.
    #[test]
    fn test_serialize_u8_vector() {
        let df_column: Arc<dyn Array> =
            Arc::new(PrimitiveArray::from_slice(vec![1u8, 2u8, 3u8]));
        assert_eq!("[1,2,3]", to_json_string(df_column));
    }

    // Covers the signed 8-bit path, including a negative value.
    #[test]
    fn test_serialize_i8_vector() {
        let df_column: Arc<dyn Array> =
            Arc::new(PrimitiveArray::from_slice(vec![-1i8, 0i8, 1i8]));
        assert_eq!("[-1,0,1]", to_json_string(df_column));
    }
}

View File

@@ -2,12 +2,14 @@ use std::any::Any;
use std::slice::Iter;
use std::sync::Arc;
use arrow::array::{ArrayRef, MutablePrimitiveArray, PrimitiveArray};
use arrow::array::{Array, ArrayRef, MutablePrimitiveArray, PrimitiveArray};
use arrow::bitmap::utils::ZipValidity;
use serde_json::Value as JsonValue;
use snafu::ResultExt;
use snafu::{OptionExt, ResultExt};
use crate::data_type::DataTypeRef;
use crate::error;
use crate::error::ConversionSnafu;
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
use crate::types::{DataTypeBuilder, Primitive};
use crate::vectors::Vector;
@@ -47,6 +49,19 @@ impl<'a, T: Primitive> PrimitiveVector<T> {
/// Returns a slice iterator over the values of the wrapped Arrow array
/// (`self.array.values()`).
pub fn iter(&'a self) -> std::slice::Iter<'a, T> {
    let values = self.array.values();
    values.iter()
}
/// Builds a `PrimitiveVector` from a type-erased Arrow array.
///
/// The array is downcast to `PrimitiveArray<T>` and a clone of it becomes the vector's
/// backing array.
///
/// # Errors
///
/// Returns a `Conversion` error carrying the debug representation of the array's data
/// type when `array` is not a `PrimitiveArray<T>`.
pub fn try_from_arrow_array(array: &dyn Array) -> Result<Self, error::Error> {
    let primitive = array
        .as_any()
        .downcast_ref::<PrimitiveArray<T>>()
        .with_context(|| ConversionSnafu {
            from: format!("{:?}", array.data_type()),
        })?;
    Ok(Self::new(primitive.clone()))
}
}
impl<T: Primitive + DataTypeBuilder> ScalarVector for PrimitiveVector<T> {