From e75a54b76690f8c295b3d6da60da636ac4f67191 Mon Sep 17 00:00:00 2001 From: "Lei, Huang" Date: Thu, 19 May 2022 16:10:00 +0800 Subject: [PATCH] feat: impl From arrow array for exsisting vectors (#32) * feat: impl From arrow array for exsisting vectors * fix: review comments * feat: clippy forbid prints --- .github/workflows/develop.yml | 2 +- src/datatypes/src/vectors/binary.rs | 16 ++++++++++- src/datatypes/src/vectors/primitive.rs | 40 ++++++++++++++++++++++++++ src/datatypes/src/vectors/string.rs | 23 ++++++++++++++- 4 files changed, 78 insertions(+), 3 deletions(-) diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index a2a3505cf4..2c94d17189 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -70,4 +70,4 @@ jobs: - uses: actions-rs/cargo@v1 with: command: clippy - args: --workspace --all-targets -- -D warnings + args: --workspace --all-targets -- -D warnings -D clippy::print_stdout -D clippy::print_stderr diff --git a/src/datatypes/src/vectors/binary.rs b/src/datatypes/src/vectors/binary.rs index 847e4123d2..81861faaf8 100644 --- a/src/datatypes/src/vectors/binary.rs +++ b/src/datatypes/src/vectors/binary.rs @@ -1,8 +1,8 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::ArrayRef; use arrow::array::BinaryValueIter; +use arrow::array::{ArrayRef, BinaryArray}; use arrow::bitmap::utils::ZipValidity; use snafu::ResultExt; @@ -21,6 +21,12 @@ pub struct BinaryVector { array: LargeBinaryArray, } +impl From> for BinaryVector { + fn from(array: BinaryArray) -> Self { + Self { array } + } +} + impl Vector for BinaryVector { fn data_type(&self) -> DataTypeRef { BinaryType::arc() @@ -114,4 +120,12 @@ mod tests { json_value.serialize(&mut serializer).unwrap(); assert_eq!("[[1,2,3],[1,2,3]]", String::from_utf8_lossy(&output)); } + + #[test] + pub fn test_from_arrow_array() { + let arrow_array = LargeBinaryArray::from_slice(&vec![vec![1, 2, 3], vec![1, 2, 3]]); + let original = arrow_array.clone(); + let vector = BinaryVector::from(arrow_array); + assert_eq!(original, vector.array); + } } diff --git a/src/datatypes/src/vectors/primitive.rs b/src/datatypes/src/vectors/primitive.rs index 37b7f8f0a4..6b3ca69057 100644 --- a/src/datatypes/src/vectors/primitive.rs +++ b/src/datatypes/src/vectors/primitive.rs @@ -15,6 +15,7 @@ use crate::types::{DataTypeBuilder, Primitive}; use crate::vectors::Vector; /// Vector for primitive data types. +#[derive(Debug)] pub struct PrimitiveVector { array: PrimitiveArray, } @@ -84,6 +85,24 @@ impl ScalarVector for PrimitiveVector { } } +/// #Panics +/// All arrow primitive types should have a corresponding PrimitiveVector +/// todo(hl): DaysMsArray/MonthsDaysNsArray primitive type +impl From> for PrimitiveVector { + fn from(arrow_array: PrimitiveArray) -> Self { + Self::new( + arrow_array + .as_any() + .downcast_ref::>() + .with_context(|| ConversionSnafu { + from: format!("{:?}", arrow_array.data_type()), + }) + .unwrap() + .clone(), + ) + } +} + pub type UInt8Vector = PrimitiveVector; pub type UInt16Vector = PrimitiveVector; pub type UInt32Vector = PrimitiveVector; @@ -156,3 +175,24 @@ impl_serializable! { Int32Vector } impl_serializable! { Int64Vector } impl_serializable! { Float32Vector } impl_serializable! { Float64Vector } + +#[cfg(test)] +mod tests { + use super::*; + use crate::serialize::Serializable; + + #[test] + pub fn test_from_arrow_array() { + let arrow_array = PrimitiveArray::from_slice(vec![1, 2, 3, 4]); + let vector = PrimitiveVector::from(arrow_array); + assert_eq!( + vec![ + JsonValue::from(1), + JsonValue::from(2), + JsonValue::from(3), + JsonValue::from(4) + ], + vector.serialize_to_json().unwrap() + ); + } +} diff --git a/src/datatypes/src/vectors/string.rs b/src/datatypes/src/vectors/string.rs index d57b2f58c6..368d4a8c60 100644 --- a/src/datatypes/src/vectors/string.rs +++ b/src/datatypes/src/vectors/string.rs @@ -15,11 +15,17 @@ use crate::serialize::Serializable; use crate::types::StringType; /// String array wrapper -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct StringVector { array: StringArray, } +impl From for StringVector { + fn from(array: StringArray) -> Self { + Self { array } + } +} + impl Vector for StringVector { fn data_type(&self) -> DataTypeRef { StringType::arc() @@ -116,4 +122,19 @@ mod tests { val.serialize(&mut serializer).unwrap(); String::from_utf8_lossy(&output).into() } + + #[test] + pub fn test_from_arrow_array() { + let mut builder = MutableStringArray::new(); + builder.push(Some("A")); + builder.push(Some("B")); + builder.push::<&str>(None); + builder.push(Some("D")); + let string_array: StringArray = builder.into(); + let vector = StringVector::from(string_array); + assert_eq!( + r#"["A","B",null,"D"]"#, + serialize_to_json_string(vector.serialize_to_json().unwrap()) + ); + } }