From 445fd7571212889169fc69c0ef5ebe302f6f56a2 Mon Sep 17 00:00:00 2001 From: evenyag Date: Sun, 24 Apr 2022 17:22:56 +0800 Subject: [PATCH] feat: Implement ScalarVector for primitive/binary vector --- src/datatypes/src/lib.rs | 6 +- src/datatypes/src/scalar.rs | 86 ++++++++++++++++++++++++++ src/datatypes/src/vectors/binary.rs | 48 +++++++++++++- src/datatypes/src/vectors/primitive.rs | 74 +++++++++++++++++++++- 4 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 src/datatypes/src/scalar.rs diff --git a/src/datatypes/src/lib.rs b/src/datatypes/src/lib.rs index 5c7b8b66d9..513520cf5e 100644 --- a/src/datatypes/src/lib.rs +++ b/src/datatypes/src/lib.rs @@ -1,11 +1,15 @@ +#![feature(generic_associated_types)] + mod data_type; pub mod prelude; +mod scalar; mod schema; pub mod type_id; mod types; pub mod value; pub mod vectors; -use arrow2::array::BinaryArray; +use arrow2::array::{BinaryArray, MutableBinaryArray}; pub type LargeBinaryArray = BinaryArray; +pub type MutableLargeBinaryArray = MutableBinaryArray; diff --git a/src/datatypes/src/scalar.rs b/src/datatypes/src/scalar.rs new file mode 100644 index 0000000000..96cc69fc09 --- /dev/null +++ b/src/datatypes/src/scalar.rs @@ -0,0 +1,86 @@ +use crate::vectors::Vector; + +/// A sub trait of Vector to add scalar operation support. +// This implementation refers to Databend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs) +// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust). +pub trait ScalarVector: Vector { + /// The reference item of this vector. + type RefItem<'a>: Copy + where + Self: 'a; + + /// Iterator type of this vector. + type Iter<'a>: Iterator>> + where + Self: 'a; + + /// Builder type to build this vector. + type Builder: ScalarVectorBuilder; + + /// Returns the reference to an element at given position. + /// + /// Note: `get_data()` has bad performance, avoid calling this function inside a loop. 
+ fn get_data(&self, idx: usize) -> Option>; + + /// Returns iterator of current vector. + fn iter_data(&self) -> Self::Iter<'_>; +} + +/// A trait over all vector builders. +pub trait ScalarVectorBuilder { + type VectorType: ScalarVector; + + /// Create a new builder with initial `capacity`. + fn with_capacity(capacity: usize) -> Self; + + /// Push a value into the builder. + fn push(&mut self, value: Option<::RefItem<'_>>); + + /// Finish build and return a new vector. + fn finish(self) -> Self::VectorType; +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::vectors::binary::BinaryVector; + use crate::vectors::primitive::Int32Vector; + + fn build_vector_from_slice(items: &[Option>]) -> T { + let mut builder = T::Builder::with_capacity(items.len()); + for item in items { + builder.push(*item); + } + builder.finish() + } + + fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option>], vector: &'a T) + where + T::RefItem<'a>: PartialEq + std::fmt::Debug, + { + for (a, b) in expect.iter().zip(vector.iter_data()) { + assert_eq!(*a, b); + } + } + + #[test] + fn test_build_i32_vector() { + let expect = vec![Some(1), Some(2), Some(3), None, Some(5)]; + let vector: Int32Vector = build_vector_from_slice(&expect); + assert_vector_eq(&expect, &vector); + } + + #[test] + fn test_build_binary_vector() { + let expect: Vec> = vec![ + Some(b"a"), + Some(b"b"), + Some(b"c"), + None, + Some(b"e"), + Some(b""), + ]; + let vector: BinaryVector = build_vector_from_slice(&expect); + assert_vector_eq(&expect, &vector); + } +} diff --git a/src/datatypes/src/vectors/binary.rs b/src/datatypes/src/vectors/binary.rs index 230a0ce611..27eb29ae6a 100644 --- a/src/datatypes/src/vectors/binary.rs +++ b/src/datatypes/src/vectors/binary.rs @@ -1,9 +1,13 @@ use std::any::Any; +use arrow2::array::BinaryValueIter; +use arrow2::bitmap::utils::ZipValidity; + use crate::data_type::DataTypeRef; +use crate::scalar::{ScalarVector, ScalarVectorBuilder}; use crate::types::binary_type::BinaryType; 
use crate::vectors::Vector; -use crate::LargeBinaryArray; +use crate::{LargeBinaryArray, MutableLargeBinaryArray}; /// Vector of binary strings. #[derive(Debug)] @@ -24,3 +28,45 @@ impl Vector for BinaryVector { self.array.len() } } + +impl ScalarVector for BinaryVector { + type RefItem<'a> = &'a [u8]; + type Iter<'a> = ZipValidity<'a, &'a [u8], BinaryValueIter<'a, i64>>; + type Builder = BinaryVectorBuilder; + + fn get_data(&self, idx: usize) -> Option> { + if idx < self.len() { + Some(self.array.value(idx)) + } else { + None + } + } + + fn iter_data(&self) -> Self::Iter<'_> { + self.array.iter() + } +} + +pub struct BinaryVectorBuilder { + mutable_array: MutableLargeBinaryArray, +} + +impl ScalarVectorBuilder for BinaryVectorBuilder { + type VectorType = BinaryVector; + + fn with_capacity(capacity: usize) -> Self { + Self { + mutable_array: MutableLargeBinaryArray::with_capacity(capacity), + } + } + + fn push(&mut self, value: Option<::RefItem<'_>>) { + self.mutable_array.push(value); + } + + fn finish(self) -> Self::VectorType { + BinaryVector { + array: self.mutable_array.into(), + } + } +} diff --git a/src/datatypes/src/vectors/primitive.rs b/src/datatypes/src/vectors/primitive.rs index b58fe5e1c6..73a1c14991 100644 --- a/src/datatypes/src/vectors/primitive.rs +++ b/src/datatypes/src/vectors/primitive.rs @@ -1,8 +1,11 @@ use std::any::Any; +use std::slice::Iter; -use arrow2::array::PrimitiveArray; +use arrow2::array::{MutablePrimitiveArray, PrimitiveArray}; +use arrow2::bitmap::utils::ZipValidity; use crate::data_type::DataTypeRef; +use crate::scalar::{ScalarVector, ScalarVectorBuilder}; use crate::types::primitive_traits::Primitive; use crate::types::primitive_type::CreateDataType; use crate::vectors::Vector; @@ -31,3 +34,72 @@ impl Vector for PrimitiveVector { self.array.len() } } + +impl ScalarVector for PrimitiveVector { + type RefItem<'a> = T; + type Iter<'a> = PrimitiveIter<'a, T>; + type Builder = PrimitiveVectorBuilder; + + fn get_data(&self, idx: 
usize) -> Option> { + if idx < self.len() { + Some(self.array.value(idx)) + } else { + None + } + } + + fn iter_data(&self) -> Self::Iter<'_> { + PrimitiveIter { + iter: self.array.iter(), + } + } +} + +pub type UInt8Vector = PrimitiveVector; +pub type UInt16Vector = PrimitiveVector; +pub type UInt32Vector = PrimitiveVector; +pub type UInt64Vector = PrimitiveVector; + +pub type Int8Vector = PrimitiveVector; +pub type Int16Vector = PrimitiveVector; +pub type Int32Vector = PrimitiveVector; +pub type Int64Vector = PrimitiveVector; + +pub type Float32Vector = PrimitiveVector; +pub type Float64Vector = PrimitiveVector; + +pub struct PrimitiveIter<'a, T> { + iter: ZipValidity<'a, &'a T, Iter<'a, T>>, +} + +impl<'a, T: Copy> Iterator for PrimitiveIter<'a, T> { + type Item = Option; + + fn next(&mut self) -> Option> { + self.iter.next().map(|v| v.copied()) + } +} + +pub struct PrimitiveVectorBuilder { + mutable_array: MutablePrimitiveArray, +} + +impl ScalarVectorBuilder for PrimitiveVectorBuilder { + type VectorType = PrimitiveVector; + + fn with_capacity(capacity: usize) -> Self { + Self { + mutable_array: MutablePrimitiveArray::with_capacity(capacity), + } + } + + fn push(&mut self, value: Option<::RefItem<'_>>) { + self.mutable_array.push(value); + } + + fn finish(self) -> Self::VectorType { + PrimitiveVector { + array: self.mutable_array.into(), + } + } +}