feat: Implement ScalarVector for primitive/binary vector

This commit is contained in:
evenyag
2022-04-24 17:22:56 +08:00
parent c9c5e69adf
commit 445fd75712
4 changed files with 211 additions and 3 deletions

View File

@@ -1,11 +1,15 @@
#![feature(generic_associated_types)]
mod data_type;
pub mod prelude;
mod scalar;
mod schema;
pub mod type_id;
mod types;
pub mod value;
pub mod vectors;
use arrow2::array::BinaryArray;
use arrow2::array::{BinaryArray, MutableBinaryArray};
/// arrow2 `BinaryArray` instantiated with `i64` as its generic parameter.
pub type LargeBinaryArray = BinaryArray<i64>;
/// Mutable counterpart of [`LargeBinaryArray`], used to build it incrementally.
pub type MutableLargeBinaryArray = MutableBinaryArray<i64>;

View File

@@ -0,0 +1,86 @@
use crate::vectors::Vector;
/// A sub trait of [`Vector`] that adds typed, per-element (scalar) access.
///
/// Implementors expose a cheap-to-copy reference item type, an iterator over
/// optional items, and a builder that can construct the vector.
// This implementation refers to Databend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs)
// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust).
pub trait ScalarVector: Vector {
/// The reference item of this vector.
///
/// It must be `Copy` so that items can be passed around by value.
type RefItem<'a>: Copy
where
Self: 'a;
/// Iterator type of this vector; every item is an optional [`Self::RefItem`].
type Iter<'a>: Iterator<Item = Option<Self::RefItem<'a>>>
where
Self: 'a;
/// Builder type to build this vector.
type Builder: ScalarVectorBuilder<VectorType = Self>;
/// Returns the reference to an element at given position.
///
/// Note: `get_data()` has bad performance, avoid calling this function inside a loop;
/// prefer [`ScalarVector::iter_data`] for sequential access.
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
/// Returns an iterator over the elements of the current vector.
fn iter_data(&self) -> Self::Iter<'_>;
}
/// A trait over all vector builders.
///
/// A builder is created with [`ScalarVectorBuilder::with_capacity`], filled with
/// [`ScalarVectorBuilder::push`] and consumed by [`ScalarVectorBuilder::finish`].
pub trait ScalarVectorBuilder {
/// The vector type produced by this builder; its `Builder` must be this type.
type VectorType: ScalarVector<Builder = Self>;
/// Create a new builder with initial `capacity`.
fn with_capacity(capacity: usize) -> Self;
/// Push an optional value into the builder.
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);
/// Finish build and return a new vector, consuming the builder.
fn finish(self) -> Self::VectorType;
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vectors::binary::BinaryVector;
    use crate::vectors::primitive::Int32Vector;

    /// Builds a vector of type `T` by pushing every item of `items` through `T::Builder`.
    fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
        let mut builder = T::Builder::with_capacity(items.len());
        for item in items {
            builder.push(*item);
        }
        builder.finish()
    }

    /// Asserts that iterating `vector` yields exactly the items in `expect`.
    ///
    /// The iterator output is collected before comparing so that a length
    /// mismatch fails the assertion; zipping the two sequences would silently
    /// stop at the shorter one and let a truncated vector pass.
    fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option<T::RefItem<'a>>], vector: &'a T)
    where
        T::RefItem<'a>: PartialEq + std::fmt::Debug,
    {
        let actual: Vec<Option<T::RefItem<'a>>> = vector.iter_data().collect();
        assert_eq!(expect, actual.as_slice());
    }

    #[test]
    fn test_build_i32_vector() {
        let expect = vec![Some(1), Some(2), Some(3), None, Some(5)];
        let vector: Int32Vector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_binary_vector() {
        let expect: Vec<Option<&'static [u8]>> = vec![
            Some(b"a"),
            Some(b"b"),
            Some(b"c"),
            None,
            Some(b"e"),
            Some(b""),
        ];
        let vector: BinaryVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }
}

View File

@@ -1,9 +1,13 @@
use std::any::Any;
use arrow2::array::BinaryValueIter;
use arrow2::bitmap::utils::ZipValidity;
use crate::data_type::DataTypeRef;
use crate::scalar::{ScalarVector, ScalarVectorBuilder};
use crate::types::binary_type::BinaryType;
use crate::vectors::Vector;
use crate::LargeBinaryArray;
use crate::{LargeBinaryArray, MutableLargeBinaryArray};
/// Vector of binary strings.
#[derive(Debug)]
@@ -24,3 +28,45 @@ impl Vector for BinaryVector {
self.array.len()
}
}
impl ScalarVector for BinaryVector {
    type RefItem<'a> = &'a [u8];
    type Iter<'a> = ZipValidity<'a, &'a [u8], BinaryValueIter<'a, i64>>;
    type Builder = BinaryVectorBuilder;

    /// Returns the bytes stored at `idx`.
    ///
    /// Returns `None` when `idx` is out of bounds or when the slot is null, so
    /// the result agrees with what `iter_data()` yields for the same position.
    fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
        if idx >= self.len() {
            return None;
        }
        // A missing validity bitmap means the array contains no nulls.
        let is_valid = self
            .array
            .validity()
            .map_or(true, |validity| validity.get_bit(idx));
        is_valid.then(|| self.array.value(idx))
    }

    /// Returns an iterator over the optional byte slices of this vector.
    fn iter_data(&self) -> Self::Iter<'_> {
        self.array.iter()
    }
}
/// Builder that accumulates optional byte slices and produces a [`BinaryVector`].
pub struct BinaryVectorBuilder {
    mutable_array: MutableLargeBinaryArray,
}

impl ScalarVectorBuilder for BinaryVectorBuilder {
    type VectorType = BinaryVector;

    /// Creates an empty builder whose backing array is allocated with `capacity`.
    fn with_capacity(capacity: usize) -> Self {
        let mutable_array = MutableLargeBinaryArray::with_capacity(capacity);
        Self { mutable_array }
    }

    /// Appends `value` to the backing mutable array.
    fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
        self.mutable_array.push(value);
    }

    /// Consumes the builder and converts the accumulated array into a [`BinaryVector`].
    fn finish(self) -> Self::VectorType {
        let Self { mutable_array } = self;
        BinaryVector {
            array: mutable_array.into(),
        }
    }
}

View File

@@ -1,8 +1,11 @@
use std::any::Any;
use std::slice::Iter;
use arrow2::array::PrimitiveArray;
use arrow2::array::{MutablePrimitiveArray, PrimitiveArray};
use arrow2::bitmap::utils::ZipValidity;
use crate::data_type::DataTypeRef;
use crate::scalar::{ScalarVector, ScalarVectorBuilder};
use crate::types::primitive_traits::Primitive;
use crate::types::primitive_type::CreateDataType;
use crate::vectors::Vector;
@@ -31,3 +34,72 @@ impl<T: Primitive + CreateDataType> Vector for PrimitiveVector<T> {
self.array.len()
}
}
impl<T: Primitive + CreateDataType> ScalarVector for PrimitiveVector<T> {
    type RefItem<'a> = T;
    type Iter<'a> = PrimitiveIter<'a, T>;
    type Builder = PrimitiveVectorBuilder<T>;

    /// Returns a copy of the value stored at `idx`.
    ///
    /// Returns `None` when `idx` is out of bounds or when the slot is null, so
    /// the result agrees with what `iter_data()` yields for the same position.
    fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
        if idx >= self.len() {
            return None;
        }
        // A missing validity bitmap means the array contains no nulls.
        let is_valid = self
            .array
            .validity()
            .map_or(true, |validity| validity.get_bit(idx));
        is_valid.then(|| self.array.value(idx))
    }

    /// Returns an iterator that yields the optional values of this vector by copy.
    fn iter_data(&self) -> Self::Iter<'_> {
        PrimitiveIter {
            iter: self.array.iter(),
        }
    }
}
// Convenience aliases of `PrimitiveVector` for each supported primitive type.
pub type UInt8Vector = PrimitiveVector<u8>;
pub type UInt16Vector = PrimitiveVector<u16>;
pub type UInt32Vector = PrimitiveVector<u32>;
pub type UInt64Vector = PrimitiveVector<u64>;
pub type Int8Vector = PrimitiveVector<i8>;
pub type Int16Vector = PrimitiveVector<i16>;
pub type Int32Vector = PrimitiveVector<i32>;
pub type Int64Vector = PrimitiveVector<i64>;
pub type Float32Vector = PrimitiveVector<f32>;
pub type Float64Vector = PrimitiveVector<f64>;
/// Iterator over the elements of a primitive vector that yields values by copy.
///
/// It wraps the arrow2 iterator, which yields `Option<&T>`, and copies each
/// present value so that callers receive `Option<T>`.
pub struct PrimitiveIter<'a, T> {
    iter: ZipValidity<'a, &'a T, Iter<'a, T>>,
}

impl<'a, T: Copy> Iterator for PrimitiveIter<'a, T> {
    type Item = Option<T>;

    fn next(&mut self) -> Option<Option<T>> {
        self.iter.next().map(|v| v.copied())
    }

    // Forward the inner hint so consumers such as `collect` can pre-allocate;
    // the default implementation would report `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}
/// Builder that accumulates optional primitive values and produces a [`PrimitiveVector`].
pub struct PrimitiveVectorBuilder<T: Primitive> {
    mutable_array: MutablePrimitiveArray<T>,
}

impl<T: Primitive + CreateDataType> ScalarVectorBuilder for PrimitiveVectorBuilder<T> {
    type VectorType = PrimitiveVector<T>;

    /// Creates an empty builder whose backing array is allocated with `capacity`.
    fn with_capacity(capacity: usize) -> Self {
        let mutable_array = MutablePrimitiveArray::with_capacity(capacity);
        Self { mutable_array }
    }

    /// Appends `value` to the backing mutable array.
    fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
        self.mutable_array.push(value);
    }

    /// Consumes the builder and converts the accumulated array into a [`PrimitiveVector`].
    fn finish(self) -> Self::VectorType {
        let Self { mutable_array } = self;
        PrimitiveVector {
            array: mutable_array.into(),
        }
    }
}