mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-22 22:20:02 +00:00
feat: Implement ScalarVector for primitive/binary vector
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
#![feature(generic_associated_types)]
|
||||
|
||||
mod data_type;
|
||||
pub mod prelude;
|
||||
mod scalar;
|
||||
mod schema;
|
||||
pub mod type_id;
|
||||
mod types;
|
||||
pub mod value;
|
||||
pub mod vectors;
|
||||
|
||||
use arrow2::array::BinaryArray;
|
||||
use arrow2::array::{BinaryArray, MutableBinaryArray};
|
||||
|
||||
pub type LargeBinaryArray = BinaryArray<i64>;
|
||||
pub type MutableLargeBinaryArray = MutableBinaryArray<i64>;
|
||||
|
||||
86
src/datatypes/src/scalar.rs
Normal file
86
src/datatypes/src/scalar.rs
Normal file
@@ -0,0 +1,86 @@
|
||||
use crate::vectors::Vector;
|
||||
|
||||
/// A sub trait of [`Vector`] that adds typed, per-element (scalar) access.
///
/// Implementors pick a `Copy` item type handed out for each element, an
/// iterator over optional items, and a builder that produces the vector.
|
||||
// This implementation refers to Databend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs)
|
||||
// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust).
|
||||
pub trait ScalarVector: Vector {
|
||||
/// The item type used to refer to a single element of this vector.
/// It must be `Copy` and may borrow from the vector for `'a`.
|
||||
type RefItem<'a>: Copy
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Iterator type of this vector; each step yields an `Option` of
/// [`Self::RefItem`].
|
||||
type Iter<'a>: Iterator<Item = Option<Self::RefItem<'a>>>
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Builder type to build this vector; its `VectorType` must be `Self`.
|
||||
type Builder: ScalarVectorBuilder<VectorType = Self>;
|
||||
|
||||
/// Returns the element at position `idx`, or `None` when no element is
/// available there (for example, when `idx` is out of bounds).
|
||||
///
|
||||
/// Note: `get_data()` has bad performance, avoid calling this function inside a loop.
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
|
||||
|
||||
/// Returns an iterator over the elements of this vector.
|
||||
fn iter_data(&self) -> Self::Iter<'_>;
|
||||
}
|
||||
|
||||
/// A trait over all vector builders.
///
/// A builder is tied to exactly one [`ScalarVector`] type through
/// `VectorType`, and that vector type must name this builder as its `Builder`.
|
||||
pub trait ScalarVectorBuilder {
|
||||
/// The vector type produced by [`finish`](ScalarVectorBuilder::finish).
type VectorType: ScalarVector<Builder = Self>;
|
||||
|
||||
/// Create a new builder with initial `capacity`.
|
||||
fn with_capacity(capacity: usize) -> Self;
|
||||
|
||||
/// Push a value into the builder.
///
/// NOTE(review): `None` presumably records a null element; the tests in
/// this file expect it to be read back as `None` from `iter_data()`.
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);
|
||||
|
||||
/// Finish build and return a new vector, consuming the builder.
|
||||
fn finish(self) -> Self::VectorType;
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vectors::binary::BinaryVector;
    use crate::vectors::primitive::Int32Vector;

    /// Builds a vector of type `T` by pushing every entry of `items`, in order,
    /// into a freshly created `T::Builder`.
    fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
        let mut builder = T::Builder::with_capacity(items.len());
        for item in items {
            builder.push(*item);
        }
        builder.finish()
    }

    /// Asserts that `vector` yields exactly the entries of `expect`, in order.
    ///
    /// The elements are collected before comparing so that a vector with fewer
    /// or more elements than `expect` fails the assertion; zipping the two
    /// sequences would stop at the shorter one and hide such a mismatch.
    fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option<T::RefItem<'a>>], vector: &'a T)
    where
        T::RefItem<'a>: PartialEq + std::fmt::Debug,
    {
        let actual: Vec<_> = vector.iter_data().collect();
        assert_eq!(expect, actual.as_slice());
    }

    #[test]
    fn test_build_i32_vector() {
        let expect = vec![Some(1), Some(2), Some(3), None, Some(5)];
        let vector: Int32Vector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }

    #[test]
    fn test_build_binary_vector() {
        let expect: Vec<Option<&'static [u8]>> = vec![
            Some(b"a"),
            Some(b"b"),
            Some(b"c"),
            None,
            Some(b"e"),
            Some(b""),
        ];
        let vector: BinaryVector = build_vector_from_slice(&expect);
        assert_vector_eq(&expect, &vector);
    }
}
|
||||
@@ -1,9 +1,13 @@
|
||||
use std::any::Any;
|
||||
|
||||
use arrow2::array::BinaryValueIter;
|
||||
use arrow2::bitmap::utils::ZipValidity;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::scalar::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::types::binary_type::BinaryType;
|
||||
use crate::vectors::Vector;
|
||||
use crate::LargeBinaryArray;
|
||||
use crate::{LargeBinaryArray, MutableLargeBinaryArray};
|
||||
|
||||
/// Vector of binary strings.
|
||||
#[derive(Debug)]
|
||||
@@ -24,3 +28,45 @@ impl Vector for BinaryVector {
|
||||
self.array.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BinaryVector {
|
||||
type RefItem<'a> = &'a [u8];
|
||||
type Iter<'a> = ZipValidity<'a, &'a [u8], BinaryValueIter<'a, i64>>;
|
||||
type Builder = BinaryVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
self.array.iter()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BinaryVectorBuilder {
|
||||
mutable_array: MutableLargeBinaryArray,
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for BinaryVectorBuilder {
|
||||
type VectorType = BinaryVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableLargeBinaryArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(self) -> Self::VectorType {
|
||||
BinaryVector {
|
||||
array: self.mutable_array.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
use std::any::Any;
|
||||
use std::slice::Iter;
|
||||
|
||||
use arrow2::array::PrimitiveArray;
|
||||
use arrow2::array::{MutablePrimitiveArray, PrimitiveArray};
|
||||
use arrow2::bitmap::utils::ZipValidity;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::scalar::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::types::primitive_traits::Primitive;
|
||||
use crate::types::primitive_type::CreateDataType;
|
||||
use crate::vectors::Vector;
|
||||
@@ -31,3 +34,72 @@ impl<T: Primitive + CreateDataType> Vector for PrimitiveVector<T> {
|
||||
self.array.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive + CreateDataType> ScalarVector for PrimitiveVector<T> {
|
||||
type RefItem<'a> = T;
|
||||
type Iter<'a> = PrimitiveIter<'a, T>;
|
||||
type Builder = PrimitiveVectorBuilder<T>;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
PrimitiveIter {
|
||||
iter: self.array.iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Aliases of `PrimitiveVector` for unsigned integer element types.
/// [`PrimitiveVector`] of `u8` elements.
pub type UInt8Vector = PrimitiveVector<u8>;
|
||||
/// [`PrimitiveVector`] of `u16` elements.
pub type UInt16Vector = PrimitiveVector<u16>;
|
||||
/// [`PrimitiveVector`] of `u32` elements.
pub type UInt32Vector = PrimitiveVector<u32>;
|
||||
/// [`PrimitiveVector`] of `u64` elements.
pub type UInt64Vector = PrimitiveVector<u64>;
|
||||
|
||||
// Aliases of `PrimitiveVector` for signed integer element types.
/// [`PrimitiveVector`] of `i8` elements.
pub type Int8Vector = PrimitiveVector<i8>;
|
||||
/// [`PrimitiveVector`] of `i16` elements.
pub type Int16Vector = PrimitiveVector<i16>;
|
||||
/// [`PrimitiveVector`] of `i32` elements.
pub type Int32Vector = PrimitiveVector<i32>;
|
||||
/// [`PrimitiveVector`] of `i64` elements.
pub type Int64Vector = PrimitiveVector<i64>;
|
||||
|
||||
// Aliases of `PrimitiveVector` for floating point element types.
/// [`PrimitiveVector`] of `f32` elements.
pub type Float32Vector = PrimitiveVector<f32>;
|
||||
/// [`PrimitiveVector`] of `f64` elements.
pub type Float64Vector = PrimitiveVector<f64>;
|
||||
|
||||
pub struct PrimitiveIter<'a, T> {
|
||||
iter: ZipValidity<'a, &'a T, Iter<'a, T>>,
|
||||
}
|
||||
|
||||
impl<'a, T: Copy> Iterator for PrimitiveIter<'a, T> {
|
||||
type Item = Option<T>;
|
||||
|
||||
fn next(&mut self) -> Option<Option<T>> {
|
||||
self.iter.next().map(|v| v.copied())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PrimitiveVectorBuilder<T: Primitive> {
|
||||
mutable_array: MutablePrimitiveArray<T>,
|
||||
}
|
||||
|
||||
impl<T: Primitive + CreateDataType> ScalarVectorBuilder for PrimitiveVectorBuilder<T> {
|
||||
type VectorType = PrimitiveVector<T>;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutablePrimitiveArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(self) -> Self::VectorType {
|
||||
PrimitiveVector {
|
||||
array: self.mutable_array.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user