mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-22 22:20:02 +00:00
Merge pull request #11 from GrepTimeTeam/feat-datatypes
feat: Implement basic data type framework
This commit is contained in:
100
Cargo.lock
generated
100
Cargo.lock
generated
@@ -2,6 +2,20 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "arrow2"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"chrono",
|
||||
"either",
|
||||
"hash_hasher",
|
||||
"num-traits",
|
||||
"simdutf8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.53"
|
||||
@@ -13,6 +27,32 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck_derive"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "562e382481975bc61d11275ac5e62a19abd00b0547d99516a415336f183dcd0e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
@@ -20,7 +60,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "common"
|
||||
name = "chrono"
|
||||
version = "0.4.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
|
||||
dependencies = [
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "common-base"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
@@ -33,6 +83,11 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "datatypes"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"arrow2",
|
||||
"common-base",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "doc-comment"
|
||||
@@ -40,6 +95,18 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "hash_hasher"
|
||||
version = "2.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.3"
|
||||
@@ -66,10 +133,35 @@ version = "0.1.0"
|
||||
name = "logical-plans"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object-store"
|
||||
version = "0.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "paste"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.37"
|
||||
@@ -92,6 +184,12 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "simdutf8"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
|
||||
|
||||
[[package]]
|
||||
name = "snafu"
|
||||
version = "0.7.0"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[workspace]
|
||||
members = [
|
||||
"src/common",
|
||||
"src/common/base",
|
||||
"src/datanode",
|
||||
"src/datatypes",
|
||||
"src/log-store",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "common"
|
||||
name = "common-base"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
7
src/common/base/src/bytes.rs
Normal file
7
src/common/base/src/bytes.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
/// Bytes buffer.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct Bytes(Vec<u8>);
|
||||
|
||||
/// String buffer with arbitrary encoding.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct StringBytes(Vec<u8>);
|
||||
1
src/common/base/src/lib.rs
Normal file
1
src/common/base/src/lib.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod bytes;
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -6,3 +6,6 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
arrow2 = "0.10"
|
||||
common-base = { path = "../common/base" }
|
||||
paste = "1.0"
|
||||
|
||||
23
src/datatypes/src/data_type.rs
Normal file
23
src/datatypes/src/data_type.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow2::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
|
||||
/// Data type abstraction.
|
||||
pub trait DataType: std::fmt::Debug + Send + Sync {
|
||||
/// Name of this data type.
|
||||
fn name(&self) -> &str;
|
||||
|
||||
/// Returns id of the Logical data type.
|
||||
fn logical_type_id(&self) -> LogicalTypeId;
|
||||
|
||||
/// Returns the default value of this type.
|
||||
fn default_value(&self) -> Value;
|
||||
|
||||
/// Convert this type as [arrow2::datatypes::DataType].
|
||||
fn as_arrow_type(&self) -> ArrowDataType;
|
||||
}
|
||||
|
||||
pub type DataTypeRef = Arc<dyn DataType>;
|
||||
@@ -1 +1,15 @@
|
||||
#![feature(generic_associated_types)]
|
||||
|
||||
mod data_type;
|
||||
pub mod prelude;
|
||||
mod scalars;
|
||||
mod schema;
|
||||
pub mod type_id;
|
||||
mod types;
|
||||
pub mod value;
|
||||
pub mod vectors;
|
||||
|
||||
use arrow2::array::{BinaryArray, MutableBinaryArray};
|
||||
|
||||
pub type LargeBinaryArray = BinaryArray<i64>;
|
||||
pub type MutableLargeBinaryArray = MutableBinaryArray<i64>;
|
||||
|
||||
5
src/datatypes/src/prelude.rs
Normal file
5
src/datatypes/src/prelude.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
pub use crate::data_type::{DataType, DataTypeRef};
|
||||
pub use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
pub use crate::type_id::LogicalTypeId;
|
||||
pub use crate::value::Value;
|
||||
pub use crate::vectors::{Vector, VectorRef};
|
||||
86
src/datatypes/src/scalars.rs
Normal file
86
src/datatypes/src/scalars.rs
Normal file
@@ -0,0 +1,86 @@
|
||||
use crate::vectors::Vector;
|
||||
|
||||
/// A sub trait of Vector to add scalar operation support.
|
||||
// This implementation refers to Datebend's [ScalarColumn](https://github.com/datafuselabs/databend/blob/main/common/datavalues/src/scalars/type_.rs)
|
||||
// and skyzh's [type-exercise-in-rust](https://github.com/skyzh/type-exercise-in-rust).
|
||||
pub trait ScalarVector: Vector {
|
||||
/// The reference item of this vector.
|
||||
type RefItem<'a>: Copy
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Iterator type of this vector.
|
||||
type Iter<'a>: Iterator<Item = Option<Self::RefItem<'a>>>
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
/// Builder type to build this vector.
|
||||
type Builder: ScalarVectorBuilder<VectorType = Self>;
|
||||
|
||||
/// Returns the reference to an element at given position.
|
||||
///
|
||||
/// Note: `get()` has bad performance, avoid call this function inside loop.
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>>;
|
||||
|
||||
/// Returns iterator of current vector.
|
||||
fn iter_data(&self) -> Self::Iter<'_>;
|
||||
}
|
||||
|
||||
/// A trait over all vector builders.
|
||||
pub trait ScalarVectorBuilder {
|
||||
type VectorType: ScalarVector<Builder = Self>;
|
||||
|
||||
/// Create a new builder with initial `capacity`.
|
||||
fn with_capacity(capacity: usize) -> Self;
|
||||
|
||||
/// Push a value into the builder.
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>);
|
||||
|
||||
/// Finish build and return a new vector.
|
||||
fn finish(self) -> Self::VectorType;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::vectors::binary::BinaryVector;
|
||||
use crate::vectors::primitive::Int32Vector;
|
||||
|
||||
fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
|
||||
let mut builder = T::Builder::with_capacity(items.len());
|
||||
for item in items {
|
||||
builder.push(*item);
|
||||
}
|
||||
builder.finish()
|
||||
}
|
||||
|
||||
fn assert_vector_eq<'a, T: ScalarVector>(expect: &[Option<T::RefItem<'a>>], vector: &'a T)
|
||||
where
|
||||
T::RefItem<'a>: PartialEq + std::fmt::Debug,
|
||||
{
|
||||
for (a, b) in expect.iter().zip(vector.iter_data()) {
|
||||
assert_eq!(*a, b);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_i32_vector() {
|
||||
let expect = vec![Some(1), Some(2), Some(3), None, Some(5)];
|
||||
let vector: Int32Vector = build_vector_from_slice(&expect);
|
||||
assert_vector_eq(&expect, &vector);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_binary_vector() {
|
||||
let expect: Vec<Option<&'static [u8]>> = vec![
|
||||
Some(b"a"),
|
||||
Some(b"b"),
|
||||
Some(b"c"),
|
||||
None,
|
||||
Some(b"e"),
|
||||
Some(b""),
|
||||
];
|
||||
let vector: BinaryVector = build_vector_from_slice(&expect);
|
||||
assert_vector_eq(&expect, &vector);
|
||||
}
|
||||
}
|
||||
30
src/datatypes/src/type_id.rs
Normal file
30
src/datatypes/src/type_id.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
/// Unique identifier for logical data type.
|
||||
#[derive(Debug)]
|
||||
pub enum LogicalTypeId {
|
||||
Null,
|
||||
|
||||
// Numeric types:
|
||||
Boolean,
|
||||
Int8,
|
||||
Int16,
|
||||
Int32,
|
||||
Int64,
|
||||
UInt8,
|
||||
UInt16,
|
||||
UInt32,
|
||||
UInt64,
|
||||
Float32,
|
||||
Float64,
|
||||
|
||||
// String types:
|
||||
String,
|
||||
Binary,
|
||||
|
||||
// Date & Time types:
|
||||
/// Date representing the elapsed time since UNIX epoch (1970-01-01)
|
||||
/// in days (32 bits).
|
||||
Date,
|
||||
/// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in
|
||||
/// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
|
||||
DateTime,
|
||||
}
|
||||
7
src/datatypes/src/types.rs
Normal file
7
src/datatypes/src/types.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
mod binary_type;
|
||||
mod primitive_traits;
|
||||
mod primitive_type;
|
||||
|
||||
pub use binary_type::BinaryType;
|
||||
pub use primitive_traits::Primitive;
|
||||
pub use primitive_type::{DataTypeBuilder, PrimitiveType};
|
||||
35
src/datatypes/src/types/binary_type.rs
Normal file
35
src/datatypes/src/types/binary_type.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow2::datatypes::DataType as ArrowDataType;
|
||||
use common_base::bytes::StringBytes;
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::value::Value;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct BinaryType;
|
||||
|
||||
impl BinaryType {
|
||||
pub fn arc() -> DataTypeRef {
|
||||
Arc::new(Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl DataType for BinaryType {
|
||||
fn name(&self) -> &str {
|
||||
"Binary"
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::String
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
StringBytes::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::LargeBinary
|
||||
}
|
||||
}
|
||||
28
src/datatypes/src/types/primitive_traits.rs
Normal file
28
src/datatypes/src/types/primitive_traits.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
use arrow2::types::NativeType;
|
||||
|
||||
use crate::value::Value;
|
||||
|
||||
/// Primitive type.
|
||||
pub trait Primitive: PartialOrd + Default + Clone + Copy + Into<Value> + NativeType {
|
||||
/// Largest numeric type this primitive type can be cast to.
|
||||
type LargestType: Primitive;
|
||||
}
|
||||
|
||||
macro_rules! impl_primitive {
|
||||
($Type:ident, $LargestType: ident) => {
|
||||
impl Primitive for $Type {
|
||||
type LargestType = $LargestType;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_primitive!(u8, u64);
|
||||
impl_primitive!(u16, u64);
|
||||
impl_primitive!(u32, u64);
|
||||
impl_primitive!(u64, u64);
|
||||
impl_primitive!(i8, i64);
|
||||
impl_primitive!(i16, i64);
|
||||
impl_primitive!(i32, i64);
|
||||
impl_primitive!(i64, i64);
|
||||
impl_primitive!(f32, f64);
|
||||
impl_primitive!(f64, f64);
|
||||
79
src/datatypes/src/types/primitive_type.rs
Normal file
79
src/datatypes/src/types/primitive_type.rs
Normal file
@@ -0,0 +1,79 @@
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow2::datatypes::DataType as ArrowDataType;
|
||||
|
||||
use crate::data_type::{DataType, DataTypeRef};
|
||||
use crate::type_id::LogicalTypeId;
|
||||
use crate::types::primitive_traits::Primitive;
|
||||
use crate::value::Value;
|
||||
|
||||
pub struct PrimitiveType<T: Primitive> {
|
||||
_phantom: PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T: Primitive> PrimitiveType<T> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new [DataTypeRef] from a primitive type.
|
||||
pub trait DataTypeBuilder {
|
||||
fn build_data_type() -> DataTypeRef;
|
||||
}
|
||||
|
||||
macro_rules! impl_build_data_type {
|
||||
($Type:ident) => {
|
||||
paste::paste! {
|
||||
impl DataTypeBuilder for $Type {
|
||||
fn build_data_type() -> DataTypeRef {
|
||||
Arc::new(PrimitiveType::<$Type>::new())
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_numeric {
|
||||
($Type:ident, $TypeId:ident) => {
|
||||
impl DataType for PrimitiveType<$Type> {
|
||||
fn name(&self) -> &str {
|
||||
stringify!($TypeId)
|
||||
}
|
||||
|
||||
fn logical_type_id(&self) -> LogicalTypeId {
|
||||
LogicalTypeId::$TypeId
|
||||
}
|
||||
|
||||
fn default_value(&self) -> Value {
|
||||
$Type::default().into()
|
||||
}
|
||||
|
||||
fn as_arrow_type(&self) -> ArrowDataType {
|
||||
ArrowDataType::$TypeId
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for PrimitiveType<$Type> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name())
|
||||
}
|
||||
}
|
||||
|
||||
impl_build_data_type!($Type);
|
||||
};
|
||||
}
|
||||
|
||||
impl_numeric!(u8, UInt8);
|
||||
impl_numeric!(u16, UInt16);
|
||||
impl_numeric!(u32, UInt32);
|
||||
impl_numeric!(u64, UInt64);
|
||||
impl_numeric!(i8, Int8);
|
||||
impl_numeric!(i16, Int16);
|
||||
impl_numeric!(i32, Int32);
|
||||
impl_numeric!(i64, Int64);
|
||||
impl_numeric!(f32, Float32);
|
||||
impl_numeric!(f64, Float64);
|
||||
61
src/datatypes/src/value.rs
Normal file
61
src/datatypes/src/value.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
use common_base::bytes::{Bytes, StringBytes};
|
||||
|
||||
/// Value holds a single arbitrary value of any [DataType](crate::data_type::DataType).
|
||||
#[derive(Debug)]
|
||||
pub enum Value {
|
||||
Null,
|
||||
|
||||
// Numeric types:
|
||||
Boolean(bool),
|
||||
UInt8(u8),
|
||||
UInt16(u16),
|
||||
UInt32(u32),
|
||||
UInt64(u64),
|
||||
Int8(i8),
|
||||
Int16(i16),
|
||||
Int32(i32),
|
||||
Int64(i64),
|
||||
Float32(f32),
|
||||
Float64(f64),
|
||||
|
||||
// String types:
|
||||
String(StringBytes),
|
||||
Binary(Bytes),
|
||||
|
||||
// Date & Time types:
|
||||
Date(i32),
|
||||
DateTime(i64),
|
||||
}
|
||||
|
||||
macro_rules! impl_from {
|
||||
($Variant:ident, $Type:ident) => {
|
||||
impl From<$Type> for Value {
|
||||
fn from(value: $Type) -> Self {
|
||||
Value::$Variant(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Option<$Type>> for Value {
|
||||
fn from(value: Option<$Type>) -> Self {
|
||||
match value {
|
||||
Some(v) => Value::$Variant(v),
|
||||
None => Value::Null,
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_from!(Boolean, bool);
|
||||
impl_from!(UInt8, u8);
|
||||
impl_from!(UInt16, u16);
|
||||
impl_from!(UInt32, u32);
|
||||
impl_from!(UInt64, u64);
|
||||
impl_from!(Int8, i8);
|
||||
impl_from!(Int16, i16);
|
||||
impl_from!(Int32, i32);
|
||||
impl_from!(Int64, i64);
|
||||
impl_from!(Float32, f32);
|
||||
impl_from!(Float64, f64);
|
||||
impl_from!(String, StringBytes);
|
||||
impl_from!(Binary, Bytes);
|
||||
36
src/datatypes/src/vectors.rs
Normal file
36
src/datatypes/src/vectors.rs
Normal file
@@ -0,0 +1,36 @@
|
||||
pub mod binary;
|
||||
pub mod primitive;
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow2::array::ArrayRef;
|
||||
pub use binary::*;
|
||||
pub use primitive::*;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
|
||||
/// Vector of data values.
|
||||
pub trait Vector: Send + Sync {
|
||||
/// Returns the data type of the vector.
|
||||
///
|
||||
/// This may require heap allocation.
|
||||
fn data_type(&self) -> DataTypeRef;
|
||||
|
||||
/// Returns the vector as [Any](std::any::Any) so that it can be
|
||||
/// downcast to a specific implementation.
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
|
||||
/// Returns number of elements in the vector.
|
||||
fn len(&self) -> usize;
|
||||
|
||||
/// Returns whether the vector is empty.
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Convert this vector to a new arrow [ArrayRef].
|
||||
fn to_arrow_array(&self) -> ArrayRef;
|
||||
}
|
||||
|
||||
pub type VectorRef = Arc<dyn Vector>;
|
||||
78
src/datatypes/src/vectors/binary.rs
Normal file
78
src/datatypes/src/vectors/binary.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow2::array::ArrayRef;
|
||||
use arrow2::array::BinaryValueIter;
|
||||
use arrow2::bitmap::utils::ZipValidity;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::types::BinaryType;
|
||||
use crate::vectors::Vector;
|
||||
use crate::{LargeBinaryArray, MutableLargeBinaryArray};
|
||||
|
||||
/// Vector of binary strings.
|
||||
#[derive(Debug)]
|
||||
pub struct BinaryVector {
|
||||
array: LargeBinaryArray,
|
||||
}
|
||||
|
||||
impl Vector for BinaryVector {
|
||||
fn data_type(&self) -> DataTypeRef {
|
||||
BinaryType::arc()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarVector for BinaryVector {
|
||||
type RefItem<'a> = &'a [u8];
|
||||
type Iter<'a> = ZipValidity<'a, &'a [u8], BinaryValueIter<'a, i64>>;
|
||||
type Builder = BinaryVectorBuilder;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
self.array.iter()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BinaryVectorBuilder {
|
||||
mutable_array: MutableLargeBinaryArray,
|
||||
}
|
||||
|
||||
impl ScalarVectorBuilder for BinaryVectorBuilder {
|
||||
type VectorType = BinaryVector;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutableLargeBinaryArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(self) -> Self::VectorType {
|
||||
BinaryVector {
|
||||
array: self.mutable_array.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
109
src/datatypes/src/vectors/primitive.rs
Normal file
109
src/datatypes/src/vectors/primitive.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use std::any::Any;
|
||||
use std::slice::Iter;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow2::array::{ArrayRef, MutablePrimitiveArray, PrimitiveArray};
|
||||
use arrow2::bitmap::utils::ZipValidity;
|
||||
|
||||
use crate::data_type::DataTypeRef;
|
||||
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
|
||||
use crate::types::{DataTypeBuilder, Primitive};
|
||||
use crate::vectors::Vector;
|
||||
|
||||
/// Vector for primitive data types.
|
||||
pub struct PrimitiveVector<T: Primitive> {
|
||||
array: PrimitiveArray<T>,
|
||||
}
|
||||
|
||||
impl<T: Primitive> PrimitiveVector<T> {
|
||||
pub fn new(array: PrimitiveArray<T>) -> Self {
|
||||
Self { array }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> Vector for PrimitiveVector<T> {
|
||||
fn data_type(&self) -> DataTypeRef {
|
||||
T::build_data_type()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.array.len()
|
||||
}
|
||||
|
||||
fn to_arrow_array(&self) -> ArrayRef {
|
||||
Arc::new(self.array.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> ScalarVector for PrimitiveVector<T> {
|
||||
type RefItem<'a> = T;
|
||||
type Iter<'a> = PrimitiveIter<'a, T>;
|
||||
type Builder = PrimitiveVectorBuilder<T>;
|
||||
|
||||
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
|
||||
if idx < self.len() {
|
||||
Some(self.array.value(idx))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_data(&self) -> Self::Iter<'_> {
|
||||
PrimitiveIter {
|
||||
iter: self.array.iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type UInt8Vector = PrimitiveVector<u8>;
|
||||
pub type UInt16Vector = PrimitiveVector<u16>;
|
||||
pub type UInt32Vector = PrimitiveVector<u32>;
|
||||
pub type UInt64Vector = PrimitiveVector<u64>;
|
||||
|
||||
pub type Int8Vector = PrimitiveVector<i8>;
|
||||
pub type Int16Vector = PrimitiveVector<i16>;
|
||||
pub type Int32Vector = PrimitiveVector<i32>;
|
||||
pub type Int64Vector = PrimitiveVector<i64>;
|
||||
|
||||
pub type Float32Vector = PrimitiveVector<f32>;
|
||||
pub type Float64Vector = PrimitiveVector<f64>;
|
||||
|
||||
pub struct PrimitiveIter<'a, T> {
|
||||
iter: ZipValidity<'a, &'a T, Iter<'a, T>>,
|
||||
}
|
||||
|
||||
impl<'a, T: Copy> Iterator for PrimitiveIter<'a, T> {
|
||||
type Item = Option<T>;
|
||||
|
||||
fn next(&mut self) -> Option<Option<T>> {
|
||||
self.iter.next().map(|v| v.copied())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PrimitiveVectorBuilder<T: Primitive> {
|
||||
mutable_array: MutablePrimitiveArray<T>,
|
||||
}
|
||||
|
||||
impl<T: Primitive + DataTypeBuilder> ScalarVectorBuilder for PrimitiveVectorBuilder<T> {
|
||||
type VectorType = PrimitiveVector<T>;
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
mutable_array: MutablePrimitiveArray::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
|
||||
self.mutable_array.push(value);
|
||||
}
|
||||
|
||||
fn finish(self) -> Self::VectorType {
|
||||
PrimitiveVector {
|
||||
array: self.mutable_array.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user