From d20191572e1f34074b50d6304198a3e104ad2f22 Mon Sep 17 00:00:00 2001 From: evenyag Date: Fri, 22 Apr 2022 19:19:48 +0800 Subject: [PATCH] feat: Implement PrimitiveType and PrimitiveVector --- Cargo.lock | 98 +++++++++++++++++++++ src/common/src/lib.rs | 6 ++ src/datatypes/Cargo.toml | 3 + src/datatypes/src/data_type.rs | 18 ++++ src/datatypes/src/lib.rs | 5 ++ src/datatypes/src/type_id.rs | 30 +++++++ src/datatypes/src/types.rs | 2 + src/datatypes/src/types/primitive_traits.rs | 28 ++++++ src/datatypes/src/types/primitive_type.rs | 73 +++++++++++++++ src/datatypes/src/value.rs | 59 +++++++++++++ src/datatypes/src/vectors.rs | 26 ++++++ src/datatypes/src/vectors/primitive.rs | 33 +++++++ 12 files changed, 381 insertions(+) create mode 100644 src/datatypes/src/data_type.rs create mode 100644 src/datatypes/src/type_id.rs create mode 100644 src/datatypes/src/types.rs create mode 100644 src/datatypes/src/types/primitive_traits.rs create mode 100644 src/datatypes/src/types/primitive_type.rs create mode 100644 src/datatypes/src/value.rs create mode 100644 src/datatypes/src/vectors.rs create mode 100644 src/datatypes/src/vectors/primitive.rs diff --git a/Cargo.lock b/Cargo.lock index 9cade898fe..4a4d27f706 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,20 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "arrow2" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e" +dependencies = [ + "bytemuck", + "chrono", + "either", + "hash_hasher", + "num-traits", + "simdutf8", +] + [[package]] name = "async-trait" version = "0.1.53" @@ -13,12 +27,48 @@ dependencies = [ "syn", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bytemuck" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562e382481975bc61d11275ac5e62a19abd00b0547d99516a415336f183dcd0e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "num-integer", + "num-traits", +] + [[package]] name = "common" version = "0.1.0" @@ -33,6 +83,11 @@ dependencies = [ [[package]] name = "datatypes" version = "0.1.0" +dependencies = [ + "arrow2", + "common", + "paste", +] [[package]] name = "doc-comment" @@ -40,6 +95,18 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "either" +version = 
"1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "hash_hasher" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" + [[package]] name = "heck" version = "0.3.3" @@ -66,10 +133,35 @@ version = "0.1.0" name = "logical-plans" version = "0.1.0" +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + [[package]] name = "object-store" version = "0.1.0" +[[package]] +name = "paste" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" + [[package]] name = "proc-macro2" version = "1.0.37" @@ -92,6 +184,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "snafu" version = "0.7.0" diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs index 8b13789179..ae09dd0f2c 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -1 +1,7 @@ +/// Bytes buffer. +#[derive(Debug, Default, Clone)] +pub struct Bytes(Vec); +/// String buffer with arbitrary encoding. 
+#[derive(Debug, Default, Clone)] +pub struct StringBytes(Vec); diff --git a/src/datatypes/Cargo.toml b/src/datatypes/Cargo.toml index f32b26efb9..00fe122272 100644 --- a/src/datatypes/Cargo.toml +++ b/src/datatypes/Cargo.toml @@ -6,3 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +common = { path = "../common" } +arrow2 = "0.10" +paste = "1.0" diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs new file mode 100644 index 0000000000..122a2c173a --- /dev/null +++ b/src/datatypes/src/data_type.rs @@ -0,0 +1,18 @@ +use std::sync::Arc; + +use crate::type_id::LogicalTypeId; +use crate::value::Value; + +/// Data type abstraction. +pub trait DataType: std::fmt::Debug { + /// Name of this data type. + fn name(&self) -> &str; + + /// Returns id of the Logical data type. + fn logical_type_id(&self) -> LogicalTypeId; + + /// Returns the default value of this type. + fn default_value(&self) -> Value; +} + +pub type DataTypeRef = Arc; diff --git a/src/datatypes/src/lib.rs b/src/datatypes/src/lib.rs index 6bde67a2b1..3b22def878 100644 --- a/src/datatypes/src/lib.rs +++ b/src/datatypes/src/lib.rs @@ -1 +1,6 @@ +mod data_type; mod schema; +pub mod type_id; +mod types; +pub mod value; +pub mod vectors; diff --git a/src/datatypes/src/type_id.rs b/src/datatypes/src/type_id.rs new file mode 100644 index 0000000000..e299594caa --- /dev/null +++ b/src/datatypes/src/type_id.rs @@ -0,0 +1,30 @@ +/// Unique identifier for logical data type. +#[derive(Debug)] +pub enum LogicalTypeId { + Null, + + // Numeric types: + Boolean, + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, + Float32, + Float64, + + // String types: + String, + Binary, + + // Date & Time types: + /// Date representing the elapsed time since UNIX epoch (1970-01-01) + /// in days (32 bits). 
+ Date, + /// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in + /// seconds/milliseconds/microseconds/nanoseconds, determined by precision. + DateTime, +} diff --git a/src/datatypes/src/types.rs b/src/datatypes/src/types.rs new file mode 100644 index 0000000000..fd30e143a9 --- /dev/null +++ b/src/datatypes/src/types.rs @@ -0,0 +1,2 @@ +pub mod primitive_traits; +pub mod primitive_type; diff --git a/src/datatypes/src/types/primitive_traits.rs b/src/datatypes/src/types/primitive_traits.rs new file mode 100644 index 0000000000..6402c7a727 --- /dev/null +++ b/src/datatypes/src/types/primitive_traits.rs @@ -0,0 +1,28 @@ +use arrow2::types::NativeType; + +use crate::value::Value; + +/// Primitive type. +pub trait Primitive: PartialOrd + Default + Clone + Copy + Into + NativeType { + /// Largest numeric type this primitive type can be cast to. + type LargestType: Primitive; +} + +macro_rules! impl_primitive { + ($Type:ident, $LargestType: ident) => { + impl Primitive for $Type { + type LargestType = $LargestType; + } + }; +} + +impl_primitive!(u8, u64); +impl_primitive!(u16, u64); +impl_primitive!(u32, u64); +impl_primitive!(u64, u64); +impl_primitive!(i8, i64); +impl_primitive!(i16, i64); +impl_primitive!(i32, i64); +impl_primitive!(i64, i64); +impl_primitive!(f32, f64); +impl_primitive!(f64, f64); diff --git a/src/datatypes/src/types/primitive_type.rs b/src/datatypes/src/types/primitive_type.rs new file mode 100644 index 0000000000..ade799acfe --- /dev/null +++ b/src/datatypes/src/types/primitive_type.rs @@ -0,0 +1,73 @@ +use std::marker::PhantomData; +use std::sync::Arc; + +use crate::data_type::{DataType, DataTypeRef}; +use crate::type_id::LogicalTypeId; +use crate::types::primitive_traits::Primitive; +use crate::value::Value; + +pub struct PrimitiveType { + _phantom: PhantomData, +} + +impl PrimitiveType { + pub fn new() -> Self { + Self { + _phantom: PhantomData, + } + } +} + +/// Create a new [DataTypeRef] from a primitive type. 
+pub trait CreateDataType { + fn create_data_type() -> DataTypeRef; +} + +macro_rules! impl_create_data_type { + ($Type:ident) => { + paste::paste! { + impl CreateDataType for $Type { + fn create_data_type() -> DataTypeRef { + Arc::new(PrimitiveType::<$Type>::new()) + } + } + } + }; +} + +macro_rules! impl_numeric { + ($Type:ident, $TypeId:ident) => { + impl DataType for PrimitiveType<$Type> { + fn name(&self) -> &str { + stringify!($TypeId) + } + + fn logical_type_id(&self) -> LogicalTypeId { + LogicalTypeId::$TypeId + } + + fn default_value(&self) -> Value { + $Type::default().into() + } + } + + impl std::fmt::Debug for PrimitiveType<$Type> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.name()) + } + } + + impl_create_data_type!($Type); + }; +} + +impl_numeric!(u8, UInt8); +impl_numeric!(u16, UInt16); +impl_numeric!(u32, UInt32); +impl_numeric!(u64, UInt64); +impl_numeric!(i8, Int8); +impl_numeric!(i16, Int16); +impl_numeric!(i32, Int32); +impl_numeric!(i64, Int64); +impl_numeric!(f32, Float32); +impl_numeric!(f64, Float64); diff --git a/src/datatypes/src/value.rs b/src/datatypes/src/value.rs new file mode 100644 index 0000000000..063ec321f5 --- /dev/null +++ b/src/datatypes/src/value.rs @@ -0,0 +1,59 @@ +use common::{Bytes, StringBytes}; + +/// Value holds a single arbitrary value of any [DataType](crate::data_type::DataType). +#[derive(Debug)] +pub enum Value { + Null, + + // Numeric types: + Boolean(bool), + UInt8(u8), + UInt16(u16), + UInt32(u32), + UInt64(u64), + Int8(i8), + Int16(i16), + Int32(i32), + Int64(i64), + Float32(f32), + Float64(f64), + + // String types: + String(StringBytes), + Binary(Bytes), + + // Date & Time types: + Date(i32), + DateTime(i64), +} + +macro_rules! 
impl_from { + ($Variant:ident, $Type:ident) => { + impl From<$Type> for Value { + fn from(value: $Type) -> Self { + Value::$Variant(value) + } + } + + impl From> for Value { + fn from(value: Option<$Type>) -> Self { + match value { + Some(v) => Value::$Variant(v), + None => Value::Null, + } + } + } + }; +} + +impl_from!(Boolean, bool); +impl_from!(UInt8, u8); +impl_from!(UInt16, u16); +impl_from!(UInt32, u32); +impl_from!(UInt64, u64); +impl_from!(Int8, i8); +impl_from!(Int16, i16); +impl_from!(Int32, i32); +impl_from!(Int64, i64); +impl_from!(Float32, f32); +impl_from!(Float64, f64); diff --git a/src/datatypes/src/vectors.rs b/src/datatypes/src/vectors.rs new file mode 100644 index 0000000000..cbb95f5c33 --- /dev/null +++ b/src/datatypes/src/vectors.rs @@ -0,0 +1,26 @@ +pub mod primitive; + +use std::any::Any; +use std::sync::Arc; + +use crate::data_type::DataTypeRef; + +/// Vector of data values. +pub trait Vector: Send + Sync { + /// Returns the data type of the vector. + fn data_type(&self) -> DataTypeRef; + + /// Returns the vector as [Any](std::any::Any) so that it can be + /// downcast to a specific implementation. + fn as_any(&self) -> &dyn Any; + + /// Returns number of elements in the vector. + fn len(&self) -> usize; + + /// Returns whether the vector is empty. + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +pub type VectorRef = Arc; diff --git a/src/datatypes/src/vectors/primitive.rs b/src/datatypes/src/vectors/primitive.rs new file mode 100644 index 0000000000..b58fe5e1c6 --- /dev/null +++ b/src/datatypes/src/vectors/primitive.rs @@ -0,0 +1,33 @@ +use std::any::Any; + +use arrow2::array::PrimitiveArray; + +use crate::data_type::DataTypeRef; +use crate::types::primitive_traits::Primitive; +use crate::types::primitive_type::CreateDataType; +use crate::vectors::Vector; + +/// Vector for primitive data types. 
+// NOTE(review): the generic parameters below were reconstructed; the extracted
+// patch text had lost its angle-bracketed type arguments. Confirm the bounds
+// against the original commit.
+pub struct PrimitiveVector<T: Primitive> {
+    /// Underlying arrow2 array that holds the values.
+    array: PrimitiveArray<T>,
+}
+
+impl<T: Primitive> PrimitiveVector<T> {
+    /// Creates a vector that wraps `array`, taking ownership of it.
+    pub fn new(array: PrimitiveArray<T>) -> Self {
+        Self { array }
+    }
+}
+
+impl<T: Primitive + CreateDataType> Vector for PrimitiveVector<T> {
+    /// Returns the data type derived from the element type `T`.
+    fn data_type(&self) -> DataTypeRef {
+        T::create_data_type()
+    }
+
+    /// Exposes the vector as `Any` so callers can downcast to `PrimitiveVector<T>`.
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Returns the number of elements in the wrapped array.
+    fn len(&self) -> usize {
+        self.array.len()
+    }
+}