feat: Implement PrimitiveType and PrimitiveVector

This commit is contained in:
evenyag
2022-04-22 19:19:48 +08:00
parent cb74f1ac34
commit d20191572e
12 changed files with 381 additions and 0 deletions

98
Cargo.lock generated
View File

@@ -2,6 +2,20 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "arrow2"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e387b20dd573a96f36b173d9027483898f944d696521afd74e2caa3c813d86e"
dependencies = [
"bytemuck",
"chrono",
"either",
"hash_hasher",
"num-traits",
"simdutf8",
]
[[package]]
name = "async-trait"
version = "0.1.53"
@@ -13,12 +27,48 @@ dependencies = [
"syn",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bytemuck"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdead85bdec19c194affaeeb670c0e41fe23de31459efd1c174d049269cf02cc"
dependencies = [
"bytemuck_derive",
]
[[package]]
name = "bytemuck_derive"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "562e382481975bc61d11275ac5e62a19abd00b0547d99516a415336f183dcd0e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"num-integer",
"num-traits",
]
[[package]]
name = "common"
version = "0.1.0"
@@ -33,6 +83,11 @@ dependencies = [
[[package]]
name = "datatypes"
version = "0.1.0"
dependencies = [
"arrow2",
"common",
"paste",
]
[[package]]
name = "doc-comment"
@@ -40,6 +95,18 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "hash_hasher"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c"
[[package]]
name = "heck"
version = "0.3.3"
@@ -66,10 +133,35 @@ version = "0.1.0"
name = "logical-plans"
version = "0.1.0"
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "object-store"
version = "0.1.0"
[[package]]
name = "paste"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc"
[[package]]
name = "proc-macro2"
version = "1.0.37"
@@ -92,6 +184,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "simdutf8"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a"
[[package]]
name = "snafu"
version = "0.7.0"

View File

@@ -1 +1,7 @@
/// Bytes buffer.
///
/// A thin newtype over `Vec<u8>`. The inner vector is private, so the
/// conversions implemented below are the way to build a buffer from existing
/// data and to read its contents back.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct Bytes(Vec<u8>);

impl From<Vec<u8>> for Bytes {
    /// Wraps an owned byte vector without copying it.
    fn from(bytes: Vec<u8>) -> Self {
        Self(bytes)
    }
}

impl From<&[u8]> for Bytes {
    /// Copies the given slice into a new buffer.
    fn from(bytes: &[u8]) -> Self {
        Self(bytes.to_vec())
    }
}

impl AsRef<[u8]> for Bytes {
    /// Borrows the buffer contents as a byte slice.
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}
/// String buffer with arbitrary encoding.
///
/// The content is kept as raw bytes and is never validated as UTF-8. The inner
/// vector is private, so the conversions implemented below are the way to
/// build a buffer from existing data and to read its bytes back.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct StringBytes(Vec<u8>);

impl From<String> for StringBytes {
    /// Takes over the bytes of an owned string without copying them.
    fn from(string: String) -> Self {
        Self(string.into_bytes())
    }
}

impl From<&str> for StringBytes {
    /// Copies the bytes of the given string slice into a new buffer.
    fn from(string: &str) -> Self {
        Self(string.as_bytes().to_vec())
    }
}

impl AsRef<[u8]> for StringBytes {
    /// Borrows the raw bytes of the buffer.
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

View File

@@ -6,3 +6,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
common = { path = "../common" }
arrow2 = "0.10"
paste = "1.0"

View File

@@ -0,0 +1,18 @@
use std::sync::Arc;
use crate::type_id::LogicalTypeId;
use crate::value::Value;
/// Common interface of every data type.
///
/// Implementors must also implement [`std::fmt::Debug`].
pub trait DataType: std::fmt::Debug {
    /// Returns the name of this data type.
    fn name(&self) -> &str;

    /// Returns the [`LogicalTypeId`] identifying this data type.
    fn logical_type_id(&self) -> LogicalTypeId;

    /// Returns the default [`Value`] of this data type.
    fn default_value(&self) -> Value;
}
/// Reference-counted handle to a type-erased [`DataType`].
pub type DataTypeRef = Arc<dyn DataType>;

View File

@@ -1 +1,6 @@
mod data_type;
mod schema;
pub mod type_id;
mod types;
pub mod value;
pub mod vectors;

View File

@@ -0,0 +1,30 @@
/// Unique identifier for logical data type.
///
/// The enum is a field-less tag, so it derives `Copy`, equality and hashing:
/// ids can be compared with `==`, passed by value and used as map or set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LogicalTypeId {
    Null,

    // Boolean and numeric types:
    Boolean,
    Int8,
    Int16,
    Int32,
    Int64,
    UInt8,
    UInt16,
    UInt32,
    UInt64,
    Float32,
    Float64,

    // String types:
    String,
    Binary,

    // Date & Time types:
    /// Date representing the elapsed time since UNIX epoch (1970-01-01)
    /// in days (32 bits).
    Date,
    /// Datetime representing the elapsed time since UNIX epoch (1970-01-01) in
    /// seconds/milliseconds/microseconds/nanoseconds, determined by precision.
    DateTime,
}

View File

@@ -0,0 +1,2 @@
pub mod primitive_traits;
pub mod primitive_type;

View File

@@ -0,0 +1,28 @@
use arrow2::types::NativeType;
use crate::value::Value;
/// Trait for native types that can be used as primitive values.
///
/// An implementor must be comparable, default-constructible, copyable,
/// convertible into a [`Value`] and an arrow2 [`NativeType`].
pub trait Primitive:
    PartialOrd + Default + Clone + Copy + Into<Value> + NativeType
{
    /// Largest numeric type this primitive type can be cast to.
    type LargestType: Primitive;
}
macro_rules! impl_primitive {
($Type:ident, $LargestType: ident) => {
impl Primitive for $Type {
type LargestType = $LargestType;
}
};
}
impl_primitive!(u8, u64);
impl_primitive!(u16, u64);
impl_primitive!(u32, u64);
impl_primitive!(u64, u64);
impl_primitive!(i8, i64);
impl_primitive!(i16, i64);
impl_primitive!(i32, i64);
impl_primitive!(i64, i64);
impl_primitive!(f32, f64);
impl_primitive!(f64, f64);

View File

@@ -0,0 +1,73 @@
use std::marker::PhantomData;
use std::sync::Arc;
use crate::data_type::{DataType, DataTypeRef};
use crate::type_id::LogicalTypeId;
use crate::types::primitive_traits::Primitive;
use crate::value::Value;
/// Data type of the primitive native type `T`.
///
/// The struct stores no data; `T` is carried only at the type level through
/// [`PhantomData`].
pub struct PrimitiveType<T: Primitive> {
    _phantom: PhantomData<T>,
}

impl<T: Primitive> PrimitiveType<T> {
    /// Creates a new `PrimitiveType` for `T`.
    pub fn new() -> Self {
        Self {
            _phantom: PhantomData,
        }
    }
}

// A public zero-argument `new` should be mirrored by `Default`, so the type
// also works with `Default::default()`, `#[derive(Default)]` on containing
// structs and `unwrap_or_default()`.
impl<T: Primitive> Default for PrimitiveType<T> {
    /// Equivalent to [`PrimitiveType::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Types that can produce the [`DataTypeRef`] describing themselves.
pub trait CreateDataType {
    /// Returns a [`DataTypeRef`] for the implementing type.
    fn create_data_type() -> DataTypeRef;
}
/// Implements [`CreateDataType`] for the native type `$Type`; the returned
/// [`DataTypeRef`] wraps a new `PrimitiveType<$Type>`.
///
/// No identifiers are concatenated here, so the impl is emitted directly
/// rather than through `paste::paste!`.
macro_rules! impl_create_data_type {
    ($Type:ident) => {
        impl CreateDataType for $Type {
            fn create_data_type() -> DataTypeRef {
                Arc::new(PrimitiveType::<$Type>::new())
            }
        }
    };
}
macro_rules! impl_numeric {
($Type:ident, $TypeId:ident) => {
impl DataType for PrimitiveType<$Type> {
fn name(&self) -> &str {
stringify!($TypeId)
}
fn logical_type_id(&self) -> LogicalTypeId {
LogicalTypeId::$TypeId
}
fn default_value(&self) -> Value {
$Type::default().into()
}
}
impl std::fmt::Debug for PrimitiveType<$Type> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.name())
}
}
impl_create_data_type!($Type);
};
}
impl_numeric!(u8, UInt8);
impl_numeric!(u16, UInt16);
impl_numeric!(u32, UInt32);
impl_numeric!(u64, UInt64);
impl_numeric!(i8, Int8);
impl_numeric!(i16, Int16);
impl_numeric!(i32, Int32);
impl_numeric!(i64, Int64);
impl_numeric!(f32, Float32);
impl_numeric!(f64, Float64);

View File

@@ -0,0 +1,59 @@
use common::{Bytes, StringBytes};
/// Value holds a single arbitrary value of any [DataType](crate::data_type::DataType).
///
/// Every payload type is `Clone`, so the enum derives `Clone` as well and a
/// value can be duplicated.
#[derive(Debug, Clone)]
pub enum Value {
    Null,

    // Boolean and numeric types:
    Boolean(bool),
    UInt8(u8),
    UInt16(u16),
    UInt32(u32),
    UInt64(u64),
    Int8(i8),
    Int16(i16),
    Int32(i32),
    Int64(i64),
    Float32(f32),
    Float64(f64),

    // String types:
    String(StringBytes),
    Binary(Bytes),

    // Date & Time types:
    Date(i32),
    DateTime(i64),
}
macro_rules! impl_from {
($Variant:ident, $Type:ident) => {
impl From<$Type> for Value {
fn from(value: $Type) -> Self {
Value::$Variant(value)
}
}
impl From<Option<$Type>> for Value {
fn from(value: Option<$Type>) -> Self {
match value {
Some(v) => Value::$Variant(v),
None => Value::Null,
}
}
}
};
}
impl_from!(Boolean, bool);
impl_from!(UInt8, u8);
impl_from!(UInt16, u16);
impl_from!(UInt32, u32);
impl_from!(UInt64, u64);
impl_from!(Int8, i8);
impl_from!(Int16, i16);
impl_from!(Int32, i32);
impl_from!(Int64, i64);
impl_from!(Float32, f32);
impl_from!(Float64, f64);

View File

@@ -0,0 +1,26 @@
pub mod primitive;
use std::any::Any;
use std::sync::Arc;
use crate::data_type::DataTypeRef;
/// A vector of data values.
///
/// Implementors must be `Send + Sync`.
pub trait Vector: Send + Sync {
    /// Returns the data type of the values held by this vector.
    fn data_type(&self) -> DataTypeRef;

    /// Returns this vector as [`Any`] so that callers can downcast it to a
    /// concrete implementation.
    fn as_any(&self) -> &dyn Any;

    /// Returns the number of elements in this vector.
    fn len(&self) -> usize;

    /// Returns `true` when the vector holds no elements, i.e. when
    /// [`len`](Vector::len) is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Reference-counted handle to a type-erased [`Vector`].
pub type VectorRef = Arc<dyn Vector>;

View File

@@ -0,0 +1,33 @@
use std::any::Any;
use arrow2::array::PrimitiveArray;
use crate::data_type::DataTypeRef;
use crate::types::primitive_traits::Primitive;
use crate::types::primitive_type::CreateDataType;
use crate::vectors::Vector;
/// Vector of primitive values, stored in an arrow2 [`PrimitiveArray`].
pub struct PrimitiveVector<T: Primitive> {
    /// Underlying array that owns the values.
    array: PrimitiveArray<T>,
}

impl<T: Primitive> PrimitiveVector<T> {
    /// Wraps the given `array` in a `PrimitiveVector`.
    pub fn new(array: PrimitiveArray<T>) -> Self {
        PrimitiveVector { array }
    }
}
impl<T> Vector for PrimitiveVector<T>
where
    T: Primitive + CreateDataType,
{
    /// Returns the data type produced by `T`'s [`CreateDataType`] impl.
    fn data_type(&self) -> DataTypeRef {
        <T as CreateDataType>::create_data_type()
    }

    /// Returns `self` as [`Any`] for downcasting.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns the length of the underlying array.
    fn len(&self) -> usize {
        self.array.len()
    }
}