feat: new datatypes subcrate based on the official arrow (#705)

* feat: Init datatypes2 crate

* chore: Remove some unimplemented types

* feat: Implements PrimitiveType and PrimitiveVector for datatypes2 (#633)

* feat: Implement primitive types and vectors

* feat: Implement a wrapper type

* feat: Remove VectorType from ScalarRef

* feat: Move some trait bound from NativeType to WrapperType

* feat: pub use  primitive vectors and builders

* feat: Returns error in try_from when type mismatch

* feat: Impl PartialEq for some vectors

* test: Pass vector tests

* chore: Add license header

* test: Pass more vector tests

* feat: Implement some methods of vector Helper

* test: Pass more tests

* style: Fix clippy

* chore: Add license header

* feat: Remove IntoValueRef trait

* feat: Add NativeType trait bound to WrapperType::Native

* docs: Explain what is wrapper type

* chore: Fix typos

* refactor: LogicalPrimitiveType::type_name returns str

* feat: Implements DateType and DateVector (#651)

* feat: Implement DateType and DateVector

* test: Pass more value and data type tests

* chore: Address CR comments

* test: Skip list value test

* feat: datatypes2 datetime (#661)

* feat: impl DateTime type and vector

* fix: add license header

* fix: CR comments and add more tests

* fix: customized serialization for wrapper type

* feat: Implements NullType and NullVector (#658)

* feat: Implements NullType and NullVector

* chore: Address CR comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* chore: Address CR comment

Co-authored-by: Ruihang Xia <waynestxia@gmail.com>

* feat: Implements StringType and StringVector (#659)

* feat: implement string vector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more test and from

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* cover NUL

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: impl datatypes2/timestamp (#686)

* feat: add timestamp datatype and vectors

* fix: cr comments and reformat code

* chore: add some tests

* feat: Implements ListType and ListVector (#681)

* feat: Implement ListType and ListVector

* test: Pass more tests

* style: Fix clippy

* chore: Fix comment

* chore: Address CR comments

* feat: impl constant vector (#680)

* feat: impl constant vector

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Apply suggestions from code review

Co-authored-by: Yingwen <realevenyag@gmail.com>

* rename fn names

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove println

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>

* feat: Implements Validity (#684)

* feat: Implements Validity

* chore: remove pub from sub mod in vectors

* feat: Implements schema for datatypes2 (#695)

* feat: Add is_timestamp_compatible to DataType

* feat: Implement ColumnSchema and Schema

* feat: Impl RawSchema

* chore: Remove useless codes and run more tests

* chore: Fix clippy

* feat: Impl from_arrow_time_unit and pass schema tests

* chore: add more tests for timestamp (#702)

* chore: add more tests for timestamp

* chore: add replicate test for timestamps

* feat: Implements helper methods for vectors/values (#703)

* feat: Implement helper methods for vectors/values

* chore: Address CR comments

* chore: add more test for timestamp

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
Co-authored-by: Lei, HUANG <6406592+v0y4g3r@users.noreply.github.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
Ruihang Xia
2022-12-05 19:59:23 +08:00
committed by GitHub
parent 4275e47bdb
commit beb07fc895
48 changed files with 10493 additions and 34 deletions

View File

@@ -23,6 +23,7 @@ use snafu::ResultExt;
use crate::error::{self, Result};
// TODO(yingwen): We should hold vectors in the RecordBatch.
#[derive(Clone, Debug, PartialEq)]
pub struct RecordBatch {
pub schema: SchemaRef,
@@ -103,6 +104,7 @@ impl<'a> Iterator for RecordBatchRowIterator<'a> {
} else {
let mut row = Vec::with_capacity(self.columns);
// TODO(yingwen): Get from the vector if RecordBatch also holds vectors.
for col in 0..self.columns {
let column_array = self.record_batch.df_recordbatch.column(col);
match arrow_array_get(column_array.as_ref(), self.row_cursor)

View File

@@ -147,6 +147,18 @@ impl From<i64> for Timestamp {
}
}
impl From<Timestamp> for i64 {
fn from(t: Timestamp) -> Self {
t.value
}
}
impl From<Timestamp> for serde_json::Value {
fn from(d: Timestamp) -> Self {
serde_json::Value::String(d.to_iso8601_string())
}
}
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TimeUnit {
Second,
@@ -197,6 +209,7 @@ impl Hash for Timestamp {
#[cfg(test)]
mod tests {
use chrono::Offset;
use serde_json::Value;
use super::*;
@@ -318,4 +331,39 @@ mod tests {
let ts = Timestamp::from_millis(ts_millis);
assert_eq!("1969-12-31 23:59:58.999+0000", ts.to_iso8601_string());
}
#[test]
fn test_serialize_to_json_value() {
assert_eq!(
"1970-01-01 00:00:01+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Second)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
assert_eq!(
"1970-01-01 00:00:00.001+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Millisecond)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
assert_eq!(
"1970-01-01 00:00:00.000001+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Microsecond)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
assert_eq!(
"1970-01-01 00:00:00.000000001+0000",
match serde_json::Value::from(Timestamp::new(1, TimeUnit::Nanosecond)) {
Value::String(s) => s,
_ => unreachable!(),
}
);
}
}