feat: type conversion between Values (#2394)

* feat: add cast() in datatype trait.

* feat: add cast for primitive type

* feat: add unit test cases

* test: add datetime/time cases.

* refactor: time_type cast function.

* chore: typos.

* refactor code.

* feat: add can_cast_type func.

* chore: rename cast to try_cast

* feat: impl cast_with_opt

* chore: pub use cast_with_opt

* chore: add timezone for test

* Update src/common/time/src/date.rs

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* chore: duration type

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
This commit is contained in:
Wei
2023-09-18 22:25:38 +08:00
committed by GitHub
parent 342cc0a4c4
commit 5805e8d4b6
20 changed files with 1143 additions and 17 deletions

View File

@@ -82,6 +82,10 @@ impl Date {
pub fn to_chrono_date(&self) -> Option<NaiveDate> {
NaiveDate::from_num_days_from_ce_opt(UNIX_EPOCH_FROM_CE + self.0)
}
/// Returns this date expressed as seconds: the stored day count multiplied
/// by the number of seconds in a day (`1970-01-01` maps to `0`).
pub fn to_secs(&self) -> i64 {
    // Widen the day count first so the multiplication happens in `i64`.
    let days = i64::from(self.0);
    days * 24 * 3600
}
}
#[cfg(test)]
@@ -132,4 +136,14 @@ mod tests {
let d: Date = 42.into();
assert_eq!(42, d.val());
}
#[test]
fn test_to_secs() {
    // (date literal, expected seconds)
    let cases = [
        ("1970-01-01", 0),
        ("1970-01-02", 24 * 3600),
        ("1970-01-03", 2 * 24 * 3600),
    ];
    for (text, expected) in cases {
        let d = Date::from_str(text).unwrap();
        assert_eq!(d.to_secs(), expected);
    }
}
}

View File

@@ -50,6 +50,12 @@ impl From<DateTime> for serde_json::Value {
}
}
/// Builds a [DateTime] from a chrono [NaiveDateTime] via the value returned
/// by `NaiveDateTime::timestamp()`.
impl From<NaiveDateTime> for DateTime {
    fn from(value: NaiveDateTime) -> Self {
        let ts = value.timestamp();
        DateTime::from(ts)
    }
}
impl FromStr for DateTime {
type Err = Error;
@@ -88,7 +94,7 @@ impl DateTime {
}
pub fn to_chrono_datetime(&self) -> Option<NaiveDateTime> {
NaiveDateTime::from_timestamp_millis(self.0)
NaiveDateTime::from_timestamp_opt(self.0, 0)
}
}

View File

@@ -77,6 +77,19 @@ impl Time {
self.value
}
/// Converts this time into the given `unit`.
///
/// When the current unit's factor is at least the target's, the value is
/// multiplied by the factor ratio and `None` is returned if that
/// multiplication overflows. Otherwise the value is divided by the ratio
/// using Euclidean division.
pub fn convert_to(&self, unit: TimeUnit) -> Option<Time> {
    let src_factor = self.unit().factor();
    let dst_factor = unit.factor();

    let converted = if src_factor < dst_factor {
        let ratio = dst_factor / src_factor;
        self.value.div_euclid(ratio as i64)
    } else {
        let ratio = src_factor / dst_factor;
        self.value.checked_mul(ratio as i64)?
    };
    Some(Time::new(converted, unit))
}
/// Split a [Time] into seconds part and nanoseconds part.
/// Notice the seconds part of split result is always rounded down to floor.
fn split(&self) -> (i64, u32) {

View File

@@ -20,7 +20,9 @@ use std::str::FromStr;
use std::time::Duration;
use arrow::datatypes::TimeUnit as ArrowTimeUnit;
use chrono::{DateTime, LocalResult, NaiveDateTime, TimeZone as ChronoTimeZone, Utc};
use chrono::{
DateTime, LocalResult, NaiveDate, NaiveDateTime, NaiveTime, TimeZone as ChronoTimeZone, Utc,
};
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
@@ -250,11 +252,25 @@ impl Timestamp {
NaiveDateTime::from_timestamp_opt(sec, nsec)
}
/// Returns the date part of [`Self::to_chrono_datetime`], or `None` when
/// that conversion yields `None`.
pub fn to_chrono_date(&self) -> Option<NaiveDate> {
    let datetime = self.to_chrono_datetime()?;
    Some(datetime.date())
}
/// Returns the time-of-day part of [`Self::to_chrono_datetime`], or `None`
/// when that conversion yields `None`.
pub fn to_chrono_time(&self) -> Option<NaiveTime> {
    let datetime = self.to_chrono_datetime()?;
    Some(datetime.time())
}
/// Builds a timestamp from a chrono [NaiveDateTime] by passing its seconds
/// and sub-second nanoseconds to `from_splits`.
pub fn from_chrono_datetime(ndt: NaiveDateTime) -> Option<Self> {
    Timestamp::from_splits(ndt.timestamp(), ndt.timestamp_subsec_nanos())
}
/// Builds a timestamp from a chrono [NaiveDate] paired with
/// `NaiveTime::default()` as its time of day.
pub fn from_chrono_date(date: NaiveDate) -> Option<Self> {
    let start_of_day = NaiveTime::default();
    Timestamp::from_chrono_datetime(date.and_time(start_of_day))
}
}
impl FromStr for Timestamp {

View File

@@ -120,6 +120,10 @@ impl ConcreteDataType {
matches!(self, ConcreteDataType::Boolean(_))
}
pub fn is_string(&self) -> bool {
matches!(self, ConcreteDataType::String(_))
}
pub fn is_stringifiable(&self) -> bool {
matches!(
self,
@@ -159,6 +163,22 @@ impl ConcreteDataType {
)
}
pub fn is_numeric(&self) -> bool {
matches!(
self,
ConcreteDataType::Int8(_)
| ConcreteDataType::Int16(_)
| ConcreteDataType::Int32(_)
| ConcreteDataType::Int64(_)
| ConcreteDataType::UInt8(_)
| ConcreteDataType::UInt16(_)
| ConcreteDataType::UInt32(_)
| ConcreteDataType::UInt64(_)
| ConcreteDataType::Float32(_)
| ConcreteDataType::Float64(_)
)
}
pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
@@ -456,6 +476,10 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
/// Returns true if the data type is compatible with timestamp type so we can
/// use it as a timestamp.
fn is_timestamp_compatible(&self) -> bool;
/// Tries to cast the `from` value into a value of this data type.
///
/// Returns `None` if the cast fails.
fn try_cast(&self, from: Value) -> Option<Value>;
}
pub type DataTypeRef = Arc<dyn DataType>;

View File

@@ -14,6 +14,7 @@
mod binary_type;
mod boolean_type;
pub mod cast;
mod date_type;
mod datetime_type;
mod dictionary_type;
@@ -28,6 +29,7 @@ mod timestamp_type;
pub use binary_type::BinaryType;
pub use boolean_type::BooleanType;
pub use cast::cast_with_opt;
pub use date_type::DateType;
pub use datetime_type::DateTimeType;
pub use dictionary_type::DictionaryType;

View File

@@ -57,4 +57,12 @@ impl DataType for BinaryType {
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Casts binary values (unchanged) and strings (their UTF-8 bytes) to
/// binary; any other value yields `None`.
fn try_cast(&self, from: Value) -> Option<Value> {
    let bytes = match from {
        Value::Binary(bytes) => bytes,
        Value::String(text) => Bytes::from(text.as_utf8().as_bytes()),
        _ => return None,
    };
    Some(Value::Binary(bytes))
}
}

View File

@@ -15,6 +15,7 @@
use std::sync::Arc;
use arrow::datatypes::DataType as ArrowDataType;
use num_traits::Num;
use serde::{Deserialize, Serialize};
use crate::data_type::{DataType, DataTypeRef};
@@ -56,4 +57,137 @@ impl DataType for BooleanType {
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Casts booleans (unchanged), numeric values (via [`numeric_to_bool`]) and
/// strings (via `str::parse::<bool>`) to boolean; any other value yields
/// `None`.
fn try_cast(&self, from: Value) -> Option<Value> {
    match from {
        Value::Boolean(flag) => Some(Value::Boolean(flag)),
        Value::String(text) => text.as_utf8().parse::<bool>().map(Value::Boolean).ok(),
        // signed integers
        Value::Int8(n) => numeric_to_bool(n),
        Value::Int16(n) => numeric_to_bool(n),
        Value::Int32(n) => numeric_to_bool(n),
        Value::Int64(n) => numeric_to_bool(n),
        // unsigned integers
        Value::UInt8(n) => numeric_to_bool(n),
        Value::UInt16(n) => numeric_to_bool(n),
        Value::UInt32(n) => numeric_to_bool(n),
        Value::UInt64(n) => numeric_to_bool(n),
        // floats
        Value::Float32(n) => numeric_to_bool(n),
        Value::Float64(n) => numeric_to_bool(n),
        _ => None,
    }
}
}
/// Converts a number to `Value::Boolean`: `true` when it differs from
/// `T::default()`, `false` otherwise. Always returns `Some`.
pub fn numeric_to_bool<T>(num: T) -> Option<Value>
where
    T: Num + Default,
{
    let differs_from_default = num != T::default();
    Some(Value::Boolean(differs_from_default))
}
/// Converts a boolean to a number: `T::one()` for `true`, `T::zero()` for
/// `false`. Always returns `Some`.
pub fn bool_to_numeric<T>(value: bool) -> Option<T>
where
    T: Num,
{
    let number = if value { T::one() } else { T::zero() };
    Some(number)
}
#[cfg(test)]
mod tests {
    use ordered_float::OrderedFloat;

    use super::*;
    use crate::data_type::ConcreteDataType;

    /// Asserts that casting `$value` to the boolean type gives `$expected`.
    macro_rules! test_cast_to_bool {
        ($value: expr, $expected: expr) => {
            let casted = ConcreteDataType::boolean_datatype()
                .try_cast($value)
                .unwrap();
            assert_eq!(casted, Value::Boolean($expected));
        };
    }

    /// Asserts that casting the boolean `$value` to `$datatype` gives `$expected`.
    macro_rules! test_cast_from_bool {
        ($value: expr, $datatype: expr, $expected: expr) => {
            let casted = $datatype.try_cast(Value::Boolean($value)).unwrap();
            assert_eq!(casted, $expected);
        };
    }

    #[test]
    fn test_other_type_cast_to_bool() {
        // Zero of every numeric type casts to false.
        let zeros = [
            Value::UInt8(0),
            Value::UInt16(0),
            Value::UInt32(0),
            Value::UInt64(0),
            Value::Int8(0),
            Value::Int16(0),
            Value::Int32(0),
            Value::Int64(0),
            Value::Float32(OrderedFloat(0.0)),
            Value::Float64(OrderedFloat(0.0)),
        ];
        for value in zeros {
            test_cast_to_bool!(value, false);
        }

        // Non-zero numbers cast to true.
        let non_zeros = [
            Value::UInt8(1),
            Value::UInt16(2),
            Value::UInt32(3),
            Value::UInt64(4),
            Value::Int8(5),
            Value::Int16(6),
            Value::Int32(7),
            Value::Int64(8),
            Value::Float32(OrderedFloat(1.0)),
            Value::Float64(OrderedFloat(2.0)),
        ];
        for value in non_zeros {
            test_cast_to_bool!(value, true);
        }
    }

    #[test]
    fn test_bool_cast_to_other_type() {
        // (destination type, expected for `false`, expected for `true`)
        let cases = [
            (
                ConcreteDataType::uint8_datatype(),
                Value::UInt8(0),
                Value::UInt8(1),
            ),
            (
                ConcreteDataType::uint16_datatype(),
                Value::UInt16(0),
                Value::UInt16(1),
            ),
            (
                ConcreteDataType::uint32_datatype(),
                Value::UInt32(0),
                Value::UInt32(1),
            ),
            (
                ConcreteDataType::uint64_datatype(),
                Value::UInt64(0),
                Value::UInt64(1),
            ),
            (
                ConcreteDataType::int8_datatype(),
                Value::Int8(0),
                Value::Int8(1),
            ),
            (
                ConcreteDataType::int16_datatype(),
                Value::Int16(0),
                Value::Int16(1),
            ),
            (
                ConcreteDataType::int32_datatype(),
                Value::Int32(0),
                Value::Int32(1),
            ),
            (
                ConcreteDataType::int64_datatype(),
                Value::Int64(0),
                Value::Int64(1),
            ),
            (
                ConcreteDataType::float32_datatype(),
                Value::Float32(OrderedFloat(0.0)),
                Value::Float32(OrderedFloat(1.0)),
            ),
            (
                ConcreteDataType::float64_datatype(),
                Value::Float64(OrderedFloat(0.0)),
                Value::Float64(OrderedFloat(1.0)),
            ),
        ];
        for (datatype, when_false, when_true) in cases {
            test_cast_from_bool!(false, datatype, when_false);
            test_cast_from_bool!(true, datatype, when_true);
        }
    }
}

View File

@@ -0,0 +1,298 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Error, Result};
use crate::types::TimeType;
use crate::value::Value;
/// Options controlling how cast functions such as [`cast_with_opt`] behave.
///
/// The derived `Default` leaves `strict` as `false`.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct CastOption {
/// Decides how cast failures are handled:
/// return NULL (`strict = false`) or return an error (`strict = true`).
pub strict: bool,
}
impl CastOption {
/// Returns the value of the `strict` flag.
pub fn is_strict(&self) -> bool {
self.strict
}
}
/// Casts `src_value` to `dest_type`, honoring `cast_option`.
///
/// # Arguments
/// * `src_value` - The value to be cast.
/// * `dest_type` - The destination type.
/// * `cast_option` - The [CastOption] deciding how failures are reported.
///
/// # Returns
/// On success, the cast value.
/// On failure (either [`can_cast_type`] rejects the type pair or
/// `try_cast` returns `None`):
/// * strict mode returns an error describing the failed cast;
/// * non-strict mode returns `Value::Null`.
pub fn cast_with_opt(
    src_value: Value,
    dest_type: &ConcreteDataType,
    cast_option: &CastOption,
) -> Result<Value> {
    let castable = can_cast_type(&src_value, dest_type);

    if cast_option.is_strict() {
        // `try_cast` consumes its argument, but the error message still needs
        // the source value, so a copy is handed over in strict mode only.
        let casted = if castable {
            dest_type.try_cast(src_value.clone())
        } else {
            None
        };
        casted.ok_or_else(|| invalid_type_cast(&src_value, dest_type))
    } else {
        // Non-strict mode never reports the source value: move it, no clone.
        let casted = if castable {
            dest_type.try_cast(src_value)
        } else {
            None
        };
        Ok(casted.unwrap_or(Value::Null))
    }
}
/// Reports whether a cast from the type of `src_value` to `dest_type` is
/// supported.
///
/// Notice: a `true` result does not promise that `cast_with_opt` will
/// succeed; only the pair of types is inspected, not the value itself.
pub fn can_cast_type(src_value: &Value, dest_type: &ConcreteDataType) -> bool {
    use ConcreteDataType::*;
    use TimeType::*;

    let src_type = &src_value.data_type();
    // Identical types are always castable.
    if src_type == dest_type {
        return true;
    }

    let numeric_or_string = |t: &ConcreteDataType| t.is_numeric() || t.is_string();

    match (src_type, dest_type) {
        // Anything can become null.
        (_, Null(_)) => true,

        // Booleans convert to and from numerics and strings.
        (_, Boolean(_)) => numeric_or_string(src_type),
        (Boolean(_), _) => numeric_or_string(dest_type),

        // Numerics and strings convert among themselves.
        _ if numeric_or_string(src_type) && numeric_or_string(dest_type) => true,

        (String(_), Binary(_)) => true,

        // Temporal types.
        (Date(_), Int32(_) | Timestamp(_) | String(_))
        | (Int32(_) | String(_) | Timestamp(_), Date(_)) => true,
        (DateTime(_), Int64(_) | Timestamp(_) | String(_))
        | (Int64(_) | Timestamp(_) | String(_), DateTime(_)) => true,
        (Timestamp(_), Int64(_) | String(_)) | (Int64(_) | String(_), Timestamp(_)) => true,
        (Time(_), String(_)) => true,
        (Time(Second(_) | Millisecond(_)), Int32(_)) => true,
        (Time(Microsecond(_) | Nanosecond(_)), Int64(_)) => true,

        // TODO(QuenKar): interval type cast
        (Interval(_) | Duration(_), String(_)) => true,

        // Every other combination is unsupported.
        _ => false,
    }
}
fn invalid_type_cast(src_value: &Value, dest_type: &ConcreteDataType) -> Error {
let src_type = src_value.data_type();
if src_type.is_string() {
error::CastTypeSnafu {
msg: format!("Could not parse string '{}' to {}", src_value, dest_type),
}
.build()
} else if src_type.is_numeric() && dest_type.is_numeric() {
error::CastTypeSnafu {
msg: format!(
"Type {} with value {} can't be cast because the value is out of range for the destination type {}",
src_type,
src_value,
dest_type
),
}
.build()
} else {
error::CastTypeSnafu {
msg: format!(
"Type {} with value {} can't be cast to the destination type {}",
src_type, src_value, dest_type
),
}
.build()
}
}
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use common_base::bytes::StringBytes;
    use common_time::time::Time;
    use common_time::{Date, DateTime, Timestamp};
    use ordered_float::OrderedFloat;

    use super::*;

    /// Asserts that `$src_value` is reported as castable to the type returned
    /// by each listed `ConcreteDataType` constructor.
    macro_rules! test_can_cast {
        ($src_value: expr, $($dest_type: ident),+) => {
            $(
                let src = $src_value;
                let dest = ConcreteDataType::$dest_type();
                assert!(can_cast_type(&src, &dest));
            )*
        };
    }

    /// Asserts that every given value is reported as castable to all ten
    /// numeric types.
    macro_rules! test_primitive_cast {
        ($($value: expr),*) => {
            $(
                test_can_cast!(
                    $value,
                    uint8_datatype,
                    uint16_datatype,
                    uint32_datatype,
                    uint64_datatype,
                    int8_datatype,
                    int16_datatype,
                    int32_datatype,
                    int64_datatype,
                    float32_datatype,
                    float64_datatype
                );
            )*
        };
    }

    #[test]
    fn test_cast_with_opt() {
        std::env::set_var("TZ", "Asia/Shanghai");

        // Non-strict mode: a failed cast turns into NULL.
        let lenient = CastOption { strict: false };
        let res = cast_with_opt(
            Value::Int8(-1),
            &ConcreteDataType::uint8_datatype(),
            &lenient,
        );
        assert_eq!(res.unwrap(), Value::Null);

        // Strict mode: a failed cast is an error with a specific message.
        let strict = CastOption { strict: true };
        let assert_strict_err = |src: Value, dest: ConcreteDataType, expected: &str| {
            let err = cast_with_opt(src, &dest, &strict).unwrap_err();
            assert_eq!(err.to_string(), expected);
        };

        assert_strict_err(
            Value::Int8(-1),
            ConcreteDataType::uint8_datatype(),
            "Type Int8 with value -1 can't be cast because the value is out of range for the destination type UInt8",
        );
        assert_strict_err(
            Value::String(StringBytes::from("abc")),
            ConcreteDataType::uint8_datatype(),
            "Could not parse string 'abc' to UInt8",
        );
        assert_strict_err(
            Value::Timestamp(Timestamp::new_second(10)),
            ConcreteDataType::int8_datatype(),
            "Type Timestamp with value 1970-01-01 08:00:10+0800 can't be cast to the destination type Int8",
        );
    }

    #[test]
    fn test_can_cast_type() {
        // numerics and strings -> every numeric type
        test_primitive_cast!(
            Value::UInt8(0),
            Value::UInt16(1),
            Value::UInt32(2),
            Value::UInt64(3),
            Value::Int8(4),
            Value::Int16(5),
            Value::Int32(6),
            Value::Int64(7),
            Value::Float32(OrderedFloat(8.0)),
            Value::Float64(OrderedFloat(9.0)),
            Value::String(StringBytes::from("10"))
        );

        // string -> non-numeric types
        test_can_cast!(
            Value::String(StringBytes::from("0")),
            null_datatype,
            boolean_datatype,
            date_datatype,
            datetime_datatype,
            timestamp_second_datatype,
            binary_datatype
        );

        // date source
        test_can_cast!(
            Value::Date(Date::from_str("2021-01-01").unwrap()),
            null_datatype,
            int32_datatype,
            timestamp_second_datatype,
            string_datatype
        );

        // datetime source
        test_can_cast!(
            Value::DateTime(DateTime::from_str("2021-01-01 00:00:00").unwrap()),
            null_datatype,
            int64_datatype,
            timestamp_second_datatype,
            string_datatype
        );

        // timestamp source
        test_can_cast!(
            Value::Timestamp(Timestamp::from_str("2021-01-01 00:00:00").unwrap()),
            null_datatype,
            int64_datatype,
            date_datatype,
            datetime_datatype,
            string_datatype
        );

        // time source
        test_can_cast!(
            Value::Time(Time::new_second(0)),
            null_datatype,
            string_datatype
        );
    }
}

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::str::FromStr;
use arrow::datatypes::{DataType as ArrowDataType, Date32Type};
use common_time::Date;
use serde::{Deserialize, Serialize};
@@ -53,6 +55,15 @@ impl DataType for DateType {
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Int32(v) => Some(Value::Date(Date::from(v))),
Value::String(v) => Date::from_str(v.as_utf8()).map(Value::Date).ok(),
Value::Timestamp(v) => v.to_chrono_date().map(|date| Value::Date(date.into())),
_ => None,
}
}
}
impl LogicalPrimitiveType for DateType {
@@ -89,3 +100,39 @@ impl LogicalPrimitiveType for DateType {
}
}
}
#[cfg(test)]
mod tests {
    use common_base::bytes::StringBytes;
    use common_time::Timestamp;

    use super::*;

    #[test]
    fn test_date_cast() {
        std::env::set_var("TZ", "Asia/Shanghai");

        // Casts `src` to the date type and compares with the date literal.
        let assert_date = |src: Value, expected: &str| {
            let date = ConcreteDataType::date_datatype().try_cast(src).unwrap();
            assert_eq!(date, Value::Date(Date::from_str(expected).unwrap()));
        };

        // timestamp -> date
        assert_date(
            Value::Timestamp(Timestamp::from_str("2000-01-01 08:00:01").unwrap()),
            "2000-01-01",
        );
        // this case is bound to the local timezone.
        assert_date(
            Value::Timestamp(Timestamp::from_str("2000-01-02 07:59:59").unwrap()),
            "2000-01-01",
        );

        // Int32 -> date
        assert_date(Value::Int32(0), "1970-01-01");
        assert_date(Value::Int32(19614), "2023-09-14");

        // String -> date
        assert_date(Value::String(StringBytes::from("1970-02-12")), "1970-02-12");
    }
}

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::str::FromStr;
use arrow::datatypes::{DataType as ArrowDataType, Date64Type};
use common_time::DateTime;
use serde::{Deserialize, Serialize};
@@ -51,6 +53,15 @@ impl DataType for DateTimeType {
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Int64(v) => Some(Value::DateTime(DateTime::from(v))),
Value::Timestamp(v) => v.to_chrono_datetime().map(|d| Value::DateTime(d.into())),
Value::String(v) => DateTime::from_str(v.as_utf8()).map(Value::DateTime).ok(),
_ => None,
}
}
}
impl LogicalPrimitiveType for DateTimeType {
@@ -90,3 +101,36 @@ impl LogicalPrimitiveType for DateTimeType {
}
}
}
#[cfg(test)]
mod tests {
    use common_time::Timestamp;

    use super::*;

    #[test]
    fn test_datetime_cast() {
        // Int64 -> datetime
        let dt = ConcreteDataType::datetime_datatype()
            .try_cast(Value::Int64(1000))
            .unwrap();
        assert_eq!(dt, Value::DateTime(DateTime::from(1000)));

        // String -> datetime
        std::env::set_var("TZ", "Asia/Shanghai");
        let dt = ConcreteDataType::datetime_datatype()
            .try_cast(Value::String("1970-01-01 00:00:00+0800".into()))
            .unwrap();
        let expected = DateTime::from_str("1970-01-01 00:00:00+0800").unwrap();
        assert_eq!(dt, Value::DateTime(expected));

        // Timestamp -> datetime (the expected value carries no milliseconds)
        let ts = Timestamp::from_str("2020-09-08 21:42:29.042+0800").unwrap();
        let dt = ConcreteDataType::datetime_datatype()
            .try_cast(Value::Timestamp(ts))
            .unwrap();
        let expected = DateTime::from_str("2020-09-08 21:42:29+0800").unwrap();
        assert_eq!(dt, Value::DateTime(expected));
    }
}

View File

@@ -88,4 +88,8 @@ impl DataType for DictionaryType {
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Casting to this type is not supported: always returns `None`.
fn try_cast(&self, _: Value) -> Option<Value> {
None
}
}

View File

@@ -101,6 +101,11 @@ macro_rules! impl_data_type_for_duration {
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Currently a stub: always returns `None`, whatever the input value.
fn try_cast(&self, _: Value) -> Option<Value> {
// TODO(QuenKar): Implement casting for duration types.
None
}
}
impl LogicalPrimitiveType for [<Duration $unit Type>] {

View File

@@ -89,6 +89,11 @@ macro_rules! impl_data_type_for_interval {
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Currently a stub: always returns `None`, whatever the input value.
fn try_cast(&self, _: Value) -> Option<Value> {
// TODO(QuenKar): Implement casting for interval types.
None
}
}
impl LogicalPrimitiveType for [<Interval $unit Type>] {

View File

@@ -79,6 +79,13 @@ impl DataType for ListType {
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Passes list values through unchanged; any other value yields `None`.
fn try_cast(&self, from: Value) -> Option<Value> {
    match from {
        list @ Value::List(_) => Some(list),
        _ => None,
    }
}
}
#[cfg(test)]

View File

@@ -55,4 +55,9 @@ impl DataType for NullType {
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Unconditionally casts any value to `Value::Null`; never returns `None`.
fn try_cast(&self, _from: Value) -> Option<Value> {
Some(Value::Null)
}
}

View File

@@ -16,11 +16,13 @@ use std::cmp::Ordering;
use std::fmt;
use arrow::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType as ArrowDataType};
use common_time::interval::IntervalUnit;
use common_time::{Date, DateTime};
use num::NumCast;
use serde::{Deserialize, Serialize};
use snafu::OptionExt;
use super::boolean_type::bool_to_numeric;
use crate::data_type::{ConcreteDataType, DataType};
use crate::error::{self, Result};
use crate::scalars::{Scalar, ScalarRef, ScalarVectorBuilder};
@@ -245,7 +247,7 @@ macro_rules! define_logical_primitive_type {
}
macro_rules! define_non_timestamp_primitive {
($Native: ident, $TypeId: ident, $DataType: ident, $Largest: ident) => {
( $Native: ident, $TypeId: ident, $DataType: ident, $Largest: ident $(, $TargetType: ident)* ) => {
define_logical_primitive_type!($Native, $TypeId, $DataType, $Largest);
impl DataType for $DataType {
@@ -272,23 +274,84 @@ macro_rules! define_non_timestamp_primitive {
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => bool_to_numeric(v).map(Value::$TypeId),
Value::String(v) => v.as_utf8().parse::<$Native>().map(|val| Value::from(val)).ok(),
$(
Value::$TargetType(v) => num::cast::cast(v).map(Value::$TypeId),
)*
_ => None,
}
}
}
};
}
define_non_timestamp_primitive!(u8, UInt8, UInt8Type, UInt64Type);
define_non_timestamp_primitive!(u16, UInt16, UInt16Type, UInt64Type);
define_non_timestamp_primitive!(u32, UInt32, UInt32Type, UInt64Type);
define_non_timestamp_primitive!(u64, UInt64, UInt64Type, UInt64Type);
define_non_timestamp_primitive!(i8, Int8, Int8Type, Int64Type);
define_non_timestamp_primitive!(i16, Int16, Int16Type, Int64Type);
define_non_timestamp_primitive!(i32, Int32, Int32Type, Int64Type);
define_non_timestamp_primitive!(f32, Float32, Float32Type, Float64Type);
define_non_timestamp_primitive!(f64, Float64, Float64Type, Float64Type);
define_non_timestamp_primitive!(
u8, UInt8, UInt8Type, UInt64Type, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64,
Float32, Float64
);
define_non_timestamp_primitive!(
u16, UInt16, UInt16Type, UInt64Type, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64,
Float32, Float64
);
define_non_timestamp_primitive!(
u32, UInt32, UInt32Type, UInt64Type, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64,
Float32, Float64
);
define_non_timestamp_primitive!(
u64, UInt64, UInt64Type, UInt64Type, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64,
Float32, Float64
);
define_non_timestamp_primitive!(
i8, Int8, Int8Type, Int64Type, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64,
Float32, Float64
);
define_non_timestamp_primitive!(
i16, Int16, Int16Type, Int64Type, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64,
Float32, Float64
);
define_non_timestamp_primitive!(
f32,
Float32,
Float32Type,
Float64Type,
Int8,
Int16,
Int32,
Int64,
UInt8,
UInt16,
UInt32,
UInt64,
Float32,
Float64
);
define_non_timestamp_primitive!(
f64,
Float64,
Float64Type,
Float64Type,
Int8,
Int16,
Int32,
Int64,
UInt8,
UInt16,
UInt32,
UInt64,
Float32,
Float64
);
// Timestamp primitive:
define_logical_primitive_type!(i64, Int64, Int64Type, Int64Type);
define_logical_primitive_type!(i32, Int32, Int32Type, Int64Type);
impl DataType for Int64Type {
fn name(&self) -> &str {
"Int64"
@@ -313,12 +376,89 @@ impl DataType for Int64Type {
fn is_timestamp_compatible(&self) -> bool {
true
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => bool_to_numeric(v).map(Value::Int64),
Value::Int8(v) => num::cast::cast(v).map(Value::Int64),
Value::Int16(v) => num::cast::cast(v).map(Value::Int64),
Value::Int32(v) => num::cast::cast(v).map(Value::Int64),
Value::Int64(v) => Some(Value::Int64(v)),
Value::UInt8(v) => num::cast::cast(v).map(Value::Int64),
Value::UInt16(v) => num::cast::cast(v).map(Value::Int64),
Value::UInt32(v) => num::cast::cast(v).map(Value::Int64),
Value::Float32(v) => num::cast::cast(v).map(Value::Int64),
Value::Float64(v) => num::cast::cast(v).map(Value::Int64),
Value::String(v) => v.as_utf8().parse::<i64>().map(Value::Int64).ok(),
Value::DateTime(v) => Some(Value::Int64(v.val())),
Value::Timestamp(v) => Some(Value::Int64(v.value())),
Value::Time(v) => Some(Value::Int64(v.value())),
Value::Interval(v) => match v.unit() {
IntervalUnit::DayTime => Some(Value::Int64(v.to_i64())),
IntervalUnit::YearMonth => None,
IntervalUnit::MonthDayNano => None,
},
_ => None,
}
}
}
impl DataType for Int32Type {
    /// Name of this data type.
    fn name(&self) -> &str {
        "Int32"
    }

    /// Logical type id of this data type.
    fn logical_type_id(&self) -> LogicalTypeId {
        LogicalTypeId::Int32
    }

    /// Default value of this data type: `Value::Int32(0)`.
    fn default_value(&self) -> Value {
        Value::Int32(0)
    }

    /// The corresponding arrow data type.
    fn as_arrow_type(&self) -> ArrowDataType {
        ArrowDataType::Int32
    }

    /// Creates a mutable `Int32` vector builder with the given capacity.
    fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
        Box::new(PrimitiveVectorBuilder::<Int32Type>::with_capacity(capacity))
    }

    fn is_timestamp_compatible(&self) -> bool {
        false
    }

    /// Casts `from` into a `Value::Int32`.
    ///
    /// Supported sources: booleans, every integer and float type (through
    /// the checked `num::cast::cast`, so a value that does not fit in `i32`
    /// yields `None`), strings (parsed as `i32`), dates (their raw inner
    /// value) and `YearMonth` intervals. Anything else yields `None`.
    fn try_cast(&self, from: Value) -> Option<Value> {
        match from {
            Value::Boolean(v) => bool_to_numeric(v).map(Value::Int32),
            Value::Int8(v) => num::cast::cast(v).map(Value::Int32),
            Value::Int16(v) => num::cast::cast(v).map(Value::Int32),
            Value::Int32(v) => Some(Value::Int32(v)),
            // The Int64/UInt32/UInt64 arms must produce `Value::Int32`:
            // wrapping the result in the source variant would return a value
            // of the wrong type and skip the i32 range check entirely.
            Value::Int64(v) => num::cast::cast(v).map(Value::Int32),
            Value::UInt8(v) => num::cast::cast(v).map(Value::Int32),
            Value::UInt16(v) => num::cast::cast(v).map(Value::Int32),
            Value::UInt32(v) => num::cast::cast(v).map(Value::Int32),
            Value::UInt64(v) => num::cast::cast(v).map(Value::Int32),
            Value::Float32(v) => num::cast::cast(v).map(Value::Int32),
            Value::Float64(v) => num::cast::cast(v).map(Value::Int32),
            Value::String(v) => v.as_utf8().parse::<i32>().map(Value::Int32).ok(),
            Value::Date(v) => Some(Value::Int32(v.val())),
            Value::Interval(v) => match v.unit() {
                IntervalUnit::YearMonth => Some(Value::Int32(v.to_i32())),
                IntervalUnit::DayTime => None,
                IntervalUnit::MonthDayNano => None,
            },
            _ => None,
        }
    }
}
#[cfg(test)]
mod tests {
use std::collections::BinaryHeap;
use ordered_float::OrderedFloat;
use super::*;
#[test]
@@ -361,4 +501,201 @@ mod tests {
test!(f32);
test!(f64);
}
/// Asserts that casting `$value` to `$datatype` succeeds and equals `$expected`.
macro_rules! assert_primitive_cast {
    ($value: expr, $datatype:expr, $expected: expr) => {
        let src = $value;
        let casted = $datatype.try_cast(src);
        assert_eq!(casted.unwrap(), $expected);
    };
}
#[test]
fn test_primitive_cast() {
    // Integer casts: (source value, destination type, expected value)
    let cases = [
        (
            Value::UInt8(123),
            ConcreteDataType::uint16_datatype(),
            Value::UInt16(123),
        ),
        (
            Value::UInt8(123),
            ConcreteDataType::uint32_datatype(),
            Value::UInt32(123),
        ),
        (
            Value::UInt8(123),
            ConcreteDataType::uint64_datatype(),
            Value::UInt64(123),
        ),
        (
            Value::UInt16(1234),
            ConcreteDataType::uint32_datatype(),
            Value::UInt32(1234),
        ),
        (
            Value::UInt16(1234),
            ConcreteDataType::uint64_datatype(),
            Value::UInt64(1234),
        ),
        (
            Value::UInt32(12345),
            ConcreteDataType::uint64_datatype(),
            Value::UInt64(12345),
        ),
        (
            Value::UInt8(123),
            ConcreteDataType::uint16_datatype(),
            Value::UInt16(123),
        ),
        (
            Value::Int8(123),
            ConcreteDataType::int32_datatype(),
            Value::Int32(123),
        ),
        (
            Value::Int8(123),
            ConcreteDataType::int64_datatype(),
            Value::Int64(123),
        ),
        (
            Value::Int16(1234),
            ConcreteDataType::int32_datatype(),
            Value::Int32(1234),
        ),
        (
            Value::Int16(1234),
            ConcreteDataType::int64_datatype(),
            Value::Int64(1234),
        ),
        (
            Value::Int32(12345),
            ConcreteDataType::int64_datatype(),
            Value::Int64(12345),
        ),
    ];
    for (value, datatype, expected) in cases {
        assert_primitive_cast!(value, datatype, expected);
    }
}
#[test]
fn test_float_cast() {
    // cast to Float32
    let to_f32 = [
        Value::UInt8(12),
        Value::UInt16(12),
        Value::Int8(12),
        Value::Int16(12),
        Value::Int32(12),
    ];
    for value in to_f32 {
        assert_primitive_cast!(
            value,
            ConcreteDataType::float32_datatype(),
            Value::Float32(OrderedFloat(12.0))
        );
    }

    // cast to Float64
    let to_f64 = [
        Value::UInt8(12),
        Value::UInt16(12),
        Value::UInt32(12),
        Value::Int8(12),
        Value::Int16(12),
        Value::Int32(12),
        Value::Int64(12),
    ];
    for value in to_f64 {
        assert_primitive_cast!(
            value,
            ConcreteDataType::float64_datatype(),
            Value::Float64(OrderedFloat(12.0))
        );
    }
}
#[test]
fn test_string_cast_to_primitive() {
    // (string literal, destination type, expected value)
    let cases = [
        ("123", ConcreteDataType::uint8_datatype(), Value::UInt8(123)),
        (
            "123",
            ConcreteDataType::uint16_datatype(),
            Value::UInt16(123),
        ),
        (
            "123",
            ConcreteDataType::uint32_datatype(),
            Value::UInt32(123),
        ),
        (
            "123",
            ConcreteDataType::uint64_datatype(),
            Value::UInt64(123),
        ),
        ("123", ConcreteDataType::int8_datatype(), Value::Int8(123)),
        ("123", ConcreteDataType::int16_datatype(), Value::Int16(123)),
        ("123", ConcreteDataType::int32_datatype(), Value::Int32(123)),
        ("123", ConcreteDataType::int64_datatype(), Value::Int64(123)),
        (
            "1.23",
            ConcreteDataType::float32_datatype(),
            Value::Float32(OrderedFloat(1.23)),
        ),
        (
            "1.23",
            ConcreteDataType::float64_datatype(),
            Value::Float64(OrderedFloat(1.23)),
        ),
    ];
    for (text, datatype, expected) in cases {
        assert_primitive_cast!(Value::String(text.into()), datatype, expected);
    }
}
}

View File

@@ -57,4 +57,36 @@ impl DataType for StringType {
/// Always returns `false`: this string type never reports itself as
/// timestamp-compatible.
fn is_timestamp_compatible(&self) -> bool {
false
}
/// Casts `from` into a `Value::String`.
///
/// A value whose logical type id already matches this type is handed back
/// unchanged. `Value::Null` becomes the literal text `"null"`; booleans,
/// numbers, dates, datetimes and durations use their `to_string()` form;
/// timestamps, times and intervals use their ISO 8601 form.
///
/// Returns `None` for `Value::Binary` and `Value::List`.
fn try_cast(&self, from: Value) -> Option<Value> {
    if from.logical_type_id() == self.logical_type_id() {
        return Some(from);
    }

    // Build the string payload per variant, then wrap it exactly once below.
    let bytes = match from {
        Value::Null => StringBytes::from("null".to_string()),
        Value::Boolean(v) => StringBytes::from(v.to_string()),
        Value::UInt8(v) => StringBytes::from(v.to_string()),
        Value::UInt16(v) => StringBytes::from(v.to_string()),
        Value::UInt32(v) => StringBytes::from(v.to_string()),
        Value::UInt64(v) => StringBytes::from(v.to_string()),
        Value::Int8(v) => StringBytes::from(v.to_string()),
        Value::Int16(v) => StringBytes::from(v.to_string()),
        Value::Int32(v) => StringBytes::from(v.to_string()),
        Value::Int64(v) => StringBytes::from(v.to_string()),
        Value::Float32(v) => StringBytes::from(v.to_string()),
        Value::Float64(v) => StringBytes::from(v.to_string()),
        Value::String(v) => v,
        Value::Date(v) => StringBytes::from(v.to_string()),
        Value::DateTime(v) => StringBytes::from(v.to_string()),
        Value::Timestamp(v) => StringBytes::from(v.to_iso8601_string()),
        Value::Time(v) => StringBytes::from(v.to_iso8601_string()),
        Value::Interval(v) => StringBytes::from(v.to_iso8601_string()),
        Value::Duration(v) => StringBytes::from(v.to_string()),
        // StringBytes only supports UTF-8, so Value::Binary is not allowed.
        Value::Binary(_) | Value::List(_) => return None,
    };

    Some(Value::String(bytes))
}
}

View File

@@ -86,7 +86,7 @@ impl TimeType {
}
macro_rules! impl_data_type_for_time {
($unit: ident,$arrow_type: ident, $type: ty) => {
($unit: ident,$arrow_type: ident, $type: ty, $TargetType: ident) => {
paste! {
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct [<Time $unit Type>];
@@ -115,6 +115,14 @@ macro_rules! impl_data_type_for_time {
/// Always returns `false`: the generated time type never reports itself as
/// timestamp-compatible.
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::$TargetType(v) => Some(Value::Time(Time::new(v as i64, TimeUnit::$unit))),
Value::Time(v) => v.convert_to(TimeUnit::$unit).map(Value::Time),
_ => None,
}
}
}
impl LogicalPrimitiveType for [<Time $unit Type>] {
@@ -172,10 +180,10 @@ macro_rules! impl_data_type_for_time {
}
}
impl_data_type_for_time!(Second, Time32, i32);
impl_data_type_for_time!(Millisecond, Time32, i32);
impl_data_type_for_time!(Nanosecond, Time64, i64);
impl_data_type_for_time!(Microsecond, Time64, i64);
impl_data_type_for_time!(Second, Time32, i32, Int32);
impl_data_type_for_time!(Millisecond, Time32, i32, Int32);
impl_data_type_for_time!(Nanosecond, Time64, i64, Int64);
impl_data_type_for_time!(Microsecond, Time64, i64, Int64);
#[cfg(test)]
mod tests {
@@ -217,4 +225,60 @@ mod tests {
TimeNanosecondType.as_arrow_type()
);
}
#[test]
fn test_time_cast() {
    // Int32 -> TimeSecondType
    let casted = ConcreteDataType::time_second_datatype().try_cast(Value::Int32(1000));
    assert_eq!(casted, Some(Value::Time(Time::new_second(1000))));

    // Int32 -> TimeMillisecondType
    let casted = ConcreteDataType::time_millisecond_datatype().try_cast(Value::Int32(2000));
    assert_eq!(casted, Some(Value::Time(Time::new_millisecond(2000))));

    // Int64 -> TimeMicrosecondType
    let casted = ConcreteDataType::time_microsecond_datatype().try_cast(Value::Int64(3000));
    assert_eq!(casted, Some(Value::Time(Time::new_microsecond(3000))));

    // Int64 -> TimeNanosecondType
    let casted = ConcreteDataType::time_nanosecond_datatype().try_cast(Value::Int64(4000));
    assert_eq!(casted, Some(Value::Time(Time::new_nanosecond(4000))));

    // Any other integer width is rejected, e.g. Int64 -> TimeSecondType or
    // Int32 -> TimeMicrosecondType.
    let rejected = ConcreteDataType::time_second_datatype().try_cast(Value::Int64(123));
    assert_eq!(rejected, None);
    let rejected = ConcreteDataType::time_microsecond_datatype().try_cast(Value::Int32(123));
    assert_eq!(rejected, None);

    // TimeSecond -> TimeMicrosecond
    let from_seconds = ConcreteDataType::time_microsecond_datatype()
        .try_cast(Value::Time(Time::new_second(2023)));
    assert_eq!(
        from_seconds,
        Some(Value::Time(Time::new_microsecond(2023 * 1000000)))
    );

    // A unit conversion that overflows yields None.
    let overflowed = ConcreteDataType::time_microsecond_datatype()
        .try_cast(Value::Time(Time::new_second(i64::MAX)));
    assert_eq!(overflowed, None);
}
}

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::str::FromStr;
use arrow::datatypes::{
DataType as ArrowDataType, TimeUnit as ArrowTimeUnit,
TimestampMicrosecondType as ArrowTimestampMicrosecondType,
@@ -130,6 +132,17 @@ macro_rules! impl_data_type_for_timestamp {
/// Always returns `true`: the generated timestamp type reports itself as
/// timestamp-compatible.
fn is_timestamp_compatible(&self) -> bool {
true
}
fn try_cast(&self, from: Value)-> Option<Value>{
match from {
Value::Timestamp(v) => v.convert_to(TimeUnit::$unit).map(Value::Timestamp),
Value::String(v) => Timestamp::from_str(v.as_utf8()).map(Value::Timestamp).ok(),
Value::Int64(v) => Some(Value::Timestamp(Timestamp::new(v, TimeUnit::$unit))),
Value::DateTime(v) => Timestamp::new_second(v.val()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
Value::Date(v) => Timestamp::new_second(v.to_secs()).convert_to(TimeUnit::$unit).map(Value::Timestamp),
_ => None
}
}
}
impl LogicalPrimitiveType for [<Timestamp $unit Type>] {
@@ -194,6 +207,8 @@ impl_data_type_for_timestamp!(Microsecond);
#[cfg(test)]
mod tests {
use common_time::{Date, DateTime};
use super::*;
#[test]
@@ -215,4 +230,50 @@ mod tests {
TimestampType::Nanosecond(TimestampNanosecondType).unit()
);
}
#[test]
fn test_timestamp_cast() {
    // Pin the time zone: the expected epoch of the string case below is that
    // wall-clock time at UTC+8.
    std::env::set_var("TZ", "Asia/Shanghai");

    // String -> TimestampSecond
    let parsed = ConcreteDataType::timestamp_second_datatype()
        .try_cast(Value::String("2021-01-01 01:02:03".to_string().into()));
    assert_eq!(
        parsed,
        Some(Value::Timestamp(Timestamp::new_second(1609434123)))
    );

    // String cast failed
    let unparsable = ConcreteDataType::timestamp_second_datatype()
        .try_cast(Value::String("12345".to_string().into()));
    assert_eq!(unparsable, None);

    // Int64 -> TimestampSecond
    let from_int =
        ConcreteDataType::timestamp_second_datatype().try_cast(Value::Int64(1694589525));
    assert_eq!(
        from_int,
        Some(Value::Timestamp(Timestamp::new_second(1694589525)))
    );

    // Datetime -> TimestampSecond
    let from_datetime = ConcreteDataType::timestamp_second_datatype()
        .try_cast(Value::DateTime(DateTime::from(1234567)));
    assert_eq!(
        from_datetime,
        Some(Value::Timestamp(Timestamp::new_second(1234567)))
    );

    // Date -> TimestampMillisecond
    let from_date = ConcreteDataType::timestamp_millisecond_datatype()
        .try_cast(Value::Date(Date::from_str("1970-01-01").unwrap()));
    assert_eq!(
        from_date,
        Some(Value::Timestamp(Timestamp::new_millisecond(0)))
    );

    // TimestampSecond -> TimestampMicrosecond
    let from_seconds = ConcreteDataType::timestamp_microsecond_datatype()
        .try_cast(Value::Timestamp(Timestamp::new_second(123)));
    assert_eq!(
        from_seconds,
        Some(Value::Timestamp(Timestamp::new_microsecond(123 * 1000000)))
    );
}
}