refactor: not allowed int64 type as time index (#2460)

* refactor: remove is_timestamp_compatible.

* chore: fmt

* refactor: remove int64 to timestamp match

* chore

* chore: apply suggestions from code review

Co-authored-by: dennis zhuang <killme2008@gmail.com>

* chore: fmt

---------

Co-authored-by: dennis zhuang <killme2008@gmail.com>
This commit is contained in:
Wei
2023-09-22 14:28:02 +08:00
committed by GitHub
parent c6e95ffe63
commit aef9e7bfc3
21 changed files with 19 additions and 135 deletions

View File

@@ -179,6 +179,10 @@ impl ConcreteDataType {
)
}
pub fn is_timestamp(&self) -> bool {
matches!(self, ConcreteDataType::Timestamp(_))
}
pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
@@ -217,9 +221,6 @@ impl ConcreteDataType {
/// Try to cast data type as a [`TimestampType`].
pub fn as_timestamp(&self) -> Option<TimestampType> {
match self {
ConcreteDataType::Int64(_) => {
Some(TimestampType::Millisecond(TimestampMillisecondType))
}
ConcreteDataType::Timestamp(t) => Some(*t),
_ => None,
}
@@ -473,10 +474,6 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
/// Creates a mutable vector with given `capacity` of this type.
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;
/// Returns true if the data type is compatible with timestamp type so we can
/// use it as a timestamp.
fn is_timestamp_compatible(&self) -> bool;
/// Casts the value to specific DataType.
/// Return None if cast failed.
fn try_cast(&self, from: Value) -> Option<Value>;
@@ -596,41 +593,6 @@ mod tests {
);
}
#[test]
fn test_is_timestamp_compatible() {
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp_compatible());
assert!(
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp_compatible()
);
assert!(
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp_compatible()
);
assert!(
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp_compatible()
);
assert!(ConcreteDataType::timestamp_second_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::int64_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::null_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::binary_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::date_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::datetime_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::string_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::int32_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_second_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_millisecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_microsecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_nanosecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_second_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_millisecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_microsecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_nanosecond_datatype().is_timestamp_compatible());
}
#[test]
fn test_is_null() {
assert!(ConcreteDataType::null_datatype().is_null());

View File

@@ -23,7 +23,6 @@ use arrow::datatypes::{Field, Schema as ArrowSchema};
use datafusion_common::DFSchemaRef;
use snafu::{ensure, ResultExt};
use crate::data_type::DataType;
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
pub use crate::schema::column_schema::{ColumnSchema, Metadata, COMMENT_KEY, TIME_INDEX_KEY};
pub use crate::schema::constraint::ColumnDefaultConstraint;
@@ -269,7 +268,7 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us
let column_schema = &column_schemas[timestamp_index];
ensure!(
column_schema.data_type.is_timestamp_compatible(),
column_schema.data_type.is_timestamp(),
error::InvalidTimestampIndexSnafu {
index: timestamp_index,
}

View File

@@ -81,7 +81,7 @@ impl ColumnDefaultConstraint {
error::UnsupportedDefaultExprSnafu { expr }
);
ensure!(
data_type.is_timestamp_compatible(),
data_type.is_timestamp(),
error::DefaultValueTypeSnafu {
reason: "return value of the function must has timestamp type",
}
@@ -199,7 +199,7 @@ fn create_current_timestamp_vector(
let current_timestamp_vector = TimestampMillisecondVector::from_values(
std::iter::repeat(util::current_time_millis()).take(num_rows),
);
if data_type.is_timestamp_compatible() {
if data_type.is_timestamp() {
current_timestamp_vector.cast(data_type)
} else {
error::DefaultValueTypeSnafu {
@@ -350,15 +350,8 @@ mod tests {
// Int64 type.
let data_type = ConcreteDataType::int64_datatype();
let v = constraint
.create_default_vector(&data_type, false, 4)
.unwrap();
assert_eq!(4, v.len());
assert!(
matches!(v.get(0), Value::Int64(_)),
"v {:?} is not timestamp",
v.get(0)
);
let v = constraint.create_default_vector(&data_type, false, 4);
assert!(v.is_err());
let constraint = ColumnDefaultConstraint::Function("no".to_string());
let data_type = ConcreteDataType::timestamp_millisecond_datatype();

View File

@@ -54,10 +54,6 @@ impl DataType for BinaryType {
Box::new(BinaryVectorBuilder::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Binary(v) => Some(Value::Binary(v)),

View File

@@ -54,10 +54,6 @@ impl DataType for BooleanType {
Box::new(BooleanVectorBuilder::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => Some(Value::Boolean(v)),

View File

@@ -52,10 +52,6 @@ impl DataType for DateType {
Box::new(DateVectorBuilder::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Int32(v) => Some(Value::Date(Date::from(v))),

View File

@@ -50,10 +50,6 @@ impl DataType for DateTimeType {
Box::new(DateTimeVectorBuilder::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Int64(v) => Some(Value::DateTime(DateTime::from(v))),

View File

@@ -85,10 +85,6 @@ impl DataType for DictionaryType {
unimplemented!()
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, _: Value) -> Option<Value> {
None
}

View File

@@ -98,9 +98,6 @@ macro_rules! impl_data_type_for_duration {
Box::new([<Duration $unit Vector Builder>]::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, _: Value) -> Option<Value> {
// TODO(QuenKar): Implement casting for duration types.

View File

@@ -86,9 +86,6 @@ macro_rules! impl_data_type_for_interval {
Box::new([<Interval $unit Vector Builder>]::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, _: Value) -> Option<Value> {
// TODO(QuenKar): Implement casting for interval types.

View File

@@ -76,10 +76,6 @@ impl DataType for ListType {
))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::List(v) => Some(Value::List(v)),

View File

@@ -52,10 +52,6 @@ impl DataType for NullType {
Box::<NullVectorBuilder>::default()
}
fn is_timestamp_compatible(&self) -> bool {
false
}
// Unconditional cast other type to Value::Null
fn try_cast(&self, _from: Value) -> Option<Value> {
Some(Value::Null)

View File

@@ -271,9 +271,6 @@ macro_rules! define_non_timestamp_primitive {
Box::new(PrimitiveVectorBuilder::<$DataType>::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
@@ -373,10 +370,6 @@ impl DataType for Int64Type {
Box::new(PrimitiveVectorBuilder::<Int64Type>::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
true
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => bool_to_numeric(v).map(Value::Int64),
@@ -424,10 +417,6 @@ impl DataType for Int32Type {
Box::new(PrimitiveVectorBuilder::<Int32Type>::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => bool_to_numeric(v).map(Value::Int32),

View File

@@ -54,10 +54,6 @@ impl DataType for StringType {
Box::new(StringVectorBuilder::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
if from.logical_type_id() == self.logical_type_id() {
return Some(from);

View File

@@ -112,10 +112,6 @@ macro_rules! impl_data_type_for_time {
Box::new([<Time $unit Vector Builder>]::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
false
}
fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::$TargetType(v) => Some(Value::Time(Time::new(v as i64, TimeUnit::$unit))),

View File

@@ -129,10 +129,6 @@ macro_rules! impl_data_type_for_timestamp {
Box::new([<Timestamp $unit Vector Builder>]::with_capacity(capacity))
}
fn is_timestamp_compatible(&self) -> bool {
true
}
fn try_cast(&self, from: Value)-> Option<Value>{
match from {
Value::Timestamp(v) => v.convert_to(TimeUnit::$unit).map(Value::Timestamp),

View File

@@ -198,7 +198,6 @@ impl Value {
/// Cast Value to timestamp. Return None if value is not a valid timestamp data type.
pub fn as_timestamp(&self) -> Option<Timestamp> {
match self {
Value::Int64(v) => Some(Timestamp::new_millisecond(*v)),
Value::Timestamp(t) => Some(*t),
_ => None,
}
@@ -388,7 +387,6 @@ pub fn duration_to_scalar_value(unit: TimeUnit, val: Option<i64>) -> ScalarValue
/// Return `None` if given scalar value cannot be converted to a valid timestamp.
pub fn scalar_value_to_timestamp(scalar: &ScalarValue) -> Option<Timestamp> {
match scalar {
ScalarValue::Int64(val) => val.map(Timestamp::new_millisecond),
ScalarValue::Utf8(Some(s)) => match Timestamp::from_str(s) {
Ok(t) => Some(t),
Err(e) => {

View File

@@ -382,9 +382,7 @@ impl Batch {
fn get_timestamp(&self, index: usize) -> Timestamp {
match self.timestamps.get_ref(index) {
ValueRef::Timestamp(timestamp) => timestamp,
// Int64 is always millisecond.
// TODO(yingwen): Don't allow using int64 as time index.
ValueRef::Int64(v) => Timestamp::new_millisecond(v),
// We have check the data type is timestamp compatible in the [BatchBuilder] so it's safe to panic.
value => panic!("{:?} is not a timestamp", value),
}
@@ -483,9 +481,9 @@ impl BatchBuilder {
pub fn timestamps_array(&mut self, array: ArrayRef) -> Result<&mut Self> {
let vector = Helper::try_into_vector(array).context(ConvertVectorSnafu)?;
ensure!(
vector.data_type().is_timestamp_compatible(),
vector.data_type().is_timestamp(),
InvalidBatchSnafu {
reason: format!("{:?} is a timestamp type", vector.data_type()),
reason: format!("{:?} is not a timestamp type", vector.data_type()),
}
);

View File

@@ -23,7 +23,6 @@ use async_stream::try_stream;
use async_trait::async_trait;
use common_telemetry::{debug, error};
use common_time::range::TimestampRange;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::prelude::ConcreteDataType;
@@ -162,7 +161,6 @@ fn decode_timestamp_range_inner(
let mut end = i64::MIN;
let unit = match ts_datatype {
ConcreteDataType::Int64(_) => TimeUnit::Millisecond,
ConcreteDataType::Timestamp(type_) => type_.unit(),
_ => {
return DecodeParquetTimeRangeSnafu {
@@ -358,6 +356,7 @@ mod tests {
use api::v1::OpType;
use common_base::readable_size::ReadableSize;
use common_test_util::temp_dir::create_temp_dir;
use common_time::timestamp::TimeUnit;
use datatypes::arrow::array::{Array, UInt64Array, UInt8Array};
use datatypes::prelude::{ScalarVector, Vector};
use datatypes::types::{TimestampMillisecondType, TimestampType};

View File

@@ -15,10 +15,10 @@
use std::sync::Arc;
use arrow::array::{
PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampSecondArray,
};
use arrow::datatypes::{DataType, Int64Type};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use arrow_array::{Array, BooleanArray, RecordBatch};
use common_time::range::TimestampRange;
@@ -45,7 +45,6 @@ pub(crate) fn build_row_filter(
let ts_col_idx = store_schema.timestamp_index();
let ts_col = store_schema.columns().get(ts_col_idx)?;
let ts_col_unit = match &ts_col.desc.data_type {
ConcreteDataType::Int64(_) => TimeUnit::Millisecond,
ConcreteDataType::Timestamp(ts_type) => ts_type.unit(),
_ => unreachable!(),
};
@@ -205,7 +204,6 @@ impl ArrowPredicate for FastTimestampRowFilter {
downcast_and_compute!(TimestampNanosecondArray)
}
},
DataType::Int64 => downcast_and_compute!(PrimitiveArray<Int64Type>),
_ => {
unreachable!()
}
@@ -270,9 +268,6 @@ impl ArrowPredicate for PlainTimestampRowFilter {
downcast_and_compute!(TimestampNanosecondArray, Nanosecond)
}
},
DataType::Int64 => {
downcast_and_compute!(PrimitiveArray<Int64Type>, Millisecond)
}
_ => {
unreachable!()
}

View File

@@ -26,7 +26,6 @@ use api::v1::SemanticType;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use datatypes::arrow::datatypes::FieldRef;
use datatypes::prelude::DataType;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, Schema, SchemaRef};
use serde::de::Error;
use serde::{Deserialize, Deserializer, Serialize};
@@ -355,13 +354,10 @@ impl RegionMetadata {
fn validate_column_metadata(column_metadata: &ColumnMetadata) -> Result<()> {
if column_metadata.semantic_type == SemanticType::Timestamp {
ensure!(
column_metadata
.column_schema
.data_type
.is_timestamp_compatible(),
column_metadata.column_schema.data_type.is_timestamp(),
InvalidMetaSnafu {
reason: format!(
"{} is not timestamp compatible",
"column `{}` is not timestamp type",
column_metadata.column_schema.name
),
}
@@ -675,7 +671,8 @@ mod test {
builder.push_column_metadata(col);
let err = builder.build().unwrap_err();
assert!(
err.to_string().contains("ts is not timestamp compatible"),
err.to_string()
.contains("column `ts` is not timestamp type"),
"unexpected err: {err}",
);
}