From d10de46e0376d1d9922fdd6d64111031a5252ca1 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:18:20 +0800 Subject: [PATCH] feat: support timestamp precision on creating table (#1332) * feat: support timestamp precision on creating table * fix sqlness * fix: substrait representation of different timestamp precision --- src/common/substrait/src/types.rs | 16 ++++-- src/datatypes/src/error.rs | 6 +++ src/datatypes/src/types/timestamp_type.rs | 40 ++++++++++++++ src/servers/src/mysql/writer.rs | 9 +--- src/sql/src/parser.rs | 53 +++++++++++++++++++ src/sql/src/statements.rs | 14 ++++- .../common/timestamp/timestamp.result | 21 ++++++++ .../standalone/common/timestamp/timestamp.sql | 7 +++ 8 files changed, 154 insertions(+), 12 deletions(-) create mode 100644 tests/cases/standalone/common/timestamp/timestamp.result create mode 100644 tests/cases/standalone/common/timestamp/timestamp.sql diff --git a/src/common/substrait/src/types.rs b/src/common/substrait/src/types.rs index 0652e30e8e..cc0bf6110e 100644 --- a/src/common/substrait/src/types.rs +++ b/src/common/substrait/src/types.rs @@ -20,6 +20,7 @@ use datafusion::scalar::ScalarValue; use datatypes::prelude::ConcreteDataType; +use datatypes::types::TimestampType; use substrait_proto::proto::expression::literal::LiteralType; use substrait_proto::proto::r#type::{self as s_type, Kind, Nullability}; use substrait_proto::proto::{Type as SType, Type}; @@ -71,7 +72,14 @@ pub fn to_concrete_type(ty: &SType) -> Result<(ConcreteDataType, bool)> { Kind::Binary(desc) => substrait_kind!(desc, binary_datatype), Kind::Timestamp(desc) => substrait_kind!( desc, - ConcreteDataType::timestamp_datatype(Default::default()) + ConcreteDataType::timestamp_datatype( + TimestampType::try_from(desc.type_variation_reference as u64) + .map_err(|_| UnsupportedSubstraitTypeSnafu { + ty: format!("{kind:?}") + } + .build())? + .unit() + ) ), Kind::Date(desc) => substrait_kind!(desc, date_datatype), Kind::Time(_) @@ -95,7 +103,7 @@ pub fn to_concrete_type(ty: &SType) -> Result<(ConcreteDataType, bool)> { } macro_rules! build_substrait_kind { - ($kind:ident,$s_type:ident,$nullable:ident,$variation:literal) => {{ + ($kind:ident,$s_type:ident,$nullable:ident,$variation:expr) => {{ let nullability = match $nullable { Some(true) => Nullability::Nullable, Some(false) => Nullability::Required, @@ -129,8 +137,8 @@ pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option) -> Re ConcreteDataType::String(_) => build_substrait_kind!(String, String, nullability, 0), ConcreteDataType::Date(_) => build_substrait_kind!(Date, Date, nullability, 0), ConcreteDataType::DateTime(_) => UnsupportedConcreteTypeSnafu { ty }.fail()?, - ConcreteDataType::Timestamp(_) => { - build_substrait_kind!(Timestamp, Timestamp, nullability, 0) + ConcreteDataType::Timestamp(ty) => { + build_substrait_kind!(Timestamp, Timestamp, nullability, ty.precision() as u32) } ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => { UnsupportedConcreteTypeSnafu { ty }.fail()? diff --git a/src/datatypes/src/error.rs b/src/datatypes/src/error.rs index 53b81d7dd6..cab8fc5d71 100644 --- a/src/datatypes/src/error.rs +++ b/src/datatypes/src/error.rs @@ -112,6 +112,12 @@ pub enum Error { reason: String, backtrace: Backtrace, }, + + #[snafu(display("Invalid timestamp precision: {}", precision))] + InvalidTimestampPrecision { + precision: u64, + backtrace: Backtrace, + }, } impl ErrorExt for Error { diff --git a/src/datatypes/src/types/timestamp_type.rs b/src/datatypes/src/types/timestamp_type.rs index 15c555c272..faa9a7b926 100644 --- a/src/datatypes/src/types/timestamp_type.rs +++ b/src/datatypes/src/types/timestamp_type.rs @@ -28,6 +28,7 @@ use snafu::OptionExt; use crate::data_type::ConcreteDataType; use crate::error; +use crate::error::InvalidTimestampPrecisionSnafu; use crate::prelude::{ DataType, LogicalTypeId, MutableVector, ScalarVectorBuilder, Value, ValueRef, Vector, }; @@ -41,6 +42,11 @@ use crate::vectors::{ TimestampNanosecondVectorBuilder, TimestampSecondVector, TimestampSecondVectorBuilder, }; +const SECOND_VARIATION: u64 = 0; +const MILLISECOND_VARIATION: u64 = 3; +const MICROSECOND_VARIATION: u64 = 6; +const NANOSECOND_VARIATION: u64 = 9; + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[enum_dispatch(DataType)] pub enum TimestampType { @@ -50,6 +56,31 @@ pub enum TimestampType { Nanosecond(TimestampNanosecondType), } +impl TryFrom for TimestampType { + type Error = error::Error; + + /// Convert fractional timestamp precision to timestamp types. Supported precisions are: + /// - 0: second + /// - 3: millisecond + /// - 6: microsecond + /// - 9: nanosecond + fn try_from(value: u64) -> Result { + match value { + SECOND_VARIATION => Ok(TimestampType::Second(TimestampSecondType::default())), + MILLISECOND_VARIATION => Ok(TimestampType::Millisecond( + TimestampMillisecondType::default(), + )), + MICROSECOND_VARIATION => Ok(TimestampType::Microsecond( + TimestampMicrosecondType::default(), + )), + NANOSECOND_VARIATION => { + Ok(TimestampType::Nanosecond(TimestampNanosecondType::default())) + } + _ => InvalidTimestampPrecisionSnafu { precision: value }.fail(), + } + } +} + impl TimestampType { /// Returns the [`TimeUnit`] of this type. pub fn unit(&self) -> TimeUnit { @@ -60,6 +91,15 @@ impl TimestampType { TimestampType::Nanosecond(_) => TimeUnit::Nanosecond, } } + + pub fn precision(&self) -> u64 { + match self { + TimestampType::Second(_) => SECOND_VARIATION, + TimestampType::Millisecond(_) => MILLISECOND_VARIATION, + TimestampType::Microsecond(_) => MICROSECOND_VARIATION, + TimestampType::Nanosecond(_) => NANOSECOND_VARIATION, + } + } } macro_rules! impl_data_type_for_timestamp { diff --git a/src/servers/src/mysql/writer.rs b/src/servers/src/mysql/writer.rs index af03f3a698..fe5d7eb627 100644 --- a/src/servers/src/mysql/writer.rs +++ b/src/servers/src/mysql/writer.rs @@ -17,8 +17,6 @@ use std::ops::Deref; use common_query::Output; use common_recordbatch::{util, RecordBatch}; use common_telemetry::error; -use common_time::datetime::DateTime; -use common_time::timestamp::TimeUnit; use datatypes::prelude::{ConcreteDataType, Value}; use datatypes::schema::{ColumnSchema, SchemaRef}; use opensrv_mysql::{ @@ -163,10 +161,7 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> { Value::Binary(v) => row_writer.write_col(v.deref())?, Value::Date(v) => row_writer.write_col(v.val())?, Value::DateTime(v) => row_writer.write_col(v.val())?, - Value::Timestamp(v) => row_writer.write_col( - // safety: converting timestamp with whatever unit to second will not cause overflow - DateTime::new(v.convert_to(TimeUnit::Second).unwrap().value()).to_string(), - )?, + Value::Timestamp(v) => row_writer.write_col(v.to_iso8601_string())?, Value::List(_) => { return Err(Error::Internal { err_msg: format!( @@ -213,7 +208,7 @@ fn create_mysql_column(column_schema: &ColumnSchema) -> Result { ConcreteDataType::Binary(_) | ConcreteDataType::String(_) => { Ok(ColumnType::MYSQL_TYPE_VARCHAR) } - ConcreteDataType::Timestamp(_) => Ok(ColumnType::MYSQL_TYPE_DATETIME), + ConcreteDataType::Timestamp(_) => Ok(ColumnType::MYSQL_TYPE_TIMESTAMP), _ => error::InternalSnafu { err_msg: format!( "not implemented for column datatype {:?}", diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 93fbb8cfab..d00b9d0d9c 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -379,12 +379,15 @@ impl<'a> ParserContext<'a> { mod tests { use std::assert_matches::assert_matches; + use datatypes::prelude::ConcreteDataType; use sqlparser::ast::{ Ident, ObjectName, Query as SpQuery, Statement as SpStatement, WildcardAdditionalOptions, }; use sqlparser::dialect::GenericDialect; use super::*; + use crate::statements::create::CreateTable; + use crate::statements::sql_data_type_to_concrete_data_type; #[test] pub fn test_show_database_all() { @@ -606,4 +609,54 @@ mod tests { ]))) ) } + + fn test_timestamp_precision(sql: &str, expected_type: ConcreteDataType) { + match ParserContext::create_with_dialect(sql, &GenericDialect {}) + .unwrap() + .pop() + .unwrap() + { + Statement::CreateTable(CreateTable { columns, .. }) => { + let ts_col = columns.get(0).unwrap(); + assert_eq!( + expected_type, + sql_data_type_to_concrete_data_type(&ts_col.data_type).unwrap() + ); + } + _ => unreachable!(), + } + } + + #[test] + pub fn test_create_table_with_precision() { + test_timestamp_precision( + "create table demo (ts timestamp time index, cnt int);", + ConcreteDataType::timestamp_millisecond_datatype(), + ); + test_timestamp_precision( + "create table demo (ts timestamp(0) time index, cnt int);", + ConcreteDataType::timestamp_second_datatype(), + ); + test_timestamp_precision( + "create table demo (ts timestamp(3) time index, cnt int);", + ConcreteDataType::timestamp_millisecond_datatype(), + ); + test_timestamp_precision( + "create table demo (ts timestamp(6) time index, cnt int);", + ConcreteDataType::timestamp_microsecond_datatype(), + ); + test_timestamp_precision( + "create table demo (ts timestamp(9) time index, cnt int);", + ConcreteDataType::timestamp_nanosecond_datatype(), + ); + } + + #[test] + #[should_panic] + pub fn test_create_table_with_invalid_precision() { + test_timestamp_precision( + "create table demo (ts timestamp(1) time index, cnt int);", + ConcreteDataType::timestamp_millisecond_datatype(), + ); + } } diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 944382daac..34a4e49e1d 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -32,6 +32,7 @@ use common_base::bytes::Bytes; use common_time::Timestamp; use datatypes::prelude::ConcreteDataType; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema}; +use datatypes::types::TimestampType; use datatypes::value::Value; use snafu::{ensure, OptionExt, ResultExt}; @@ -304,7 +305,18 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result Ok(ConcreteDataType::date_datatype()), SqlDataType::Varbinary(_) => Ok(ConcreteDataType::binary_datatype()), SqlDataType::Datetime(_) => Ok(ConcreteDataType::datetime_datatype()), - SqlDataType::Timestamp(_, _) => Ok(ConcreteDataType::timestamp_millisecond_datatype()), + SqlDataType::Timestamp(precision, _) => Ok(precision + .as_ref() + .map(|v| TimestampType::try_from(*v)) + .transpose() + .map_err(|_| { + error::SqlTypeNotSupportedSnafu { + t: data_type.clone(), + } + .build() + })? + .map(|t| ConcreteDataType::timestamp_datatype(t.unit())) + .unwrap_or(ConcreteDataType::timestamp_millisecond_datatype())), _ => error::SqlTypeNotSupportedSnafu { t: data_type.clone(), } diff --git a/tests/cases/standalone/common/timestamp/timestamp.result b/tests/cases/standalone/common/timestamp/timestamp.result new file mode 100644 index 0000000000..ebd0c6e14f --- /dev/null +++ b/tests/cases/standalone/common/timestamp/timestamp.result @@ -0,0 +1,21 @@ +CREATE TABLE timestamp_with_precision (ts TIMESTAMP(6) TIME INDEX, cnt INT); + +Affected Rows: 0 + +INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0000', 1); + +Affected Rows: 1 + +INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0800', 2); + +Affected Rows: 1 + +SELECT * FROM timestamp_with_precision ORDER BY ts ASC; + ++----------------------------+-----+ +| ts | cnt | ++----------------------------+-----+ +| 2023-04-04T00:00:00.005200 | 2 | +| 2023-04-04T08:00:00.005200 | 1 | ++----------------------------+-----+ + diff --git a/tests/cases/standalone/common/timestamp/timestamp.sql b/tests/cases/standalone/common/timestamp/timestamp.sql new file mode 100644 index 0000000000..d202867b2c --- /dev/null +++ b/tests/cases/standalone/common/timestamp/timestamp.sql @@ -0,0 +1,7 @@ +CREATE TABLE timestamp_with_precision (ts TIMESTAMP(6) TIME INDEX, cnt INT); + +INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0000', 1); + +INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0800', 2); + +SELECT * FROM timestamp_with_precision ORDER BY ts ASC;