feat: support timestamp precision on creating table (#1332)

* feat: support timestamp precision on creating table

* fix sqlness

* fix: substrait representation of different timestamp precision
This commit is contained in:
Lei, HUANG
2023-04-06 11:18:20 +08:00
committed by GitHub
parent 59f7630000
commit d10de46e03
8 changed files with 154 additions and 12 deletions

View File

@@ -20,6 +20,7 @@
use datafusion::scalar::ScalarValue;
use datatypes::prelude::ConcreteDataType;
use datatypes::types::TimestampType;
use substrait_proto::proto::expression::literal::LiteralType;
use substrait_proto::proto::r#type::{self as s_type, Kind, Nullability};
use substrait_proto::proto::{Type as SType, Type};
@@ -71,7 +72,14 @@ pub fn to_concrete_type(ty: &SType) -> Result<(ConcreteDataType, bool)> {
Kind::Binary(desc) => substrait_kind!(desc, binary_datatype),
Kind::Timestamp(desc) => substrait_kind!(
desc,
ConcreteDataType::timestamp_datatype(Default::default())
ConcreteDataType::timestamp_datatype(
TimestampType::try_from(desc.type_variation_reference as u64)
.map_err(|_| UnsupportedSubstraitTypeSnafu {
ty: format!("{kind:?}")
}
.build())?
.unit()
)
),
Kind::Date(desc) => substrait_kind!(desc, date_datatype),
Kind::Time(_)
@@ -95,7 +103,7 @@ pub fn to_concrete_type(ty: &SType) -> Result<(ConcreteDataType, bool)> {
}
macro_rules! build_substrait_kind {
($kind:ident,$s_type:ident,$nullable:ident,$variation:literal) => {{
($kind:ident,$s_type:ident,$nullable:ident,$variation:expr) => {{
let nullability = match $nullable {
Some(true) => Nullability::Nullable,
Some(false) => Nullability::Required,
@@ -129,8 +137,8 @@ pub fn from_concrete_type(ty: ConcreteDataType, nullability: Option<bool>) -> Re
ConcreteDataType::String(_) => build_substrait_kind!(String, String, nullability, 0),
ConcreteDataType::Date(_) => build_substrait_kind!(Date, Date, nullability, 0),
ConcreteDataType::DateTime(_) => UnsupportedConcreteTypeSnafu { ty }.fail()?,
ConcreteDataType::Timestamp(_) => {
build_substrait_kind!(Timestamp, Timestamp, nullability, 0)
ConcreteDataType::Timestamp(ty) => {
build_substrait_kind!(Timestamp, Timestamp, nullability, ty.precision() as u32)
}
ConcreteDataType::List(_) | ConcreteDataType::Dictionary(_) => {
UnsupportedConcreteTypeSnafu { ty }.fail()?

View File

@@ -112,6 +112,12 @@ pub enum Error {
reason: String,
backtrace: Backtrace,
},
#[snafu(display("Invalid timestamp precision: {}", precision))]
InvalidTimestampPrecision {
precision: u64,
backtrace: Backtrace,
},
}
impl ErrorExt for Error {

View File

@@ -28,6 +28,7 @@ use snafu::OptionExt;
use crate::data_type::ConcreteDataType;
use crate::error;
use crate::error::InvalidTimestampPrecisionSnafu;
use crate::prelude::{
DataType, LogicalTypeId, MutableVector, ScalarVectorBuilder, Value, ValueRef, Vector,
};
@@ -41,6 +42,11 @@ use crate::vectors::{
TimestampNanosecondVectorBuilder, TimestampSecondVector, TimestampSecondVectorBuilder,
};
const SECOND_VARIATION: u64 = 0;
const MILLISECOND_VARIATION: u64 = 3;
const MICROSECOND_VARIATION: u64 = 6;
const NANOSECOND_VARIATION: u64 = 9;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[enum_dispatch(DataType)]
pub enum TimestampType {
@@ -50,6 +56,31 @@ pub enum TimestampType {
Nanosecond(TimestampNanosecondType),
}
impl TryFrom<u64> for TimestampType {
type Error = error::Error;
/// Convert fractional timestamp precision to timestamp types. Supported precisions are:
/// - 0: second
/// - 3: millisecond
/// - 6: microsecond
/// - 9: nanosecond
fn try_from(value: u64) -> Result<Self, Self::Error> {
match value {
SECOND_VARIATION => Ok(TimestampType::Second(TimestampSecondType::default())),
MILLISECOND_VARIATION => Ok(TimestampType::Millisecond(
TimestampMillisecondType::default(),
)),
MICROSECOND_VARIATION => Ok(TimestampType::Microsecond(
TimestampMicrosecondType::default(),
)),
NANOSECOND_VARIATION => {
Ok(TimestampType::Nanosecond(TimestampNanosecondType::default()))
}
_ => InvalidTimestampPrecisionSnafu { precision: value }.fail(),
}
}
}
impl TimestampType {
/// Returns the [`TimeUnit`] of this type.
pub fn unit(&self) -> TimeUnit {
@@ -60,6 +91,15 @@ impl TimestampType {
TimestampType::Nanosecond(_) => TimeUnit::Nanosecond,
}
}
pub fn precision(&self) -> u64 {
match self {
TimestampType::Second(_) => SECOND_VARIATION,
TimestampType::Millisecond(_) => MILLISECOND_VARIATION,
TimestampType::Microsecond(_) => MICROSECOND_VARIATION,
TimestampType::Nanosecond(_) => NANOSECOND_VARIATION,
}
}
}
macro_rules! impl_data_type_for_timestamp {

View File

@@ -17,8 +17,6 @@ use std::ops::Deref;
use common_query::Output;
use common_recordbatch::{util, RecordBatch};
use common_telemetry::error;
use common_time::datetime::DateTime;
use common_time::timestamp::TimeUnit;
use datatypes::prelude::{ConcreteDataType, Value};
use datatypes::schema::{ColumnSchema, SchemaRef};
use opensrv_mysql::{
@@ -163,10 +161,7 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> {
Value::Binary(v) => row_writer.write_col(v.deref())?,
Value::Date(v) => row_writer.write_col(v.val())?,
Value::DateTime(v) => row_writer.write_col(v.val())?,
Value::Timestamp(v) => row_writer.write_col(
// safety: converting timestamp with whatever unit to second will not cause overflow
DateTime::new(v.convert_to(TimeUnit::Second).unwrap().value()).to_string(),
)?,
Value::Timestamp(v) => row_writer.write_col(v.to_iso8601_string())?,
Value::List(_) => {
return Err(Error::Internal {
err_msg: format!(
@@ -213,7 +208,7 @@ fn create_mysql_column(column_schema: &ColumnSchema) -> Result<Column> {
ConcreteDataType::Binary(_) | ConcreteDataType::String(_) => {
Ok(ColumnType::MYSQL_TYPE_VARCHAR)
}
ConcreteDataType::Timestamp(_) => Ok(ColumnType::MYSQL_TYPE_DATETIME),
ConcreteDataType::Timestamp(_) => Ok(ColumnType::MYSQL_TYPE_TIMESTAMP),
_ => error::InternalSnafu {
err_msg: format!(
"not implemented for column datatype {:?}",

View File

@@ -379,12 +379,15 @@ impl<'a> ParserContext<'a> {
mod tests {
use std::assert_matches::assert_matches;
use datatypes::prelude::ConcreteDataType;
use sqlparser::ast::{
Ident, ObjectName, Query as SpQuery, Statement as SpStatement, WildcardAdditionalOptions,
};
use sqlparser::dialect::GenericDialect;
use super::*;
use crate::statements::create::CreateTable;
use crate::statements::sql_data_type_to_concrete_data_type;
#[test]
pub fn test_show_database_all() {
@@ -606,4 +609,54 @@ mod tests {
])))
)
}
fn test_timestamp_precision(sql: &str, expected_type: ConcreteDataType) {
match ParserContext::create_with_dialect(sql, &GenericDialect {})
.unwrap()
.pop()
.unwrap()
{
Statement::CreateTable(CreateTable { columns, .. }) => {
let ts_col = columns.get(0).unwrap();
assert_eq!(
expected_type,
sql_data_type_to_concrete_data_type(&ts_col.data_type).unwrap()
);
}
_ => unreachable!(),
}
}
#[test]
pub fn test_create_table_with_precision() {
test_timestamp_precision(
"create table demo (ts timestamp time index, cnt int);",
ConcreteDataType::timestamp_millisecond_datatype(),
);
test_timestamp_precision(
"create table demo (ts timestamp(0) time index, cnt int);",
ConcreteDataType::timestamp_second_datatype(),
);
test_timestamp_precision(
"create table demo (ts timestamp(3) time index, cnt int);",
ConcreteDataType::timestamp_millisecond_datatype(),
);
test_timestamp_precision(
"create table demo (ts timestamp(6) time index, cnt int);",
ConcreteDataType::timestamp_microsecond_datatype(),
);
test_timestamp_precision(
"create table demo (ts timestamp(9) time index, cnt int);",
ConcreteDataType::timestamp_nanosecond_datatype(),
);
}
#[test]
#[should_panic]
pub fn test_create_table_with_invalid_precision() {
test_timestamp_precision(
"create table demo (ts timestamp(1) time index, cnt int);",
ConcreteDataType::timestamp_millisecond_datatype(),
);
}
}

View File

@@ -32,6 +32,7 @@ use common_base::bytes::Bytes;
use common_time::Timestamp;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema};
use datatypes::types::TimestampType;
use datatypes::value::Value;
use snafu::{ensure, OptionExt, ResultExt};
@@ -304,7 +305,18 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result<Co
SqlDataType::Date => Ok(ConcreteDataType::date_datatype()),
SqlDataType::Varbinary(_) => Ok(ConcreteDataType::binary_datatype()),
SqlDataType::Datetime(_) => Ok(ConcreteDataType::datetime_datatype()),
SqlDataType::Timestamp(_, _) => Ok(ConcreteDataType::timestamp_millisecond_datatype()),
SqlDataType::Timestamp(precision, _) => Ok(precision
.as_ref()
.map(|v| TimestampType::try_from(*v))
.transpose()
.map_err(|_| {
error::SqlTypeNotSupportedSnafu {
t: data_type.clone(),
}
.build()
})?
.map(|t| ConcreteDataType::timestamp_datatype(t.unit()))
.unwrap_or(ConcreteDataType::timestamp_millisecond_datatype())),
_ => error::SqlTypeNotSupportedSnafu {
t: data_type.clone(),
}

View File

@@ -0,0 +1,21 @@
CREATE TABLE timestamp_with_precision (ts TIMESTAMP(6) TIME INDEX, cnt INT);
Affected Rows: 0
INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0000', 1);
Affected Rows: 1
INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0800', 2);
Affected Rows: 1
SELECT * FROM timestamp_with_precision ORDER BY ts ASC;
+----------------------------+-----+
| ts | cnt |
+----------------------------+-----+
| 2023-04-04T00:00:00.005200 | 2 |
| 2023-04-04T08:00:00.005200 | 1 |
+----------------------------+-----+

View File

@@ -0,0 +1,7 @@
CREATE TABLE timestamp_with_precision (ts TIMESTAMP(6) TIME INDEX, cnt INT);
INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0000', 1);
INSERT INTO timestamp_with_precision(ts,cnt) VALUES ('2023-04-04 08:00:00.0052+0800', 2);
SELECT * FROM timestamp_with_precision ORDER BY ts ASC;