From 29739b556e6daec6776999b0ef8e93a7b026d3d8 Mon Sep 17 00:00:00 2001 From: Lin Yihai Date: Tue, 8 Jul 2025 20:08:33 +0800 Subject: [PATCH] refactor: split some convert function into `sql-common` crate (#6452) refactor: split some convert function into `sql-common` crates Signed-off-by: Yihai Lin --- Cargo.lock | 21 + Cargo.toml | 2 + src/common/sql/Cargo.toml | 22 + src/common/sql/src/convert.rs | 1084 +++++++++++++++ src/common/sql/src/default_constraint.rs | 182 +++ src/common/sql/src/error.rs | 158 +++ src/common/sql/src/lib.rs | 19 + src/operator/Cargo.toml | 1 + src/operator/src/error.rs | 8 + .../src/req_convert/insert/stmt_to_region.rs | 5 +- src/operator/src/statement/admin.rs | 4 +- src/operator/src/statement/ddl.rs | 12 +- src/servers/Cargo.toml | 1 + src/servers/src/mysql/helper.rs | 2 +- src/sql/Cargo.toml | 1 + src/sql/src/error.rs | 104 +- src/sql/src/statements.rs | 1222 +---------------- 17 files changed, 1535 insertions(+), 1313 deletions(-) create mode 100644 src/common/sql/Cargo.toml create mode 100644 src/common/sql/src/convert.rs create mode 100644 src/common/sql/src/default_constraint.rs create mode 100644 src/common/sql/src/error.rs create mode 100644 src/common/sql/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 6aef0c238f..3d853da4c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2668,6 +2668,24 @@ dependencies = [ "strum 0.27.1", ] +[[package]] +name = "common-sql" +version = "0.16.0" +dependencies = [ + "common-base", + "common-datasource", + "common-decimal", + "common-error", + "common-macro", + "common-time", + "datafusion-sql", + "datatypes", + "hex", + "jsonb", + "snafu 0.8.5", + "sqlparser 0.54.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=0cf6c04490d59435ee965edd2078e8855bd8471e)", +] + [[package]] name = "common-telemetry" version = "0.16.0" @@ -8457,6 +8475,7 @@ dependencies = [ "common-query", "common-recordbatch", "common-runtime", + "common-sql", "common-telemetry", "common-test-util", "common-time", @@ -11243,6 +11262,7 @@ dependencies = [ "common-recordbatch", "common-runtime", "common-session", + "common-sql", "common-telemetry", "common-test-util", "common-time", @@ -11681,6 +11701,7 @@ dependencies = [ "common-error", "common-macro", "common-query", + "common-sql", "common-time", "datafusion", "datafusion-common", diff --git a/Cargo.toml b/Cargo.toml index 88c2dbcc88..5ae8cc5b62 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ members = [ "src/common/recordbatch", "src/common/runtime", "src/common/session", + "src/common/sql", "src/common/stat", "src/common/substrait", "src/common/telemetry", @@ -262,6 +263,7 @@ common-query = { path = "src/common/query" } common-recordbatch = { path = "src/common/recordbatch" } common-runtime = { path = "src/common/runtime" } common-session = { path = "src/common/session" } +common-sql = { path = "src/common/sql" } common-telemetry = { path = "src/common/telemetry" } common-test-util = { path = "src/common/test-util" } common-time = { path = "src/common/time" } diff --git a/src/common/sql/Cargo.toml b/src/common/sql/Cargo.toml new file mode 100644 index 0000000000..e9596b6459 --- /dev/null +++ b/src/common/sql/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "common-sql" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +common-base.workspace = true +common-datasource.workspace = true +common-decimal.workspace = true +common-error.workspace = true +common-macro.workspace = true +common-time.workspace = true +datafusion-sql.workspace = true +datatypes.workspace = true +hex = "0.4" +jsonb.workspace = true +snafu.workspace = true +sqlparser.workspace = true + +[lints] +workspace = true diff --git a/src/common/sql/src/convert.rs b/src/common/sql/src/convert.rs new file mode 100644 index 0000000000..a21d6c9b32 --- /dev/null +++ b/src/common/sql/src/convert.rs @@ -0,0 +1,1084 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::str::FromStr; + +use common_time::timezone::Timezone; +use common_time::Timestamp; +use datatypes::prelude::ConcreteDataType; +use datatypes::types::{parse_string_to_json_type_value, parse_string_to_vector_type_value}; +use datatypes::value::{OrderedF32, OrderedF64, Value}; +use snafu::{ensure, OptionExt, ResultExt}; +pub use sqlparser::ast::{ + visit_expressions_mut, visit_statements_mut, BinaryOperator, ColumnDef, ColumnOption, + ColumnOptionDef, DataType, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArguments, + Ident, ObjectName, SqlOption, TableConstraint, TimezoneInfo, UnaryOperator, Value as SqlValue, + Visit, VisitMut, Visitor, VisitorMut, +}; + +use crate::error::{ + ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertStrSnafu, DatatypeSnafu, + InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu, ParseSqlValueSnafu, Result, + TimestampOverflowSnafu, UnsupportedUnaryOpSnafu, +}; + +fn parse_sql_number(n: &str) -> Result +where + ::Err: std::fmt::Debug, +{ + match n.parse::() { + Ok(n) => Ok(n), + Err(e) => ParseSqlValueSnafu { + msg: format!("Fail to parse number {n}, {e:?}"), + } + .fail(), + } +} + +macro_rules! parse_number_to_value { + ($data_type: expr, $n: ident, $(($Type: ident, $PrimitiveType: ident, $Target: ident)), +) => { + match $data_type { + $( + ConcreteDataType::$Type(_) => { + let n = parse_sql_number::<$PrimitiveType>($n)?; + Ok(Value::$Type($Target::from(n))) + }, + )+ + ConcreteDataType::Timestamp(t) => { + let n = parse_sql_number::($n)?; + Ok(Value::Timestamp(Timestamp::new(n, t.unit()))) + }, + // TODO(QuenKar): This could need to be optimized + // if this from_str function is slow, + // we can implement parse decimal string with precision and scale manually. + ConcreteDataType::Decimal128(_) => { + if let Ok(val) = common_decimal::Decimal128::from_str($n) { + Ok(Value::Decimal128(val)) + } else { + ParseSqlValueSnafu { + msg: format!("Fail to parse number {}, invalid column type: {:?}", + $n, $data_type) + }.fail() + } + } + // It's valid for MySQL JDBC to send "0" and "1" for boolean types, so adapt to that. + ConcreteDataType::Boolean(_) => { + match $n { + "0" => Ok(Value::Boolean(false)), + "1" => Ok(Value::Boolean(true)), + _ => ParseSqlValueSnafu { + msg: format!("Failed to parse number '{}' to boolean column type", $n)}.fail(), + } + } + _ => ParseSqlValueSnafu { + msg: format!("Fail to parse number {}, invalid column type: {:?}", + $n, $data_type + )}.fail(), + } + } +} + +/// Convert a sql value into datatype's value +pub(crate) fn sql_number_to_value(data_type: &ConcreteDataType, n: &str) -> Result { + parse_number_to_value!( + data_type, + n, + (UInt8, u8, u8), + (UInt16, u16, u16), + (UInt32, u32, u32), + (UInt64, u64, u64), + (Int8, i8, i8), + (Int16, i16, i16), + (Int32, i32, i32), + (Int64, i64, i64), + (Float64, f64, OrderedF64), + (Float32, f32, OrderedF32) + ) + // TODO(hl): also Date/DateTime +} + +/// Converts SQL value to value according to the data type. +/// If `auto_string_to_numeric` is true, tries to cast the string value to numeric values, +/// and returns error if the cast fails. +pub fn sql_value_to_value( + column_name: &str, + data_type: &ConcreteDataType, + sql_val: &SqlValue, + timezone: Option<&Timezone>, + unary_op: Option, + auto_string_to_numeric: bool, +) -> Result { + let mut value = match sql_val { + SqlValue::Number(n, _) => sql_number_to_value(data_type, n)?, + SqlValue::Null => Value::Null, + SqlValue::Boolean(b) => { + ensure!( + data_type.is_boolean(), + ColumnTypeMismatchSnafu { + column_name, + expect: data_type.clone(), + actual: ConcreteDataType::boolean_datatype(), + } + ); + + (*b).into() + } + SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => parse_string_to_value( + column_name, + s.clone(), + data_type, + timezone, + auto_string_to_numeric, + )?, + SqlValue::HexStringLiteral(s) => { + // Should not directly write binary into json column + ensure!( + !matches!(data_type, ConcreteDataType::Json(_)), + ColumnTypeMismatchSnafu { + column_name, + expect: ConcreteDataType::binary_datatype(), + actual: ConcreteDataType::json_datatype(), + } + ); + + parse_hex_string(s)? + } + SqlValue::Placeholder(s) => return InvalidSqlValueSnafu { value: s }.fail(), + + // TODO(dennis): supports binary string + _ => { + return ConvertSqlValueSnafu { + value: sql_val.clone(), + datatype: data_type.clone(), + } + .fail() + } + }; + + if let Some(unary_op) = unary_op { + match unary_op { + UnaryOperator::Plus | UnaryOperator::Minus | UnaryOperator::Not => {} + _ => { + return UnsupportedUnaryOpSnafu { unary_op }.fail(); + } + } + + match value { + Value::Null => {} + Value::Boolean(bool) => match unary_op { + UnaryOperator::Not => value = Value::Boolean(!bool), + _ => { + return InvalidUnaryOpSnafu { unary_op, value }.fail(); + } + }, + Value::UInt8(_) + | Value::UInt16(_) + | Value::UInt32(_) + | Value::UInt64(_) + | Value::Int8(_) + | Value::Int16(_) + | Value::Int32(_) + | Value::Int64(_) + | Value::Float32(_) + | Value::Float64(_) + | Value::Decimal128(_) + | Value::Date(_) + | Value::Timestamp(_) + | Value::Time(_) + | Value::Duration(_) + | Value::IntervalYearMonth(_) + | Value::IntervalDayTime(_) + | Value::IntervalMonthDayNano(_) => match unary_op { + UnaryOperator::Plus => {} + UnaryOperator::Minus => { + value = value + .try_negative() + .with_context(|| InvalidUnaryOpSnafu { unary_op, value })?; + } + _ => return InvalidUnaryOpSnafu { unary_op, value }.fail(), + }, + + Value::String(_) | Value::Binary(_) | Value::List(_) => { + return InvalidUnaryOpSnafu { unary_op, value }.fail() + } + } + } + + if value.data_type() != *data_type { + datatypes::types::cast(value, data_type).with_context(|_| InvalidCastSnafu { + sql_value: sql_val.clone(), + datatype: data_type, + }) + } else { + Ok(value) + } +} + +pub(crate) fn parse_string_to_value( + column_name: &str, + s: String, + data_type: &ConcreteDataType, + timezone: Option<&Timezone>, + auto_string_to_numeric: bool, +) -> Result { + if auto_string_to_numeric { + if let Some(value) = auto_cast_to_numeric(&s, data_type)? { + return Ok(value); + } + } + + ensure!( + data_type.is_stringifiable(), + ColumnTypeMismatchSnafu { + column_name, + expect: data_type.clone(), + actual: ConcreteDataType::string_datatype(), + } + ); + + match data_type { + ConcreteDataType::String(_) => Ok(Value::String(s.into())), + ConcreteDataType::Date(_) => { + if let Ok(date) = common_time::date::Date::from_str(&s, timezone) { + Ok(Value::Date(date)) + } else { + ParseSqlValueSnafu { + msg: format!("Failed to parse {s} to Date value"), + } + .fail() + } + } + ConcreteDataType::Timestamp(t) => { + if let Ok(ts) = Timestamp::from_str(&s, timezone) { + Ok(Value::Timestamp(ts.convert_to(t.unit()).context( + TimestampOverflowSnafu { + timestamp: ts, + target_unit: t.unit(), + }, + )?)) + } else if let Ok(ts) = i64::from_str(s.as_str()) { + Ok(Value::Timestamp(Timestamp::new(ts, t.unit()))) + } else { + ParseSqlValueSnafu { + msg: format!("Failed to parse {s} to Timestamp value"), + } + .fail() + } + } + ConcreteDataType::Decimal128(_) => { + if let Ok(val) = common_decimal::Decimal128::from_str(&s) { + Ok(Value::Decimal128(val)) + } else { + ParseSqlValueSnafu { + msg: format!("Fail to parse number {s} to Decimal128 value"), + } + .fail() + } + } + ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())), + ConcreteDataType::Json(j) => { + let v = parse_string_to_json_type_value(&s, &j.format).context(DatatypeSnafu)?; + Ok(Value::Binary(v.into())) + } + ConcreteDataType::Vector(d) => { + let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?; + Ok(Value::Binary(v.into())) + } + _ => ParseSqlValueSnafu { + msg: format!("Failed to parse {s} to {data_type} value"), + } + .fail(), + } +} + +/// Casts string to value of specified numeric data type. +/// If the string cannot be parsed, returns an error. +/// +/// Returns None if the data type doesn't support auto casting. +pub(crate) fn auto_cast_to_numeric(s: &str, data_type: &ConcreteDataType) -> Result> { + let value = match data_type { + ConcreteDataType::Boolean(_) => s.parse::().map(Value::Boolean).ok(), + ConcreteDataType::Int8(_) => s.parse::().map(Value::Int8).ok(), + ConcreteDataType::Int16(_) => s.parse::().map(Value::Int16).ok(), + ConcreteDataType::Int32(_) => s.parse::().map(Value::Int32).ok(), + ConcreteDataType::Int64(_) => s.parse::().map(Value::Int64).ok(), + ConcreteDataType::UInt8(_) => s.parse::().map(Value::UInt8).ok(), + ConcreteDataType::UInt16(_) => s.parse::().map(Value::UInt16).ok(), + ConcreteDataType::UInt32(_) => s.parse::().map(Value::UInt32).ok(), + ConcreteDataType::UInt64(_) => s.parse::().map(Value::UInt64).ok(), + ConcreteDataType::Float32(_) => s + .parse::() + .map(|v| Value::Float32(OrderedF32::from(v))) + .ok(), + ConcreteDataType::Float64(_) => s + .parse::() + .map(|v| Value::Float64(OrderedF64::from(v))) + .ok(), + _ => return Ok(None), + }; + + match value { + Some(value) => Ok(Some(value)), + None => ConvertStrSnafu { + value: s, + datatype: data_type.clone(), + } + .fail(), + } +} + +pub(crate) fn parse_hex_string(s: &str) -> Result { + match hex::decode(s) { + Ok(b) => Ok(Value::Binary(common_base::bytes::Bytes::from(b))), + Err(hex::FromHexError::InvalidHexCharacter { c, index }) => ParseSqlValueSnafu { + msg: format!( + "Fail to parse hex string to Byte: invalid character {c:?} at position {index}" + ), + } + .fail(), + Err(hex::FromHexError::OddLength) => ParseSqlValueSnafu { + msg: "Fail to parse hex string to Byte: odd number of digits".to_string(), + } + .fail(), + Err(e) => ParseSqlValueSnafu { + msg: format!("Fail to parse hex string to Byte {s}, {e:?}"), + } + .fail(), + } +} + +#[cfg(test)] +mod test { + use common_base::bytes::Bytes; + use common_time::timestamp::TimeUnit; + use datatypes::value::OrderedFloat; + + use super::*; + + #[test] + fn test_string_to_value_auto_numeric() { + // Test string to boolean with auto cast + let result = parse_string_to_value( + "col", + "true".to_string(), + &ConcreteDataType::boolean_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::Boolean(true), result); + + // Test invalid string to boolean with auto cast + let result = parse_string_to_value( + "col", + "not_a_boolean".to_string(), + &ConcreteDataType::boolean_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to int8 + let result = parse_string_to_value( + "col", + "42".to_string(), + &ConcreteDataType::int8_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::Int8(42), result); + + // Test invalid string to int8 with auto cast + let result = parse_string_to_value( + "col", + "not_an_int8".to_string(), + &ConcreteDataType::int8_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to int16 + let result = parse_string_to_value( + "col", + "1000".to_string(), + &ConcreteDataType::int16_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::Int16(1000), result); + + // Test invalid string to int16 with auto cast + let result = parse_string_to_value( + "col", + "not_an_int16".to_string(), + &ConcreteDataType::int16_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to int32 + let result = parse_string_to_value( + "col", + "100000".to_string(), + &ConcreteDataType::int32_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::Int32(100000), result); + + // Test invalid string to int32 with auto cast + let result = parse_string_to_value( + "col", + "not_an_int32".to_string(), + &ConcreteDataType::int32_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to int64 + let result = parse_string_to_value( + "col", + "1000000".to_string(), + &ConcreteDataType::int64_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::Int64(1000000), result); + + // Test invalid string to int64 with auto cast + let result = parse_string_to_value( + "col", + "not_an_int64".to_string(), + &ConcreteDataType::int64_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to uint8 + let result = parse_string_to_value( + "col", + "200".to_string(), + &ConcreteDataType::uint8_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::UInt8(200), result); + + // Test invalid string to uint8 with auto cast + let result = parse_string_to_value( + "col", + "not_a_uint8".to_string(), + &ConcreteDataType::uint8_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to uint16 + let result = parse_string_to_value( + "col", + "60000".to_string(), + &ConcreteDataType::uint16_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::UInt16(60000), result); + + // Test invalid string to uint16 with auto cast + let result = parse_string_to_value( + "col", + "not_a_uint16".to_string(), + &ConcreteDataType::uint16_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to uint32 + let result = parse_string_to_value( + "col", + "4000000000".to_string(), + &ConcreteDataType::uint32_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::UInt32(4000000000), result); + + // Test invalid string to uint32 with auto cast + let result = parse_string_to_value( + "col", + "not_a_uint32".to_string(), + &ConcreteDataType::uint32_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to uint64 + let result = parse_string_to_value( + "col", + "18446744073709551615".to_string(), + &ConcreteDataType::uint64_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::UInt64(18446744073709551615), result); + + // Test invalid string to uint64 with auto cast + let result = parse_string_to_value( + "col", + "not_a_uint64".to_string(), + &ConcreteDataType::uint64_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to float32 + let result = parse_string_to_value( + "col", + "3.5".to_string(), + &ConcreteDataType::float32_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::Float32(OrderedF32::from(3.5)), result); + + // Test invalid string to float32 with auto cast + let result = parse_string_to_value( + "col", + "not_a_float32".to_string(), + &ConcreteDataType::float32_datatype(), + None, + true, + ); + assert!(result.is_err()); + + // Test string to float64 + let result = parse_string_to_value( + "col", + "3.5".to_string(), + &ConcreteDataType::float64_datatype(), + None, + true, + ) + .unwrap(); + assert_eq!(Value::Float64(OrderedF64::from(3.5)), result); + + // Test invalid string to float64 with auto cast + let result = parse_string_to_value( + "col", + "not_a_float64".to_string(), + &ConcreteDataType::float64_datatype(), + None, + true, + ); + assert!(result.is_err()); + } + + #[test] + fn test_sql_value_to_value() { + let sql_val = SqlValue::Null; + assert_eq!( + Value::Null, + sql_value_to_value( + "a", + &ConcreteDataType::float64_datatype(), + &sql_val, + None, + None, + false + ) + .unwrap() + ); + + let sql_val = SqlValue::Boolean(true); + assert_eq!( + Value::Boolean(true), + sql_value_to_value( + "a", + &ConcreteDataType::boolean_datatype(), + &sql_val, + None, + None, + false + ) + .unwrap() + ); + + let sql_val = SqlValue::Number("3.0".to_string(), false); + assert_eq!( + Value::Float64(OrderedFloat(3.0)), + sql_value_to_value( + "a", + &ConcreteDataType::float64_datatype(), + &sql_val, + None, + None, + false + ) + .unwrap() + ); + + let sql_val = SqlValue::Number("3.0".to_string(), false); + let v = sql_value_to_value( + "a", + &ConcreteDataType::boolean_datatype(), + &sql_val, + None, + None, + false, + ); + assert!(v.is_err()); + assert!(format!("{v:?}").contains("Failed to parse number '3.0' to boolean column type")); + + let sql_val = SqlValue::Boolean(true); + let v = sql_value_to_value( + "a", + &ConcreteDataType::float64_datatype(), + &sql_val, + None, + None, + false, + ); + assert!(v.is_err()); + assert!( + format!("{v:?}").contains( + "Column a expect type: Float64(Float64Type), actual: Boolean(BooleanType)" + ), + "v is {v:?}", + ); + + let sql_val = SqlValue::HexStringLiteral("48656c6c6f20776f726c6421".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::binary_datatype(), + &sql_val, + None, + None, + false, + ) + .unwrap(); + assert_eq!(Value::Binary(Bytes::from(b"Hello world!".as_slice())), v); + + let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::binary_datatype(), + &sql_val, + None, + None, + false, + ) + .unwrap(); + assert_eq!( + Value::Binary(Bytes::from(b"MorningMyFriends".as_slice())), + v + ); + + let sql_val = SqlValue::HexStringLiteral("9AF".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::binary_datatype(), + &sql_val, + None, + None, + false, + ); + assert!(v.is_err()); + assert!( + format!("{v:?}").contains("odd number of digits"), + "v is {v:?}" + ); + + let sql_val = SqlValue::HexStringLiteral("AG".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::binary_datatype(), + &sql_val, + None, + None, + false, + ); + assert!(v.is_err()); + assert!(format!("{v:?}").contains("invalid character"), "v is {v:?}",); + + let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::json_datatype(), + &sql_val, + None, + None, + false, + ); + assert!(v.is_err()); + + let sql_val = SqlValue::DoubleQuotedString(r#"{"a":"b"}"#.to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::json_datatype(), + &sql_val, + None, + None, + false, + ) + .unwrap(); + assert_eq!( + Value::Binary(Bytes::from( + jsonb::parse_value(r#"{"a":"b"}"#.as_bytes()) + .unwrap() + .to_vec() + .as_slice() + )), + v + ); + } + + #[test] + fn test_parse_json_to_jsonb() { + match parse_string_to_value( + "json_col", + r#"{"a": "b"}"#.to_string(), + &ConcreteDataType::json_datatype(), + None, + false, + ) { + Ok(Value::Binary(b)) => { + assert_eq!( + b, + jsonb::parse_value(r#"{"a": "b"}"#.as_bytes()) + .unwrap() + .to_vec() + ); + } + _ => { + unreachable!() + } + } + + assert!(parse_string_to_value( + "json_col", + r#"Nicola Kovac is the best rifler in the world"#.to_string(), + &ConcreteDataType::json_datatype(), + None, + false, + ) + .is_err()) + } + + #[test] + fn test_sql_number_to_value() { + let v = sql_number_to_value(&ConcreteDataType::float64_datatype(), "3.0").unwrap(); + assert_eq!(Value::Float64(OrderedFloat(3.0)), v); + + let v = sql_number_to_value(&ConcreteDataType::int32_datatype(), "999").unwrap(); + assert_eq!(Value::Int32(999), v); + + let v = sql_number_to_value( + &ConcreteDataType::timestamp_nanosecond_datatype(), + "1073741821", + ) + .unwrap(); + assert_eq!(Value::Timestamp(Timestamp::new_nanosecond(1073741821)), v); + + let v = sql_number_to_value( + &ConcreteDataType::timestamp_millisecond_datatype(), + "999999", + ) + .unwrap(); + assert_eq!(Value::Timestamp(Timestamp::new_millisecond(999999)), v); + + let v = sql_number_to_value(&ConcreteDataType::string_datatype(), "999"); + assert!(v.is_err(), "parse value error is: {v:?}"); + + let v = sql_number_to_value(&ConcreteDataType::boolean_datatype(), "0").unwrap(); + assert_eq!(v, Value::Boolean(false)); + let v = sql_number_to_value(&ConcreteDataType::boolean_datatype(), "1").unwrap(); + assert_eq!(v, Value::Boolean(true)); + assert!(sql_number_to_value(&ConcreteDataType::boolean_datatype(), "2").is_err()); + } + + #[test] + fn test_parse_date_literal() { + let value = sql_value_to_value( + "date", + &ConcreteDataType::date_datatype(), + &SqlValue::DoubleQuotedString("2022-02-22".to_string()), + None, + None, + false, + ) + .unwrap(); + assert_eq!(ConcreteDataType::date_datatype(), value.data_type()); + if let Value::Date(d) = value { + assert_eq!("2022-02-22", d.to_string()); + } else { + unreachable!() + } + + // with timezone + let value = sql_value_to_value( + "date", + &ConcreteDataType::date_datatype(), + &SqlValue::DoubleQuotedString("2022-02-22".to_string()), + Some(&Timezone::from_tz_string("+07:00").unwrap()), + None, + false, + ) + .unwrap(); + assert_eq!(ConcreteDataType::date_datatype(), value.data_type()); + if let Value::Date(d) = value { + assert_eq!("2022-02-21", d.to_string()); + } else { + unreachable!() + } + } + + #[test] + fn test_parse_timestamp_literal() { + match parse_string_to_value( + "timestamp_col", + "2022-02-22T00:01:01+08:00".to_string(), + &ConcreteDataType::timestamp_millisecond_datatype(), + None, + false, + ) + .unwrap() + { + Value::Timestamp(ts) => { + assert_eq!(1645459261000, ts.value()); + assert_eq!(TimeUnit::Millisecond, ts.unit()); + } + _ => { + unreachable!() + } + } + + match parse_string_to_value( + "timestamp_col", + "2022-02-22T00:01:01+08:00".to_string(), + &ConcreteDataType::timestamp_datatype(TimeUnit::Second), + None, + false, + ) + .unwrap() + { + Value::Timestamp(ts) => { + assert_eq!(1645459261, ts.value()); + assert_eq!(TimeUnit::Second, ts.unit()); + } + _ => { + unreachable!() + } + } + + match parse_string_to_value( + "timestamp_col", + "2022-02-22T00:01:01+08:00".to_string(), + &ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond), + None, + false, + ) + .unwrap() + { + Value::Timestamp(ts) => { + assert_eq!(1645459261000000, ts.value()); + assert_eq!(TimeUnit::Microsecond, ts.unit()); + } + _ => { + unreachable!() + } + } + + match parse_string_to_value( + "timestamp_col", + "2022-02-22T00:01:01+08:00".to_string(), + &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond), + None, + false, + ) + .unwrap() + { + Value::Timestamp(ts) => { + assert_eq!(1645459261000000000, ts.value()); + assert_eq!(TimeUnit::Nanosecond, ts.unit()); + } + _ => { + unreachable!() + } + } + + assert!(parse_string_to_value( + "timestamp_col", + "2022-02-22T00:01:01+08".to_string(), + &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond), + None, + false, + ) + .is_err()); + + // with timezone + match parse_string_to_value( + "timestamp_col", + "2022-02-22T00:01:01".to_string(), + &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond), + Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()), + false, + ) + .unwrap() + { + Value::Timestamp(ts) => { + assert_eq!(1645459261000000000, ts.value()); + assert_eq!("2022-02-21 16:01:01+0000", ts.to_iso8601_string()); + assert_eq!(TimeUnit::Nanosecond, ts.unit()); + } + _ => { + unreachable!() + } + } + } + + #[test] + fn test_parse_placeholder_value() { + assert!(sql_value_to_value( + "test", + &ConcreteDataType::string_datatype(), + &SqlValue::Placeholder("default".into()), + None, + None, + false + ) + .is_err()); + assert!(sql_value_to_value( + "test", + &ConcreteDataType::string_datatype(), + &SqlValue::Placeholder("default".into()), + None, + Some(UnaryOperator::Minus), + false + ) + .is_err()); + assert!(sql_value_to_value( + "test", + &ConcreteDataType::uint16_datatype(), + &SqlValue::Number("3".into(), false), + None, + Some(UnaryOperator::Minus), + false + ) + .is_err()); + assert!(sql_value_to_value( + "test", + &ConcreteDataType::uint16_datatype(), + &SqlValue::Number("3".into(), false), + None, + None, + false + ) + .is_ok()); + } + + #[test] + fn test_auto_string_to_numeric() { + // Test with auto_string_to_numeric=true + let sql_val = SqlValue::SingleQuotedString("123".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::int32_datatype(), + &sql_val, + None, + None, + true, + ) + .unwrap(); + assert_eq!(Value::Int32(123), v); + + // Test with a float string + let sql_val = SqlValue::SingleQuotedString("3.5".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::float64_datatype(), + &sql_val, + None, + None, + true, + ) + .unwrap(); + assert_eq!(Value::Float64(OrderedFloat(3.5)), v); + + // Test with auto_string_to_numeric=false + let sql_val = SqlValue::SingleQuotedString("123".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::int32_datatype(), + &sql_val, + None, + None, + false, + ); + assert!(v.is_err()); + + // Test with an invalid numeric string but auto_string_to_numeric=true + // Should return an error now with the new auto_cast_to_numeric behavior + let sql_val = SqlValue::SingleQuotedString("not_a_number".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::int32_datatype(), + &sql_val, + None, + None, + true, + ); + assert!(v.is_err()); + + // Test with boolean type + let sql_val = SqlValue::SingleQuotedString("true".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::boolean_datatype(), + &sql_val, + None, + None, + true, + ) + .unwrap(); + assert_eq!(Value::Boolean(true), v); + + // Non-numeric types should still be handled normally + let sql_val = SqlValue::SingleQuotedString("hello".to_string()); + let v = sql_value_to_value( + "a", + &ConcreteDataType::string_datatype(), + &sql_val, + None, + None, + true, + ); + assert!(v.is_ok()); + } +} diff --git a/src/common/sql/src/default_constraint.rs b/src/common/sql/src/default_constraint.rs new file mode 100644 index 0000000000..ad81b2f160 --- /dev/null +++ b/src/common/sql/src/default_constraint.rs @@ -0,0 +1,182 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use common_time::timezone::Timezone; +use datatypes::prelude::ConcreteDataType; +use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN}; +use datatypes::schema::ColumnDefaultConstraint; +pub use sqlparser::ast::{ + visit_expressions_mut, visit_statements_mut, BinaryOperator, ColumnDef, ColumnOption, + ColumnOptionDef, DataType, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArguments, + Ident, ObjectName, SqlOption, TableConstraint, TimezoneInfo, UnaryOperator, Value as SqlValue, + Visit, VisitMut, Visitor, VisitorMut, +}; + +use crate::convert::{sql_number_to_value, sql_value_to_value}; +use crate::error::{Result, UnsupportedDefaultValueSnafu}; + +pub fn parse_column_default_constraint( + column_name: &str, + data_type: &ConcreteDataType, + opts: &[ColumnOptionDef], + timezone: Option<&Timezone>, +) -> Result> { + if let Some(opt) = opts + .iter() + .find(|o| matches!(o.option, ColumnOption::Default(_))) + { + let default_constraint = match &opt.option { + ColumnOption::Default(Expr::Value(v)) => ColumnDefaultConstraint::Value( + sql_value_to_value(column_name, data_type, v, timezone, None, false)?, + ), + ColumnOption::Default(Expr::Function(func)) => { + let mut func = format!("{func}").to_lowercase(); + // normalize CURRENT_TIMESTAMP to CURRENT_TIMESTAMP() + if func == CURRENT_TIMESTAMP { + func = CURRENT_TIMESTAMP_FN.to_string(); + } + // Always use lowercase for function expression + ColumnDefaultConstraint::Function(func.to_lowercase()) + } + + ColumnOption::Default(Expr::UnaryOp { op, expr }) => { + // Specialized process for handling numerical inputs to prevent + // overflow errors during the parsing of negative numbers, + // See https://github.com/GreptimeTeam/greptimedb/issues/4351 + if let (UnaryOperator::Minus, Expr::Value(SqlValue::Number(n, _))) = + (op, expr.as_ref()) + { + return Ok(Some(ColumnDefaultConstraint::Value(sql_number_to_value( + data_type, + &format!("-{n}"), + )?))); + } + + if let Expr::Value(v) = &**expr { + let value = + sql_value_to_value(column_name, data_type, v, timezone, Some(*op), false)?; + ColumnDefaultConstraint::Value(value) + } else { + return UnsupportedDefaultValueSnafu { + column_name, + expr: *expr.clone(), + } + .fail(); + } + } + ColumnOption::Default(others) => { + return UnsupportedDefaultValueSnafu { + column_name, + expr: others.clone(), + } + .fail(); + } + _ => { + return UnsupportedDefaultValueSnafu { + column_name, + expr: Expr::Value(SqlValue::Null), + } + .fail(); + } + }; + + Ok(Some(default_constraint)) + } else { + Ok(None) + } +} + +#[cfg(test)] +mod test { + use std::assert_matches::assert_matches; + + use datatypes::prelude::{ConcreteDataType, Value}; + use datatypes::types::BooleanType; + + use super::*; + + #[test] + pub fn test_parse_column_default_constraint() { + let bool_value = sqlparser::ast::Value::Boolean(true); + + let opts = vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::Default(Expr::Value(bool_value)), + }, + ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + }, + ]; + + let constraint = parse_column_default_constraint( + "coll", + &ConcreteDataType::Boolean(BooleanType), + &opts, + None, + ) + .unwrap(); + + assert_matches!( + constraint, + Some(ColumnDefaultConstraint::Value(Value::Boolean(true))) + ); + + // Test negative number + let opts = vec![ColumnOptionDef { + name: None, + option: ColumnOption::Default(Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(Expr::Value(SqlValue::Number("32768".to_string(), false))), + }), + }]; + + let constraint = parse_column_default_constraint( + "coll", + &ConcreteDataType::int16_datatype(), + &opts, + None, + ) + .unwrap(); + + assert_matches!( + constraint, + Some(ColumnDefaultConstraint::Value(Value::Int16(-32768))) + ); + } + + #[test] + fn test_incorrect_default_value_issue_3479() { + let opts = vec![ColumnOptionDef { + name: None, + option: ColumnOption::Default(Expr::Value(SqlValue::Number( + "0.047318541668048164".into(), + false, + ))), + }]; + let constraint = parse_column_default_constraint( + "coll", + &ConcreteDataType::float64_datatype(), + &opts, + None, + ) + .unwrap() + .unwrap(); + assert_eq!("0.047318541668048164", constraint.to_string()); + let encoded: Vec = constraint.clone().try_into().unwrap(); + let decoded = ColumnDefaultConstraint::try_from(encoded.as_ref()).unwrap(); + assert_eq!(decoded, constraint); + } +} diff --git a/src/common/sql/src/error.rs b/src/common/sql/src/error.rs new file mode 100644 index 0000000000..2b2ab8a270 --- /dev/null +++ b/src/common/sql/src/error.rs @@ -0,0 +1,158 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; + +use common_error::ext::ErrorExt; +use common_error::status_code::StatusCode; +use common_macro::stack_trace_debug; +use common_time::timestamp::TimeUnit; +use common_time::Timestamp; +use datafusion_sql::sqlparser::ast::UnaryOperator; +use datatypes::prelude::{ConcreteDataType, Value}; +use snafu::{Location, Snafu}; +pub use sqlparser::ast::{Expr, Value as SqlValue}; + +pub type Result = std::result::Result; + +/// SQL parser errors. +// Now the error in parser does not contain backtrace to avoid generating backtrace +// every time the parser parses an invalid SQL. +#[derive(Snafu)] +#[snafu(visibility(pub))] +#[stack_trace_debug] +pub enum Error { + #[snafu(display( + "Column {} expect type: {:?}, actual: {:?}", + column_name, + expect, + actual, + ))] + ColumnTypeMismatch { + column_name: String, + expect: ConcreteDataType, + actual: ConcreteDataType, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to parse value: {}", msg))] + ParseSqlValue { + msg: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display( + "Unsupported expr in default constraint: {:?} for column: {}", + expr, + column_name + ))] + UnsupportedDefaultValue { + column_name: String, + expr: Expr, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Unable to convert sql value {} to datatype {:?}", value, datatype))] + ConvertSqlValue { + value: SqlValue, + datatype: ConcreteDataType, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid sql value: {}", value))] + InvalidSqlValue { + value: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Unsupported unary operator {}", unary_op))] + UnsupportedUnaryOp { + unary_op: UnaryOperator, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid unary operator {} for value {}", unary_op, value))] + InvalidUnaryOp { + unary_op: UnaryOperator, + value: Value, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to cast SQL value {} to datatype {}", sql_value, datatype))] + InvalidCast { + sql_value: sqlparser::ast::Value, + datatype: ConcreteDataType, + #[snafu(implicit)] + location: Location, + source: datatypes::error::Error, + }, + + #[snafu(display("Unable to convert {} to datatype {:?}", value, datatype))] + ConvertStr { + value: String, + datatype: ConcreteDataType, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display( + "Converting timestamp {:?} to unit {:?} overflow", + timestamp, + target_unit + ))] + TimestampOverflow { + timestamp: Timestamp, + target_unit: TimeUnit, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Datatype error: {}", source))] + Datatype { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, +} + +impl ErrorExt for Error { + fn status_code(&self) -> StatusCode { + use Error::*; + + match self { + UnsupportedDefaultValue { .. } => StatusCode::Unsupported, + ParseSqlValue { .. } => StatusCode::InvalidSyntax, + ColumnTypeMismatch { .. } + | InvalidSqlValue { .. } + | UnsupportedUnaryOp { .. } + | InvalidUnaryOp { .. } + | InvalidCast { .. } + | ConvertStr { .. } + | TimestampOverflow { .. } => StatusCode::InvalidArguments, + Datatype { source, .. } => source.status_code(), + ConvertSqlValue { .. } => StatusCode::Unsupported, + } + } + + fn as_any(&self) -> &dyn Any { + self + } +} diff --git a/src/common/sql/src/lib.rs b/src/common/sql/src/lib.rs new file mode 100644 index 0000000000..abca883124 --- /dev/null +++ b/src/common/sql/src/lib.rs @@ -0,0 +1,19 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![feature(assert_matches)] + +pub mod convert; +pub mod default_constraint; +pub mod error; diff --git a/src/operator/Cargo.toml b/src/operator/Cargo.toml index 8443213d85..87e5918221 100644 --- a/src/operator/Cargo.toml +++ b/src/operator/Cargo.toml @@ -35,6 +35,7 @@ common-meta.workspace = true common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true +common-sql.workspace = true common-telemetry.workspace = true common-time.workspace = true datafusion.workspace = true diff --git a/src/operator/src/error.rs b/src/operator/src/error.rs index f929c91629..b023530b7b 100644 --- a/src/operator/src/error.rs +++ b/src/operator/src/error.rs @@ -844,6 +844,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Sql common error"))] + SqlCommon { + source: common_sql::error::Error, + #[snafu(implicit)] + location: Location, + }, } pub type Result = std::result::Result; @@ -972,6 +979,7 @@ impl ErrorExt for Error { Error::InvalidProcessId { .. } => StatusCode::InvalidArguments, Error::ProcessManagerMissing { .. } => StatusCode::Unexpected, Error::PathNotFound { .. } => StatusCode::InvalidArguments, + Error::SqlCommon { source, .. } => source.status_code(), } } diff --git a/src/operator/src/req_convert/insert/stmt_to_region.rs b/src/operator/src/req_convert/insert/stmt_to_region.rs index 5f6649b4e4..43b0689985 100644 --- a/src/operator/src/req_convert/insert/stmt_to_region.rs +++ b/src/operator/src/req_convert/insert/stmt_to_region.rs @@ -22,7 +22,6 @@ use datatypes::schema::{ColumnSchema, SchemaRef}; use partition::manager::PartitionRuleManager; use session::context::{QueryContext, QueryContextRef}; use snafu::{ensure, OptionExt, ResultExt}; -use sql::statements; use sql::statements::insert::Insert; use sqlparser::ast::{ObjectName, Value as SqlValue}; use table::metadata::TableInfoRef; @@ -227,7 +226,7 @@ fn sql_value_to_grpc_value( column: column.clone(), })? } else { - statements::sql_value_to_value( + common_sql::convert::sql_value_to_value( column, &column_schema.data_type, sql_val, @@ -235,7 +234,7 @@ fn sql_value_to_grpc_value( None, auto_string_to_numeric, ) - .context(ParseSqlSnafu)? + .context(crate::error::SqlCommonSnafu)? }; let grpc_value = value_to_grpc_value(value); diff --git a/src/operator/src/statement/admin.rs b/src/operator/src/statement/admin.rs index a956b5f7fc..a36538ff17 100644 --- a/src/operator/src/statement/admin.rs +++ b/src/operator/src/statement/admin.rs @@ -19,6 +19,7 @@ use common_function::function_registry::FUNCTION_REGISTRY; use common_query::prelude::TypeSignature; use common_query::Output; use common_recordbatch::{RecordBatch, RecordBatches}; +use common_sql::convert::sql_value_to_value; use common_telemetry::tracing; use common_time::Timezone; use datatypes::data_type::DataType; @@ -30,7 +31,6 @@ use session::context::QueryContextRef; use snafu::{ensure, OptionExt, ResultExt}; use sql::ast::{Expr, FunctionArg, FunctionArgExpr, FunctionArguments, Value as SqlValue}; use sql::statements::admin::Admin; -use sql::statements::sql_value_to_value; use crate::error::{self, Result}; use crate::statement::StatementExecutor; @@ -186,7 +186,7 @@ fn values_to_vectors_by_exact_types( .zip(exact_types.iter()) .map(|(value, data_type)| { let value = sql_value_to_value(DUMMY_COLUMN, data_type, value, tz, None, false) - .context(error::ParseSqlValueSnafu)?; + .context(error::SqlCommonSnafu)?; Ok(value_to_vector(value)) }) diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index e361de4197..016619c23e 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -45,6 +45,7 @@ use common_meta::rpc::ddl::{ }; use common_meta::rpc::router::{Partition, Partition as MetaPartition}; use common_query::Output; +use common_sql::convert::sql_value_to_value; use common_telemetry::{debug, info, tracing, warn}; use common_time::Timezone; use datafusion_common::tree_node::TreeNodeVisitor; @@ -71,7 +72,6 @@ use sql::statements::create::trigger::CreateTrigger; use sql::statements::create::{ CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, Partitions, }; -use sql::statements::sql_value_to_value; use sql::statements::statement::Statement; use sqlparser::ast::{Expr, Ident, UnaryOperator, Value as ParserValue}; use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_NAME}; @@ -87,10 +87,10 @@ use crate::error::{ ColumnNotFoundSnafu, ConvertSchemaSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu, DeserializePartitionSnafu, EmptyDdlExprSnafu, ExternalSnafu, ExtractTableNamesSnafu, FlowNotFoundSnafu, InvalidPartitionRuleSnafu, InvalidPartitionSnafu, InvalidSqlSnafu, - InvalidTableNameSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu, ParseSqlValueSnafu, Result, - SchemaInUseSnafu, SchemaNotFoundSnafu, SchemaReadOnlySnafu, SubstraitCodecSnafu, - TableAlreadyExistsSnafu, TableMetadataManagerSnafu, TableNotFoundSnafu, - UnrecognizedTableOptionSnafu, ViewAlreadyExistsSnafu, + InvalidTableNameSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu, Result, SchemaInUseSnafu, + SchemaNotFoundSnafu, SchemaReadOnlySnafu, SubstraitCodecSnafu, TableAlreadyExistsSnafu, + TableMetadataManagerSnafu, TableNotFoundSnafu, UnrecognizedTableOptionSnafu, + ViewAlreadyExistsSnafu, }; use crate::expr_helper; use crate::statement::show::create_partitions_stmt; @@ -1859,7 +1859,7 @@ fn convert_value( unary_op, false, ) - .context(ParseSqlValueSnafu) + .context(error::SqlCommonSnafu) } #[cfg(test)] diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index bc64e19485..ab0bf0bddb 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -48,6 +48,7 @@ common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true common-session.workspace = true +common-sql.workspace = true common-telemetry.workspace = true common-time.workspace = true common-version = { workspace = true, features = ["codec"] } diff --git a/src/servers/src/mysql/helper.rs b/src/servers/src/mysql/helper.rs index a2e05d1c06..a418a4ef35 100644 --- a/src/servers/src/mysql/helper.rs +++ b/src/servers/src/mysql/helper.rs @@ -17,6 +17,7 @@ use std::time::Duration; use chrono::NaiveDate; use common_query::prelude::ScalarValue; +use common_sql::convert::sql_value_to_value; use common_time::Timestamp; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_expr::LogicalPlan; @@ -27,7 +28,6 @@ use itertools::Itertools; use opensrv_mysql::{to_naive_datetime, ParamValue, ValueInner}; use snafu::ResultExt; use sql::ast::{visit_expressions_mut, Expr, Value as ValueExpr, VisitMut}; -use sql::statements::sql_value_to_value; use sql::statements::statement::Statement; use crate::error::{self, DataFusionSnafu, Result}; diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index 0cd54aba31..b50fbab8db 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -19,6 +19,7 @@ common-decimal.workspace = true common-error.workspace = true common-macro.workspace = true common-query.workspace = true +common-sql.workspace = true common-time.workspace = true datafusion.workspace = true datafusion-common.workspace = true diff --git a/src/sql/src/error.rs b/src/sql/src/error.rs index 6efc14dff7..b38b1cbf78 100644 --- a/src/sql/src/error.rs +++ b/src/sql/src/error.rs @@ -17,16 +17,13 @@ use std::any::Any; use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; -use common_time::timestamp::TimeUnit; -use common_time::Timestamp; use datafusion_common::DataFusionError; -use datafusion_sql::sqlparser::ast::UnaryOperator; use datatypes::prelude::{ConcreteDataType, Value}; use snafu::{Location, Snafu}; use sqlparser::ast::Ident; use sqlparser::parser::ParserError; -use crate::ast::{Expr, Value as SqlValue}; +use crate::ast::Expr; use crate::parsers::error::TQLError; pub type Result = std::result::Result; @@ -59,18 +56,6 @@ pub enum Error { location: Location, }, - #[snafu(display( - "Unsupported expr in default constraint: {:?} for column: {}", - expr, - column_name - ))] - UnsupportedDefaultValue { - column_name: String, - expr: Expr, - #[snafu(implicit)] - location: Location, - }, - // Syntax error from sql parser. #[snafu(display("Invalid SQL syntax"))] Syntax { @@ -218,30 +203,6 @@ pub enum Error { source: datatypes::error::Error, }, - #[snafu(display("Failed to cast SQL value {} to datatype {}", sql_value, datatype))] - InvalidCast { - sql_value: sqlparser::ast::Value, - datatype: ConcreteDataType, - #[snafu(implicit)] - location: Location, - source: datatypes::error::Error, - }, - - #[snafu(display("Invalid unary operator {} for value {}", unary_op, value))] - InvalidUnaryOp { - unary_op: UnaryOperator, - value: Value, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Unsupported unary operator {}", unary_op))] - UnsupportedUnaryOp { - unary_op: UnaryOperator, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Unrecognized table option key: {}", key))] InvalidTableOption { key: String, @@ -271,25 +232,6 @@ pub enum Error { source: api::error::Error, }, - #[snafu(display("Invalid sql value: {}", value))] - InvalidSqlValue { - value: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display( - "Converting timestamp {:?} to unit {:?} overflow", - timestamp, - target_unit - ))] - TimestampOverflow { - timestamp: Timestamp, - target_unit: TimeUnit, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Unable to convert statement {} to DataFusion statement", statement))] ConvertToDfStatement { statement: String, @@ -297,14 +239,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Unable to convert sql value {} to datatype {:?}", value, datatype))] - ConvertSqlValue { - value: SqlValue, - datatype: ConcreteDataType, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Unable to convert value {} to sql value", value))] ConvertValue { value: Value, @@ -354,13 +288,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Datatype error: {}", source))] - Datatype { - source: datatypes::error::Error, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display( "Invalid partition number: {}, should be in range [2, 65536]", partition_num @@ -371,14 +298,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Unable to convert {} to datatype {:?}", value, datatype))] - ConvertStr { - value: String, - datatype: ConcreteDataType, - #[snafu(implicit)] - location: Location, - }, - #[cfg(feature = "enterprise")] #[snafu(display("Missing `{}` clause", name))] MissingClause { @@ -410,6 +329,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Sql common error"))] + SqlCommon { + source: common_sql::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -417,7 +343,7 @@ impl ErrorExt for Error { use Error::*; match self { - UnsupportedDefaultValue { .. } | Unsupported { .. } => StatusCode::Unsupported, + Unsupported { .. } => StatusCode::Unsupported, Unexpected { .. } | Syntax { .. } | TQLSyntax { .. } @@ -441,17 +367,11 @@ impl ErrorExt for Error { | InvalidTableName { .. } | InvalidFlowName { .. } | InvalidFlowQuery { .. } - | InvalidSqlValue { .. } - | TimestampOverflow { .. } | InvalidTableOption { .. } - | InvalidCast { .. } | ConvertToLogicalExpression { .. } | Simplification { .. } | InvalidInterval { .. } - | InvalidUnaryOp { .. } - | InvalidPartitionNumber { .. } - | UnsupportedUnaryOp { .. } - | ConvertStr { .. } => StatusCode::InvalidArguments, + | InvalidPartitionNumber { .. } => StatusCode::InvalidArguments, #[cfg(feature = "enterprise")] InvalidTriggerName { .. } => StatusCode::InvalidArguments, @@ -463,9 +383,9 @@ impl ErrorExt for Error { SerializeColumnDefaultConstraint { source, .. } => source.status_code(), ConvertToGrpcDataType { source, .. } => source.status_code(), - Datatype { source, .. } => source.status_code(), + SqlCommon { source, .. } => source.status_code(), ConvertToDfStatement { .. } => StatusCode::Internal, - ConvertSqlValue { .. } | ConvertValue { .. } => StatusCode::Unsupported, + ConvertValue { .. } => StatusCode::Unsupported, PermissionDenied { .. } => StatusCode::PermissionDenied, SetFulltextOption { .. } | SetSkippingIndexOption { .. } => StatusCode::Unexpected, diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 8bed705b14..9cad86ca63 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -32,33 +32,24 @@ pub mod tql; pub(crate) mod transform; pub mod truncate; -use std::str::FromStr; - use api::helper::ColumnDataTypeWrapper; use api::v1::SemanticType; -use common_base::bytes::Bytes; +use common_sql::default_constraint::parse_column_default_constraint; use common_time::timezone::Timezone; -use common_time::Timestamp; use datatypes::prelude::ConcreteDataType; -use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN}; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY}; -use datatypes::types::{ - cast, parse_string_to_json_type_value, parse_string_to_vector_type_value, TimestampType, -}; -use datatypes::value::{OrderedF32, OrderedF64, Value}; -use snafu::{ensure, OptionExt, ResultExt}; -use sqlparser::ast::{ExactNumberInfo, Ident, ObjectName, UnaryOperator}; +use datatypes::types::TimestampType; +use datatypes::value::Value; +use snafu::ResultExt; +use sqlparser::ast::{ExactNumberInfo, Ident, ObjectName}; use crate::ast::{ - ColumnDef, ColumnOption, ColumnOptionDef, DataType as SqlDataType, Expr, TimezoneInfo, - Value as SqlValue, + ColumnDef, ColumnOption, DataType as SqlDataType, TimezoneInfo, Value as SqlValue, }; use crate::error::{ - self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertStrSnafu, - ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, DatatypeSnafu, InvalidCastSnafu, - InvalidSqlValueSnafu, InvalidUnaryOpSnafu, ParseSqlValueSnafu, Result, + self, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, Result, SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu, - TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, UnsupportedUnaryOpSnafu, + SqlCommonSnafu, }; use crate::statements::create::Column; pub use crate::statements::option_map::OptionMap; @@ -66,331 +57,6 @@ pub(crate) use crate::statements::transform::transform_statements; const VECTOR_TYPE_NAME: &str = "VECTOR"; -fn parse_string_to_value( - column_name: &str, - s: String, - data_type: &ConcreteDataType, - timezone: Option<&Timezone>, - auto_string_to_numeric: bool, -) -> Result { - if auto_string_to_numeric { - if let Some(value) = auto_cast_to_numeric(&s, data_type)? { - return Ok(value); - } - } - - ensure!( - data_type.is_stringifiable(), - ColumnTypeMismatchSnafu { - column_name, - expect: data_type.clone(), - actual: ConcreteDataType::string_datatype(), - } - ); - - match data_type { - ConcreteDataType::String(_) => Ok(Value::String(s.into())), - ConcreteDataType::Date(_) => { - if let Ok(date) = common_time::date::Date::from_str(&s, timezone) { - Ok(Value::Date(date)) - } else { - ParseSqlValueSnafu { - msg: format!("Failed to parse {s} to Date value"), - } - .fail() - } - } - ConcreteDataType::Timestamp(t) => { - if let Ok(ts) = Timestamp::from_str(&s, timezone) { - Ok(Value::Timestamp(ts.convert_to(t.unit()).context( - TimestampOverflowSnafu { - timestamp: ts, - target_unit: t.unit(), - }, - )?)) - } else if let Ok(ts) = i64::from_str(s.as_str()) { - Ok(Value::Timestamp(Timestamp::new(ts, t.unit()))) - } else { - ParseSqlValueSnafu { - msg: format!("Failed to parse {s} to Timestamp value"), - } - .fail() - } - } - ConcreteDataType::Decimal128(_) => { - if let Ok(val) = common_decimal::Decimal128::from_str(&s) { - Ok(Value::Decimal128(val)) - } else { - ParseSqlValueSnafu { - msg: format!("Fail to parse number {s} to Decimal128 value"), - } - .fail() - } - } - ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())), - ConcreteDataType::Json(j) => { - let v = parse_string_to_json_type_value(&s, &j.format).context(DatatypeSnafu)?; - Ok(Value::Binary(v.into())) - } - ConcreteDataType::Vector(d) => { - let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?; - Ok(Value::Binary(v.into())) - } - _ => ParseSqlValueSnafu { - msg: format!("Failed to parse {s} to {data_type} value"), - } - .fail(), - } -} - -/// Casts string to value of specified numeric data type. -/// If the string cannot be parsed, returns an error. -/// -/// Returns None if the data type doesn't support auto casting. -fn auto_cast_to_numeric(s: &str, data_type: &ConcreteDataType) -> Result> { - let value = match data_type { - ConcreteDataType::Boolean(_) => s.parse::().map(Value::Boolean).ok(), - ConcreteDataType::Int8(_) => s.parse::().map(Value::Int8).ok(), - ConcreteDataType::Int16(_) => s.parse::().map(Value::Int16).ok(), - ConcreteDataType::Int32(_) => s.parse::().map(Value::Int32).ok(), - ConcreteDataType::Int64(_) => s.parse::().map(Value::Int64).ok(), - ConcreteDataType::UInt8(_) => s.parse::().map(Value::UInt8).ok(), - ConcreteDataType::UInt16(_) => s.parse::().map(Value::UInt16).ok(), - ConcreteDataType::UInt32(_) => s.parse::().map(Value::UInt32).ok(), - ConcreteDataType::UInt64(_) => s.parse::().map(Value::UInt64).ok(), - ConcreteDataType::Float32(_) => s - .parse::() - .map(|v| Value::Float32(OrderedF32::from(v))) - .ok(), - ConcreteDataType::Float64(_) => s - .parse::() - .map(|v| Value::Float64(OrderedF64::from(v))) - .ok(), - _ => return Ok(None), - }; - - match value { - Some(value) => Ok(Some(value)), - None => ConvertStrSnafu { - value: s, - datatype: data_type.clone(), - } - .fail(), - } -} - -fn parse_hex_string(s: &str) -> Result { - match hex::decode(s) { - Ok(b) => Ok(Value::Binary(Bytes::from(b))), - Err(hex::FromHexError::InvalidHexCharacter { c, index }) => ParseSqlValueSnafu { - msg: format!( - "Fail to parse hex string to Byte: invalid character {c:?} at position {index}" - ), - } - .fail(), - Err(hex::FromHexError::OddLength) => ParseSqlValueSnafu { - msg: "Fail to parse hex string to Byte: odd number of digits".to_string(), - } - .fail(), - Err(e) => ParseSqlValueSnafu { - msg: format!("Fail to parse hex string to Byte {s}, {e:?}"), - } - .fail(), - } -} - -macro_rules! parse_number_to_value { - ($data_type: expr, $n: ident, $(($Type: ident, $PrimitiveType: ident, $Target: ident)), +) => { - match $data_type { - $( - ConcreteDataType::$Type(_) => { - let n = parse_sql_number::<$PrimitiveType>($n)?; - Ok(Value::$Type($Target::from(n))) - }, - )+ - ConcreteDataType::Timestamp(t) => { - let n = parse_sql_number::($n)?; - Ok(Value::Timestamp(Timestamp::new(n, t.unit()))) - }, - // TODO(QuenKar): This could need to be optimized - // if this from_str function is slow, - // we can implement parse decimal string with precision and scale manually. - ConcreteDataType::Decimal128(_) => { - if let Ok(val) = common_decimal::Decimal128::from_str($n) { - Ok(Value::Decimal128(val)) - } else { - ParseSqlValueSnafu { - msg: format!("Fail to parse number {}, invalid column type: {:?}", - $n, $data_type) - }.fail() - } - } - // It's valid for MySQL JDBC to send "0" and "1" for boolean types, so adapt to that. - ConcreteDataType::Boolean(_) => { - match $n { - "0" => Ok(Value::Boolean(false)), - "1" => Ok(Value::Boolean(true)), - _ => ParseSqlValueSnafu { - msg: format!("Failed to parse number '{}' to boolean column type", $n)}.fail(), - } - } - _ => ParseSqlValueSnafu { - msg: format!("Fail to parse number {}, invalid column type: {:?}", - $n, $data_type - )}.fail(), - } - } -} - -/// Convert a sql value into datatype's value -pub fn sql_number_to_value(data_type: &ConcreteDataType, n: &str) -> Result { - parse_number_to_value!( - data_type, - n, - (UInt8, u8, u8), - (UInt16, u16, u16), - (UInt32, u32, u32), - (UInt64, u64, u64), - (Int8, i8, i8), - (Int16, i16, i16), - (Int32, i32, i32), - (Int64, i64, i64), - (Float64, f64, OrderedF64), - (Float32, f32, OrderedF32) - ) - // TODO(hl): also Date/DateTime -} - -pub(crate) fn parse_sql_number(n: &str) -> Result -where - ::Err: std::fmt::Debug, -{ - match n.parse::() { - Ok(n) => Ok(n), - Err(e) => ParseSqlValueSnafu { - msg: format!("Fail to parse number {n}, {e:?}"), - } - .fail(), - } -} - -/// Converts SQL value to value according to the data type. -/// If `auto_string_to_numeric` is true, tries to cast the string value to numeric values, -/// and returns error if the cast fails. -pub fn sql_value_to_value( - column_name: &str, - data_type: &ConcreteDataType, - sql_val: &SqlValue, - timezone: Option<&Timezone>, - unary_op: Option, - auto_string_to_numeric: bool, -) -> Result { - let mut value = match sql_val { - SqlValue::Number(n, _) => sql_number_to_value(data_type, n)?, - SqlValue::Null => Value::Null, - SqlValue::Boolean(b) => { - ensure!( - data_type.is_boolean(), - ColumnTypeMismatchSnafu { - column_name, - expect: data_type.clone(), - actual: ConcreteDataType::boolean_datatype(), - } - ); - - (*b).into() - } - SqlValue::DoubleQuotedString(s) | SqlValue::SingleQuotedString(s) => parse_string_to_value( - column_name, - s.clone(), - data_type, - timezone, - auto_string_to_numeric, - )?, - SqlValue::HexStringLiteral(s) => { - // Should not directly write binary into json column - ensure!( - !matches!(data_type, ConcreteDataType::Json(_)), - ColumnTypeMismatchSnafu { - column_name, - expect: ConcreteDataType::binary_datatype(), - actual: ConcreteDataType::json_datatype(), - } - ); - - parse_hex_string(s)? - } - SqlValue::Placeholder(s) => return InvalidSqlValueSnafu { value: s }.fail(), - - // TODO(dennis): supports binary string - _ => { - return ConvertSqlValueSnafu { - value: sql_val.clone(), - datatype: data_type.clone(), - } - .fail() - } - }; - - if let Some(unary_op) = unary_op { - match unary_op { - UnaryOperator::Plus | UnaryOperator::Minus | UnaryOperator::Not => {} - _ => { - return UnsupportedUnaryOpSnafu { unary_op }.fail(); - } - } - - match value { - Value::Null => {} - Value::Boolean(bool) => match unary_op { - UnaryOperator::Not => value = Value::Boolean(!bool), - _ => { - return InvalidUnaryOpSnafu { unary_op, value }.fail(); - } - }, - Value::UInt8(_) - | Value::UInt16(_) - | Value::UInt32(_) - | Value::UInt64(_) - | Value::Int8(_) - | Value::Int16(_) - | Value::Int32(_) - | Value::Int64(_) - | Value::Float32(_) - | Value::Float64(_) - | Value::Decimal128(_) - | Value::Date(_) - | Value::Timestamp(_) - | Value::Time(_) - | Value::Duration(_) - | Value::IntervalYearMonth(_) - | Value::IntervalDayTime(_) - | Value::IntervalMonthDayNano(_) => match unary_op { - UnaryOperator::Plus => {} - UnaryOperator::Minus => { - value = value - .try_negative() - .with_context(|| InvalidUnaryOpSnafu { unary_op, value })?; - } - _ => return InvalidUnaryOpSnafu { unary_op, value }.fail(), - }, - - Value::String(_) | Value::Binary(_) | Value::List(_) => { - return InvalidUnaryOpSnafu { unary_op, value }.fail() - } - } - } - - if value.data_type() != *data_type { - cast(value, data_type).with_context(|_| InvalidCastSnafu { - sql_value: sql_val.clone(), - datatype: data_type, - }) - } else { - Ok(value) - } -} - pub fn value_to_sql_value(val: &Value) -> Result { Ok(match val { Value::Int8(v) => SqlValue::Number(v.to_string(), false), @@ -413,77 +79,6 @@ pub fn value_to_sql_value(val: &Value) -> Result { }) } -fn parse_column_default_constraint( - column_name: &str, - data_type: &ConcreteDataType, - opts: &[ColumnOptionDef], - timezone: Option<&Timezone>, -) -> Result> { - if let Some(opt) = opts - .iter() - .find(|o| matches!(o.option, ColumnOption::Default(_))) - { - let default_constraint = match &opt.option { - ColumnOption::Default(Expr::Value(v)) => ColumnDefaultConstraint::Value( - sql_value_to_value(column_name, data_type, v, timezone, None, false)?, - ), - ColumnOption::Default(Expr::Function(func)) => { - let mut func = format!("{func}").to_lowercase(); - // normalize CURRENT_TIMESTAMP to CURRENT_TIMESTAMP() - if func == CURRENT_TIMESTAMP { - func = CURRENT_TIMESTAMP_FN.to_string(); - } - // Always use lowercase for function expression - ColumnDefaultConstraint::Function(func.to_lowercase()) - } - - ColumnOption::Default(Expr::UnaryOp { op, expr }) => { - // Specialized process for handling numerical inputs to prevent - // overflow errors during the parsing of negative numbers, - // See https://github.com/GreptimeTeam/greptimedb/issues/4351 - if let (UnaryOperator::Minus, Expr::Value(SqlValue::Number(n, _))) = - (op, expr.as_ref()) - { - return Ok(Some(ColumnDefaultConstraint::Value(sql_number_to_value( - data_type, - &format!("-{n}"), - )?))); - } - - if let Expr::Value(v) = &**expr { - let value = - sql_value_to_value(column_name, data_type, v, timezone, Some(*op), false)?; - ColumnDefaultConstraint::Value(value) - } else { - return UnsupportedDefaultValueSnafu { - column_name, - expr: *expr.clone(), - } - .fail(); - } - } - ColumnOption::Default(others) => { - return UnsupportedDefaultValueSnafu { - column_name, - expr: others.clone(), - } - .fail(); - } - _ => { - return UnsupportedDefaultValueSnafu { - column_name, - expr: Expr::Value(SqlValue::Null), - } - .fail(); - } - }; - - Ok(Some(default_constraint)) - } else { - Ok(None) - } -} - /// Return true when the `ColumnDef` options contain primary key pub fn has_primary_key_option(column_def: &ColumnDef) -> bool { column_def @@ -512,7 +107,8 @@ pub fn column_to_schema( let name = column.name().value.clone(); let data_type = sql_data_type_to_concrete_data_type(column.data_type())?; let default_constraint = - parse_column_default_constraint(&name, &data_type, column.options(), timezone)?; + parse_column_default_constraint(&name, &data_type, column.options(), timezone) + .context(SqlCommonSnafu)?; let mut column_schema = ColumnSchema::new(name, data_type, is_nullable) .with_time_index(is_time_index) @@ -564,7 +160,8 @@ pub fn sql_column_def_to_grpc_column_def( .all(|o| !matches!(o.option, ColumnOption::NotNull)); let default_constraint = - parse_column_default_constraint(&name, &data_type, &col.options, timezone)? + parse_column_default_constraint(&name, &data_type, &col.options, timezone) + .context(SqlCommonSnafu)? .map(ColumnDefaultConstraint::try_into) // serialize default constraint to bytes .transpose() .context(SerializeColumnDefaultConstraintSnafu)?; @@ -720,16 +317,13 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu #[cfg(test)] mod tests { - use std::assert_matches::assert_matches; use std::collections::HashMap; use api::v1::ColumnDataType; - use common_time::timestamp::TimeUnit; use datatypes::schema::{ FulltextAnalyzer, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, }; - use datatypes::types::BooleanType; - use datatypes::value::OrderedFloat; + use sqlparser::ast::{ColumnOptionDef, Expr}; use super::*; use crate::ast::TimezoneInfo; @@ -820,443 +414,6 @@ mod tests { ); } - #[test] - fn test_sql_number_to_value() { - let v = sql_number_to_value(&ConcreteDataType::float64_datatype(), "3.0").unwrap(); - assert_eq!(Value::Float64(OrderedFloat(3.0)), v); - - let v = sql_number_to_value(&ConcreteDataType::int32_datatype(), "999").unwrap(); - assert_eq!(Value::Int32(999), v); - - let v = sql_number_to_value( - &ConcreteDataType::timestamp_nanosecond_datatype(), - "1073741821", - ) - .unwrap(); - assert_eq!(Value::Timestamp(Timestamp::new_nanosecond(1073741821)), v); - - let v = sql_number_to_value( - &ConcreteDataType::timestamp_millisecond_datatype(), - "999999", - ) - .unwrap(); - assert_eq!(Value::Timestamp(Timestamp::new_millisecond(999999)), v); - - let v = sql_number_to_value(&ConcreteDataType::string_datatype(), "999"); - assert!(v.is_err(), "parse value error is: {v:?}"); - - let v = sql_number_to_value(&ConcreteDataType::boolean_datatype(), "0").unwrap(); - assert_eq!(v, Value::Boolean(false)); - let v = sql_number_to_value(&ConcreteDataType::boolean_datatype(), "1").unwrap(); - assert_eq!(v, Value::Boolean(true)); - assert!(sql_number_to_value(&ConcreteDataType::boolean_datatype(), "2").is_err()); - } - - #[test] - fn test_sql_value_to_value() { - let sql_val = SqlValue::Null; - assert_eq!( - Value::Null, - sql_value_to_value( - "a", - &ConcreteDataType::float64_datatype(), - &sql_val, - None, - None, - false - ) - .unwrap() - ); - - let sql_val = SqlValue::Boolean(true); - assert_eq!( - Value::Boolean(true), - sql_value_to_value( - "a", - &ConcreteDataType::boolean_datatype(), - &sql_val, - None, - None, - false - ) - .unwrap() - ); - - let sql_val = SqlValue::Number("3.0".to_string(), false); - assert_eq!( - Value::Float64(OrderedFloat(3.0)), - sql_value_to_value( - "a", - &ConcreteDataType::float64_datatype(), - &sql_val, - None, - None, - false - ) - .unwrap() - ); - - let sql_val = SqlValue::Number("3.0".to_string(), false); - let v = sql_value_to_value( - "a", - &ConcreteDataType::boolean_datatype(), - &sql_val, - None, - None, - false, - ); - assert!(v.is_err()); - assert!(format!("{v:?}").contains("Failed to parse number '3.0' to boolean column type")); - - let sql_val = SqlValue::Boolean(true); - let v = sql_value_to_value( - "a", - &ConcreteDataType::float64_datatype(), - &sql_val, - None, - None, - false, - ); - assert!(v.is_err()); - assert!( - format!("{v:?}").contains( - "Column a expect type: Float64(Float64Type), actual: Boolean(BooleanType)" - ), - "v is {v:?}", - ); - - let sql_val = SqlValue::HexStringLiteral("48656c6c6f20776f726c6421".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::binary_datatype(), - &sql_val, - None, - None, - false, - ) - .unwrap(); - assert_eq!(Value::Binary(Bytes::from(b"Hello world!".as_slice())), v); - - let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::binary_datatype(), - &sql_val, - None, - None, - false, - ) - .unwrap(); - assert_eq!( - Value::Binary(Bytes::from(b"MorningMyFriends".as_slice())), - v - ); - - let sql_val = SqlValue::HexStringLiteral("9AF".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::binary_datatype(), - &sql_val, - None, - None, - false, - ); - assert!(v.is_err()); - assert!( - format!("{v:?}").contains("odd number of digits"), - "v is {v:?}" - ); - - let sql_val = SqlValue::HexStringLiteral("AG".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::binary_datatype(), - &sql_val, - None, - None, - false, - ); - assert!(v.is_err()); - assert!(format!("{v:?}").contains("invalid character"), "v is {v:?}",); - - let sql_val = SqlValue::DoubleQuotedString("MorningMyFriends".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::json_datatype(), - &sql_val, - None, - None, - false, - ); - assert!(v.is_err()); - - let sql_val = SqlValue::DoubleQuotedString(r#"{"a":"b"}"#.to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::json_datatype(), - &sql_val, - None, - None, - false, - ) - .unwrap(); - assert_eq!( - Value::Binary(Bytes::from( - jsonb::parse_value(r#"{"a":"b"}"#.as_bytes()) - .unwrap() - .to_vec() - .as_slice() - )), - v - ); - } - - #[test] - fn test_parse_date_literal() { - let value = sql_value_to_value( - "date", - &ConcreteDataType::date_datatype(), - &SqlValue::DoubleQuotedString("2022-02-22".to_string()), - None, - None, - false, - ) - .unwrap(); - assert_eq!(ConcreteDataType::date_datatype(), value.data_type()); - if let Value::Date(d) = value { - assert_eq!("2022-02-22", d.to_string()); - } else { - unreachable!() - } - - // with timezone - let value = sql_value_to_value( - "date", - &ConcreteDataType::date_datatype(), - &SqlValue::DoubleQuotedString("2022-02-22".to_string()), - Some(&Timezone::from_tz_string("+07:00").unwrap()), - None, - false, - ) - .unwrap(); - assert_eq!(ConcreteDataType::date_datatype(), value.data_type()); - if let Value::Date(d) = value { - assert_eq!("2022-02-21", d.to_string()); - } else { - unreachable!() - } - } - - #[test] - fn test_parse_timestamp_literal() { - match parse_string_to_value( - "timestamp_col", - "2022-02-22T00:01:01+08:00".to_string(), - &ConcreteDataType::timestamp_millisecond_datatype(), - None, - false, - ) - .unwrap() - { - Value::Timestamp(ts) => { - assert_eq!(1645459261000, ts.value()); - assert_eq!(TimeUnit::Millisecond, ts.unit()); - } - _ => { - unreachable!() - } - } - - match parse_string_to_value( - "timestamp_col", - "2022-02-22T00:01:01+08:00".to_string(), - &ConcreteDataType::timestamp_datatype(TimeUnit::Second), - None, - false, - ) - .unwrap() - { - Value::Timestamp(ts) => { - assert_eq!(1645459261, ts.value()); - assert_eq!(TimeUnit::Second, ts.unit()); - } - _ => { - unreachable!() - } - } - - match parse_string_to_value( - "timestamp_col", - "2022-02-22T00:01:01+08:00".to_string(), - &ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond), - None, - false, - ) - .unwrap() - { - Value::Timestamp(ts) => { - assert_eq!(1645459261000000, ts.value()); - assert_eq!(TimeUnit::Microsecond, ts.unit()); - } - _ => { - unreachable!() - } - } - - match parse_string_to_value( - "timestamp_col", - "2022-02-22T00:01:01+08:00".to_string(), - &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond), - None, - false, - ) - .unwrap() - { - Value::Timestamp(ts) => { - assert_eq!(1645459261000000000, ts.value()); - assert_eq!(TimeUnit::Nanosecond, ts.unit()); - } - _ => { - unreachable!() - } - } - - assert!(parse_string_to_value( - "timestamp_col", - "2022-02-22T00:01:01+08".to_string(), - &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond), - None, - false, - ) - .is_err()); - - // with timezone - match parse_string_to_value( - "timestamp_col", - "2022-02-22T00:01:01".to_string(), - &ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond), - Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()), - false, - ) - .unwrap() - { - Value::Timestamp(ts) => { - assert_eq!(1645459261000000000, ts.value()); - assert_eq!("2022-02-21 16:01:01+0000", ts.to_iso8601_string()); - assert_eq!(TimeUnit::Nanosecond, ts.unit()); - } - _ => { - unreachable!() - } - } - } - - #[test] - fn test_parse_json_to_jsonb() { - match parse_string_to_value( - "json_col", - r#"{"a": "b"}"#.to_string(), - &ConcreteDataType::json_datatype(), - None, - false, - ) { - Ok(Value::Binary(b)) => { - assert_eq!( - b, - jsonb::parse_value(r#"{"a": "b"}"#.as_bytes()) - .unwrap() - .to_vec() - ); - } - _ => { - unreachable!() - } - } - - assert!(parse_string_to_value( - "json_col", - r#"Nicola Kovac is the best rifler in the world"#.to_string(), - &ConcreteDataType::json_datatype(), - None, - false, - ) - .is_err()) - } - - #[test] - pub fn test_parse_column_default_constraint() { - let bool_value = sqlparser::ast::Value::Boolean(true); - - let opts = vec![ - ColumnOptionDef { - name: None, - option: ColumnOption::Default(Expr::Value(bool_value)), - }, - ColumnOptionDef { - name: None, - option: ColumnOption::NotNull, - }, - ]; - - let constraint = parse_column_default_constraint( - "coll", - &ConcreteDataType::Boolean(BooleanType), - &opts, - None, - ) - .unwrap(); - - assert_matches!( - constraint, - Some(ColumnDefaultConstraint::Value(Value::Boolean(true))) - ); - - // Test negative number - let opts = vec![ColumnOptionDef { - name: None, - option: ColumnOption::Default(Expr::UnaryOp { - op: UnaryOperator::Minus, - expr: Box::new(Expr::Value(SqlValue::Number("32768".to_string(), false))), - }), - }]; - - let constraint = parse_column_default_constraint( - "coll", - &ConcreteDataType::int16_datatype(), - &opts, - None, - ) - .unwrap(); - - assert_matches!( - constraint, - Some(ColumnDefaultConstraint::Value(Value::Int16(-32768))) - ); - } - - #[test] - fn test_incorrect_default_value_issue_3479() { - let opts = vec![ColumnOptionDef { - name: None, - option: ColumnOption::Default(Expr::Value(SqlValue::Number( - "0.047318541668048164".into(), - false, - ))), - }]; - let constraint = parse_column_default_constraint( - "coll", - &ConcreteDataType::float64_datatype(), - &opts, - None, - ) - .unwrap() - .unwrap(); - assert_eq!("0.047318541668048164", constraint.to_string()); - let encoded: Vec = constraint.clone().try_into().unwrap(); - let decoded = ColumnDefaultConstraint::try_from(encoded.as_ref()).unwrap(); - assert_eq!(decoded, constraint); - } - #[test] pub fn test_sql_column_def_to_grpc_column_def() { // test basic @@ -1542,357 +699,4 @@ mod tests { assert_eq!(fulltext_options.analyzer, FulltextAnalyzer::English); assert!(fulltext_options.case_sensitive); } - - #[test] - fn test_parse_placeholder_value() { - assert!(sql_value_to_value( - "test", - &ConcreteDataType::string_datatype(), - &SqlValue::Placeholder("default".into()), - None, - None, - false - ) - .is_err()); - assert!(sql_value_to_value( - "test", - &ConcreteDataType::string_datatype(), - &SqlValue::Placeholder("default".into()), - None, - Some(UnaryOperator::Minus), - false - ) - .is_err()); - assert!(sql_value_to_value( - "test", - &ConcreteDataType::uint16_datatype(), - &SqlValue::Number("3".into(), false), - None, - Some(UnaryOperator::Minus), - false - ) - .is_err()); - assert!(sql_value_to_value( - "test", - &ConcreteDataType::uint16_datatype(), - &SqlValue::Number("3".into(), false), - None, - None, - false - ) - .is_ok()); - } - - #[test] - fn test_string_to_value_auto_numeric() { - // Test string to boolean with auto cast - let result = parse_string_to_value( - "col", - "true".to_string(), - &ConcreteDataType::boolean_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::Boolean(true), result); - - // Test invalid string to boolean with auto cast - let result = parse_string_to_value( - "col", - "not_a_boolean".to_string(), - &ConcreteDataType::boolean_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to int8 - let result = parse_string_to_value( - "col", - "42".to_string(), - &ConcreteDataType::int8_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::Int8(42), result); - - // Test invalid string to int8 with auto cast - let result = parse_string_to_value( - "col", - "not_an_int8".to_string(), - &ConcreteDataType::int8_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to int16 - let result = parse_string_to_value( - "col", - "1000".to_string(), - &ConcreteDataType::int16_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::Int16(1000), result); - - // Test invalid string to int16 with auto cast - let result = parse_string_to_value( - "col", - "not_an_int16".to_string(), - &ConcreteDataType::int16_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to int32 - let result = parse_string_to_value( - "col", - "100000".to_string(), - &ConcreteDataType::int32_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::Int32(100000), result); - - // Test invalid string to int32 with auto cast - let result = parse_string_to_value( - "col", - "not_an_int32".to_string(), - &ConcreteDataType::int32_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to int64 - let result = parse_string_to_value( - "col", - "1000000".to_string(), - &ConcreteDataType::int64_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::Int64(1000000), result); - - // Test invalid string to int64 with auto cast - let result = parse_string_to_value( - "col", - "not_an_int64".to_string(), - &ConcreteDataType::int64_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to uint8 - let result = parse_string_to_value( - "col", - "200".to_string(), - &ConcreteDataType::uint8_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::UInt8(200), result); - - // Test invalid string to uint8 with auto cast - let result = parse_string_to_value( - "col", - "not_a_uint8".to_string(), - &ConcreteDataType::uint8_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to uint16 - let result = parse_string_to_value( - "col", - "60000".to_string(), - &ConcreteDataType::uint16_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::UInt16(60000), result); - - // Test invalid string to uint16 with auto cast - let result = parse_string_to_value( - "col", - "not_a_uint16".to_string(), - &ConcreteDataType::uint16_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to uint32 - let result = parse_string_to_value( - "col", - "4000000000".to_string(), - &ConcreteDataType::uint32_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::UInt32(4000000000), result); - - // Test invalid string to uint32 with auto cast - let result = parse_string_to_value( - "col", - "not_a_uint32".to_string(), - &ConcreteDataType::uint32_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to uint64 - let result = parse_string_to_value( - "col", - "18446744073709551615".to_string(), - &ConcreteDataType::uint64_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::UInt64(18446744073709551615), result); - - // Test invalid string to uint64 with auto cast - let result = parse_string_to_value( - "col", - "not_a_uint64".to_string(), - &ConcreteDataType::uint64_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to float32 - let result = parse_string_to_value( - "col", - "3.5".to_string(), - &ConcreteDataType::float32_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::Float32(OrderedF32::from(3.5)), result); - - // Test invalid string to float32 with auto cast - let result = parse_string_to_value( - "col", - "not_a_float32".to_string(), - &ConcreteDataType::float32_datatype(), - None, - true, - ); - assert!(result.is_err()); - - // Test string to float64 - let result = parse_string_to_value( - "col", - "3.5".to_string(), - &ConcreteDataType::float64_datatype(), - None, - true, - ) - .unwrap(); - assert_eq!(Value::Float64(OrderedF64::from(3.5)), result); - - // Test invalid string to float64 with auto cast - let result = parse_string_to_value( - "col", - "not_a_float64".to_string(), - &ConcreteDataType::float64_datatype(), - None, - true, - ); - assert!(result.is_err()); - } - - #[test] - fn test_auto_string_to_numeric() { - // Test with auto_string_to_numeric=true - let sql_val = SqlValue::SingleQuotedString("123".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::int32_datatype(), - &sql_val, - None, - None, - true, - ) - .unwrap(); - assert_eq!(Value::Int32(123), v); - - // Test with a float string - let sql_val = SqlValue::SingleQuotedString("3.5".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::float64_datatype(), - &sql_val, - None, - None, - true, - ) - .unwrap(); - assert_eq!(Value::Float64(OrderedFloat(3.5)), v); - - // Test with auto_string_to_numeric=false - let sql_val = SqlValue::SingleQuotedString("123".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::int32_datatype(), - &sql_val, - None, - None, - false, - ); - assert!(v.is_err()); - - // Test with an invalid numeric string but auto_string_to_numeric=true - // Should return an error now with the new auto_cast_to_numeric behavior - let sql_val = SqlValue::SingleQuotedString("not_a_number".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::int32_datatype(), - &sql_val, - None, - None, - true, - ); - assert!(v.is_err()); - - // Test with boolean type - let sql_val = SqlValue::SingleQuotedString("true".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::boolean_datatype(), - &sql_val, - None, - None, - true, - ) - .unwrap(); - assert_eq!(Value::Boolean(true), v); - - // Non-numeric types should still be handled normally - let sql_val = SqlValue::SingleQuotedString("hello".to_string()); - let v = sql_value_to_value( - "a", - &ConcreteDataType::string_datatype(), - &sql_val, - None, - None, - true, - ); - assert!(v.is_ok()); - } }