build(deps): update datafusion to latest and arrow to 51.0 (#3661)

* chore: update datafusion

* update sqlness case of time.sql

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: adjust range query partition

* fix: histogram incorrect result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: ignore filter pushdown temporarily

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: update limit sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: histogram with wrong distribution

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: update negative ordinal sqlness case

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* feat: bump df to cd7a00b

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve conflicts

* ignore test_range_filter

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix promql exec panic

* fix "select count(*)" exec error

* re-enable the "test_range_filter" test since removing the filter pushdown seems unnecessary

* fix: range query schema error

* update sqlness results

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve conflicts

* update datafusion, again

* fix pyo3 compile error, and update some sqlness results

* update decimal sqlness cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: promql literal

* fix udaf tests

* fix filter pushdown sqlness tests

* fix?: test_cast

* fix: rspy test fail due to datafusion `sin` signature change

* rebase main to see if there are any failed tests

* debug ci

* debug ci

* debug ci

* enforce input partition

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* debug ci

* fix ci

* fix ci

* debug ci

* debug ci

* debug ci

* fix sqlness

* feat: do not return error while creating a filter

* chore: remove array from error

* chore: replace todo with unimplemented

* Update src/flow/clippy.toml

Co-authored-by: Yingwen <realevenyag@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: WUJingdi <taylor-lagrange@qq.com>
Co-authored-by: discord9 <discord9@163.com>
Co-authored-by: evenyag <realevenyag@gmail.com>
Co-authored-by: tison <wander4096@gmail.com>
This commit is contained in:
LFC
2024-04-18 20:07:18 +08:00
committed by GitHub
parent 510782261d
commit 314f2704d4
174 changed files with 2869 additions and 2263 deletions

View File

@@ -83,14 +83,14 @@ impl<'a> ParserContext<'a> {
}
pub(crate) fn intern_parse_table_name(&mut self) -> Result<ObjectName> {
let raw_table_name = self
.parser
.parse_object_name()
.context(error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.parser.peek_token().to_string(),
})?;
let raw_table_name =
self.parser
.parse_object_name(false)
.context(error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.parser.peek_token().to_string(),
})?;
Ok(Self::canonicalize_object_name(raw_table_name))
}
@@ -100,7 +100,7 @@ impl<'a> ParserContext<'a> {
.try_with_sql(sql)
.context(SyntaxSnafu)?;
let function_name = parser.parse_identifier().context(SyntaxSnafu)?;
let function_name = parser.parse_identifier(false).context(SyntaxSnafu)?;
parser
.parse_function(ObjectName(vec![function_name]))
.context(SyntaxSnafu)
@@ -222,6 +222,22 @@ impl<'a> ParserContext<'a> {
.collect(),
)
}
/// Parses an object name by delegating to the inner parser.
///
/// This is a shortcut for sqlparser's method of the same name, `parse_object_name`,
/// always passing the constant argument `false`.
/// That argument was introduced for BigQuery and is always `false` for us,
/// so this wrapper saves us from repeating it at every call site.
///
/// Returns the parsed `ObjectName`, or the `ParserError` reported by the inner parser.
pub(crate) fn parse_object_name(&mut self) -> std::result::Result<ObjectName, ParserError> {
self.parser.parse_object_name(false)
}
/// Parses an identifier by delegating to the inner parser.
///
/// This is a shortcut for sqlparser's method of the same name, `parse_identifier`,
/// always passing the constant argument `false`.
/// That argument was introduced for BigQuery and is always `false` for us,
/// so this wrapper saves us from repeating it at every call site.
///
/// Returns the parsed `Ident`, or the `ParserError` reported by the inner parser.
pub(crate) fn parse_identifier(&mut self) -> std::result::Result<Ident, ParserError> {
self.parser.parse_identifier(false)
}
}
#[cfg(test)]

View File

@@ -33,7 +33,7 @@ impl<'a> ParserContext<'a> {
let parser = &mut self.parser;
parser.expect_keywords(&[Keyword::ALTER, Keyword::TABLE])?;
let raw_table_name = parser.parse_object_name()?;
let raw_table_name = parser.parse_object_name(false)?;
let table_name = Self::canonicalize_object_name(raw_table_name);
let alter_operation = if parser.parse_keyword(Keyword::ADD) {
@@ -48,7 +48,7 @@ impl<'a> ParserContext<'a> {
} else if let Token::Word(word) = parser.peek_token().token {
if word.value.to_ascii_uppercase() == "AFTER" {
let _ = parser.next_token();
let name = Self::canonicalize_identifier(parser.parse_identifier()?);
let name = Self::canonicalize_identifier(self.parse_identifier()?);
Some(AddColumnLocation::After {
column_name: name.value,
})
@@ -65,7 +65,7 @@ impl<'a> ParserContext<'a> {
}
} else if parser.parse_keyword(Keyword::DROP) {
if parser.parse_keyword(Keyword::COLUMN) {
let name = Self::canonicalize_identifier(self.parser.parse_identifier()?);
let name = Self::canonicalize_identifier(self.parse_identifier()?);
AlterTableOperation::DropColumn { name }
} else {
return Err(ParserError::ParserError(format!(
@@ -74,7 +74,7 @@ impl<'a> ParserContext<'a> {
)));
}
} else if parser.parse_keyword(Keyword::RENAME) {
let new_table_name_obj_raw = parser.parse_object_name()?;
let new_table_name_obj_raw = self.parse_object_name()?;
let new_table_name_obj = Self::canonicalize_object_name(new_table_name_obj_raw);
let new_table_name = match &new_table_name_obj.0[..] {
[table] => table.value.clone(),
@@ -128,7 +128,7 @@ mod tests {
location,
} => {
assert_eq!("tagk_i", column_def.name.value);
assert_eq!(DataType::String, column_def.data_type);
assert_eq!(DataType::String(None), column_def.data_type);
assert!(column_def
.options
.iter()
@@ -164,7 +164,7 @@ mod tests {
location,
} => {
assert_eq!("tagk_i", column_def.name.value);
assert_eq!(DataType::String, column_def.data_type);
assert_eq!(DataType::String(None), column_def.data_type);
assert!(column_def
.options
.iter()
@@ -200,7 +200,7 @@ mod tests {
location,
} => {
assert_eq!("tagk_i", column_def.name.value);
assert_eq!(DataType::String, column_def.data_type);
assert_eq!(DataType::String(None), column_def.data_type);
assert!(column_def
.options
.iter()

View File

@@ -47,14 +47,13 @@ impl<'a> ParserContext<'a> {
}
fn parser_copy_database(&mut self) -> Result<CopyDatabase> {
let database_name =
self.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a database name",
actual: self.peek_token_as_string(),
})?;
let database_name = self
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a database name",
actual: self.peek_token_as_string(),
})?;
let req = if self.parser.parse_keyword(Keyword::TO) {
let (with, connection, location) = self.parse_copy_parameters()?;
@@ -82,14 +81,13 @@ impl<'a> ParserContext<'a> {
}
fn parse_copy_table(&mut self) -> Result<CopyTable> {
let raw_table_name =
self.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.peek_token_as_string(),
})?;
let raw_table_name = self
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.peek_token_as_string(),
})?;
let table_name = Self::canonicalize_object_name(raw_table_name);
if self.parser.parse_keyword(Keyword::TO) {

View File

@@ -111,14 +111,11 @@ impl<'a> ParserContext<'a> {
self.parser
.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]);
let database_name = self
.parser
.parse_object_name()
.context(error::UnexpectedSnafu {
sql: self.sql,
expected: "a database name",
actual: self.peek_token_as_string(),
})?;
let database_name = self.parse_object_name().context(error::UnexpectedSnafu {
sql: self.sql,
expected: "a database name",
actual: self.peek_token_as_string(),
})?;
let database_name = Self::canonicalize_object_name(database_name);
Ok(Statement::CreateDatabase(CreateDatabase {
name: database_name,
@@ -319,6 +316,7 @@ impl<'a> ParserContext<'a> {
quote_style: None,
}],
is_primary: false,
characteristics: None,
};
constraints.push(constraint);
}
@@ -367,7 +365,7 @@ impl<'a> ParserContext<'a> {
pub fn parse_column_def(&mut self) -> std::result::Result<ColumnDef, ParserError> {
let parser = &mut self.parser;
let name = parser.parse_identifier()?;
let name = parser.parse_identifier(false)?;
if name.quote_style.is_none() &&
// "ALL_KEYWORDS" are sorted.
ALL_KEYWORDS.binary_search(&name.value.to_uppercase().as_str()).is_ok()
@@ -380,14 +378,14 @@ impl<'a> ParserContext<'a> {
let data_type = parser.parse_data_type()?;
let collation = if parser.parse_keyword(Keyword::COLLATE) {
Some(parser.parse_object_name()?)
Some(parser.parse_object_name(false)?)
} else {
None
};
let mut options = vec![];
loop {
if parser.parse_keyword(Keyword::CONSTRAINT) {
let name = Some(parser.parse_identifier()?);
let name = Some(parser.parse_identifier(false)?);
if let Some(option) = Self::parse_optional_column_option(parser)? {
options.push(ColumnOptionDef { name, option });
} else {
@@ -415,7 +413,7 @@ impl<'a> ParserContext<'a> {
) -> std::result::Result<Option<ColumnOption>, ParserError> {
if parser.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) {
Ok(Some(ColumnOption::CharacterSet(
parser.parse_object_name()?,
parser.parse_object_name(false)?,
)))
} else if parser.parse_keywords(&[Keyword::NOT, Keyword::NULL]) {
Ok(Some(ColumnOption::NotNull))
@@ -432,9 +430,15 @@ impl<'a> ParserContext<'a> {
} else if parser.parse_keyword(Keyword::DEFAULT) {
Ok(Some(ColumnOption::Default(parser.parse_expr()?)))
} else if parser.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) {
Ok(Some(ColumnOption::Unique { is_primary: true }))
Ok(Some(ColumnOption::Unique {
is_primary: true,
characteristics: None,
}))
} else if parser.parse_keyword(Keyword::UNIQUE) {
Ok(Some(ColumnOption::Unique { is_primary: false }))
Ok(Some(ColumnOption::Unique {
is_primary: false,
characteristics: None,
}))
} else if parser.parse_keywords(&[Keyword::TIME, Keyword::INDEX]) {
// Use a DialectSpecific option for time index
Ok(Some(ColumnOption::DialectSpecific(vec![
@@ -456,7 +460,7 @@ impl<'a> ParserContext<'a> {
fn parse_optional_table_constraint(&mut self) -> Result<Option<TableConstraint>> {
let name = if self.parser.parse_keyword(Keyword::CONSTRAINT) {
let raw_name = self.parser.parse_identifier().context(error::SyntaxSnafu)?;
let raw_name = self.parse_identifier().context(SyntaxSnafu)?;
Some(Self::canonicalize_identifier(raw_name))
} else {
None
@@ -485,6 +489,7 @@ impl<'a> ParserContext<'a> {
name,
columns,
is_primary: true,
characteristics: None,
}))
}
TokenWithLocation {
@@ -524,6 +529,7 @@ impl<'a> ParserContext<'a> {
}),
columns,
is_primary: false,
characteristics: None,
}))
}
unexpected => {
@@ -568,6 +574,7 @@ fn validate_time_index(columns: &[ColumnDef], constraints: &[TableConstraint]) -
name: Some(ident),
columns,
is_primary: false,
..
} = c
{
if ident.value == TIME_INDEX {
@@ -857,7 +864,7 @@ mod tests {
assert_column_def(&columns[0], "host", "STRING");
assert_column_def(&columns[1], "ts", "TIMESTAMP");
assert_column_def(&columns[2], "cpu", "FLOAT");
assert_column_def(&columns[3], "memory", "DOUBLE");
assert_column_def(&columns[3], "memory", "FLOAT64");
let constraints = &c.constraints;
assert_matches!(
@@ -1108,6 +1115,7 @@ ENGINE=mito";
name,
columns,
is_primary,
..
} => {
assert_eq!(name.unwrap().to_string(), "__time_index");
assert_eq!(columns.len(), 1);
@@ -1314,6 +1322,7 @@ ENGINE=mito";
name,
columns,
is_primary,
..
} => {
assert_eq!(name.unwrap().to_string(), "__time_index");
assert_eq!(columns.len(), 1);
@@ -1422,7 +1431,7 @@ ENGINE=mito";
assert_column_def(&columns[0], "host", "STRING");
assert_column_def(&columns[1], "ts", "TIMESTAMP");
assert_column_def(&columns[2], "cpu", "FLOAT");
assert_column_def(&columns[3], "memory", "DOUBLE");
assert_column_def(&columns[3], "memory", "FLOAT64");
let constraints = &c.constraints;
assert_matches!(

View File

@@ -31,8 +31,7 @@ impl<'a> ParserContext<'a> {
fn parse_describe_table(&mut self) -> Result<Statement> {
let raw_table_idents =
self.parser
.parse_object_name()
self.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",

View File

@@ -40,8 +40,7 @@ impl<'a> ParserContext<'a> {
let if_exists = self.parser.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
let raw_table_ident =
self.parser
.parse_object_name()
self.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
@@ -62,14 +61,13 @@ impl<'a> ParserContext<'a> {
let _ = self.parser.next_token();
let if_exists = self.parser.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
let database_name =
self.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a database name",
actual: self.peek_token_as_string(),
})?;
let database_name = self
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a database name",
actual: self.peek_token_as_string(),
})?;
let database_name = Self::canonicalize_object_name(database_name);
Ok(Statement::DropDatabase(DropDatabase::new(

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use snafu::ResultExt;
use sqlparser::ast::DescribeAlias;
use crate::error::{self, Result};
use crate::parser::ParserContext;
@@ -22,14 +23,14 @@ use crate::statements::statement::Statement;
/// EXPLAIN statement parser implementation
impl<'a> ParserContext<'a> {
pub(crate) fn parse_explain(&mut self) -> Result<Statement> {
let explain_statement =
self.parser
.parse_explain(false)
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a query statement",
actual: self.peek_token_as_string(),
})?;
let explain_statement = self
.parser
.parse_explain(DescribeAlias::Explain)
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a query statement",
actual: self.peek_token_as_string(),
})?;
Ok(Statement::Explain(Explain::try_from(explain_statement)?))
}
@@ -80,6 +81,7 @@ mod tests {
having: None,
qualify: None,
named_window: vec![],
value_table_mode: None,
};
let sp_statement = SpStatement::Query(Box::new(SpQuery {
@@ -87,13 +89,15 @@ mod tests {
body: Box::new(sqlparser::ast::SetExpr::Select(Box::new(select))),
order_by: vec![],
limit: None,
limit_by: vec![],
offset: None,
fetch: None,
locks: vec![],
for_clause: None,
}));
let explain = Explain::try_from(SpStatement::Explain {
describe_alias: false,
describe_alias: DescribeAlias::Explain,
analyze: false,
verbose: false,
statement: Box::new(sp_statement),

View File

@@ -59,14 +59,13 @@ impl<'a> ParserContext<'a> {
self.unsupported(self.peek_token_as_string())
}
} else if self.consume_token("VARIABLES") {
let variable =
self.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a variable name",
actual: self.peek_token_as_string(),
})?;
let variable = self
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a variable name",
actual: self.peek_token_as_string(),
})?;
Ok(Statement::ShowVariables(ShowVariables { variable }))
} else {
self.unsupported(self.peek_token_as_string())
@@ -75,14 +74,13 @@ impl<'a> ParserContext<'a> {
/// Parse SHOW CREATE TABLE statement
fn parse_show_create_table(&mut self) -> Result<Statement> {
let raw_table_name =
self.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.peek_token_as_string(),
})?;
let raw_table_name = self
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.peek_token_as_string(),
})?;
let table_name = Self::canonicalize_object_name(raw_table_name);
ensure!(
!table_name.0.is_empty(),
@@ -95,14 +93,13 @@ impl<'a> ParserContext<'a> {
fn parse_show_table_name(&mut self) -> Result<String> {
let _ = self.parser.next_token();
let table_name =
self.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.peek_token_as_string(),
})?;
let table_name = self
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",
actual: self.peek_token_as_string(),
})?;
ensure!(
table_name.0.len() == 1,
@@ -120,7 +117,6 @@ impl<'a> ParserContext<'a> {
fn parse_db_name(&mut self) -> Result<Option<String>> {
let _ = self.parser.next_token();
let db_name = self
.parser
.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
@@ -182,7 +178,7 @@ impl<'a> ParserContext<'a> {
Token::Word(w) => match w.keyword {
Keyword::LIKE => {
let _ = self.parser.next_token();
ShowKind::Like(self.parser.parse_identifier().with_context(|_| {
ShowKind::Like(self.parse_identifier().with_context(|_| {
error::UnexpectedSnafu {
sql: self.sql,
expected: "LIKE",
@@ -298,7 +294,7 @@ impl<'a> ParserContext<'a> {
Token::Word(w) => match w.keyword {
Keyword::LIKE => {
let _ = self.parser.next_token();
ShowKind::Like(self.parser.parse_identifier().with_context(|_| {
ShowKind::Like(self.parse_identifier().with_context(|_| {
error::UnexpectedSnafu {
sql: self.sql,
expected: "LIKE",
@@ -337,7 +333,7 @@ impl<'a> ParserContext<'a> {
}
Token::Word(w) => match w.keyword {
Keyword::LIKE => Ok(Statement::ShowDatabases(ShowDatabases::new(
ShowKind::Like(self.parser.parse_identifier().with_context(|_| {
ShowKind::Like(self.parse_identifier().with_context(|_| {
error::UnexpectedSnafu {
sql: self.sql,
expected: "LIKE",

View File

@@ -15,6 +15,7 @@
use std::sync::Arc;
use chrono::Utc;
use datafusion::execution::context::SessionState;
use datafusion::optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext};
use datafusion_common::config::ConfigOptions;
use datafusion_common::{DFSchema, Result as DFResult, ScalarValue, TableReference};
@@ -205,7 +206,7 @@ impl<'a> ParserContext<'a> {
fn parse_to_logical_expr(expr: sqlparser::ast::Expr) -> std::result::Result<Expr, TQLError> {
let empty_df_schema = DFSchema::empty();
SqlToRel::new(&StubContextProvider {})
SqlToRel::new(&StubContextProvider::default())
.sql_to_expr(expr.into(), &empty_df_schema, &mut Default::default())
.context(ConvertToLogicalExpressionSnafu)
}
@@ -262,20 +263,29 @@ impl<'a> ParserContext<'a> {
}
}
#[derive(Default)]
struct StubContextProvider {}
struct StubContextProvider {
state: SessionState,
}
/// Builds a `StubContextProvider` whose `state` is a `SessionState` created from
/// default arguments.
impl Default for StubContextProvider {
fn default() -> Self {
Self {
// Both arguments of `new_with_config_rt` are left at their `Default` values.
state: SessionState::new_with_config_rt(Default::default(), Default::default()),
}
}
}
impl ContextProvider for StubContextProvider {
fn get_table_provider(&self, _name: TableReference) -> DFResult<Arc<dyn TableSource>> {
fn get_table_source(&self, _name: TableReference) -> DFResult<Arc<dyn TableSource>> {
unimplemented!()
}
fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
None
fn get_function_meta(&self, name: &str) -> Option<Arc<ScalarUDF>> {
self.state.scalar_functions().get(name).cloned()
}
fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
unimplemented!()
fn get_aggregate_meta(&self, name: &str) -> Option<Arc<AggregateUDF>> {
self.state.aggregate_functions().get(name).cloned()
}
fn get_window_meta(&self, _name: &str) -> Option<Arc<WindowUDF>> {
@@ -289,6 +299,18 @@ impl ContextProvider for StubContextProvider {
fn options(&self) -> &ConfigOptions {
unimplemented!()
}
/// Returns the keys of `self.state.scalar_functions()` as owned strings.
fn udfs_names(&self) -> Vec<String> {
self.state.scalar_functions().keys().cloned().collect()
}
/// Returns the keys of `self.state.aggregate_functions()` as owned strings.
fn udafs_names(&self) -> Vec<String> {
self.state.aggregate_functions().keys().cloned().collect()
}
/// Returns the keys of `self.state.window_functions()` as owned strings.
fn udwfs_names(&self) -> Vec<String> {
self.state.window_functions().keys().cloned().collect()
}
}
#[cfg(test)]

View File

@@ -27,8 +27,7 @@ impl<'a> ParserContext<'a> {
let _ = self.parser.parse_keyword(Keyword::TABLE);
let raw_table_ident =
self.parser
.parse_object_name()
self.parse_object_name()
.with_context(|_| error::UnexpectedSnafu {
sql: self.sql,
expected: "a table name",

View File

@@ -345,7 +345,7 @@ pub fn has_primary_key_option(column_def: &ColumnDef) -> bool {
.options
.iter()
.any(|options| match options.option {
ColumnOption::Unique { is_primary } => is_primary,
ColumnOption::Unique { is_primary, .. } => is_primary,
_ => false,
})
}
@@ -414,10 +414,15 @@ pub fn sql_column_def_to_grpc_column_def(
.context(ConvertToGrpcDataTypeSnafu)?
.to_parts();
let is_primary_key = col
.options
.iter()
.any(|o| matches!(o.option, ColumnOption::Unique { is_primary: true }));
let is_primary_key = col.options.iter().any(|o| {
matches!(
o.option,
ColumnOption::Unique {
is_primary: true,
..
}
)
});
let semantic_type = if is_primary_key {
SemanticType::Tag
@@ -438,7 +443,7 @@ pub fn sql_column_def_to_grpc_column_def(
pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result<ConcreteDataType> {
match data_type {
SqlDataType::BigInt(_) => Ok(ConcreteDataType::int64_datatype()),
SqlDataType::BigInt(_) | SqlDataType::Int64 => Ok(ConcreteDataType::int64_datatype()),
SqlDataType::UnsignedBigInt(_) => Ok(ConcreteDataType::uint64_datatype()),
SqlDataType::Int(_) | SqlDataType::Integer(_) => Ok(ConcreteDataType::int32_datatype()),
SqlDataType::UnsignedInt(_) | SqlDataType::UnsignedInteger(_) => {
@@ -453,9 +458,9 @@ pub fn sql_data_type_to_concrete_data_type(data_type: &SqlDataType) -> Result<Co
SqlDataType::Char(_)
| SqlDataType::Varchar(_)
| SqlDataType::Text
| SqlDataType::String => Ok(ConcreteDataType::string_datatype()),
| SqlDataType::String(_) => Ok(ConcreteDataType::string_datatype()),
SqlDataType::Float(_) => Ok(ConcreteDataType::float32_datatype()),
SqlDataType::Double => Ok(ConcreteDataType::float64_datatype()),
SqlDataType::Double | SqlDataType::Float64 => Ok(ConcreteDataType::float64_datatype()),
SqlDataType::Boolean => Ok(ConcreteDataType::boolean_datatype()),
SqlDataType::Date => Ok(ConcreteDataType::date_datatype()),
SqlDataType::Binary(_)
@@ -502,7 +507,7 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu
ConcreteDataType::UInt16(_) => Ok(SqlDataType::UnsignedSmallInt(None)),
ConcreteDataType::Int8(_) => Ok(SqlDataType::TinyInt(None)),
ConcreteDataType::UInt8(_) => Ok(SqlDataType::UnsignedTinyInt(None)),
ConcreteDataType::String(_) => Ok(SqlDataType::String),
ConcreteDataType::String(_) => Ok(SqlDataType::String(None)),
ConcreteDataType::Float32(_) => Ok(SqlDataType::Float(None)),
ConcreteDataType::Float64(_) => Ok(SqlDataType::Double),
ConcreteDataType::Boolean(_) => Ok(SqlDataType::Boolean),
@@ -588,7 +593,10 @@ mod tests {
ConcreteDataType::string_datatype(),
);
check_type(SqlDataType::Text, ConcreteDataType::string_datatype());
check_type(SqlDataType::String, ConcreteDataType::string_datatype());
check_type(
SqlDataType::String(None),
ConcreteDataType::string_datatype(),
);
check_type(
SqlDataType::Float(None),
ConcreteDataType::float32_datatype(),
@@ -966,7 +974,10 @@ mod tests {
collation: None,
options: vec![ColumnOptionDef {
name: None,
option: ColumnOption::Unique { is_primary: true },
option: ColumnOption::Unique {
is_primary: true,
characteristics: None,
},
}],
};
@@ -1044,7 +1055,10 @@ mod tests {
collation: None,
options: vec![ColumnOptionDef {
name: None,
option: ColumnOption::Unique { is_primary: true },
option: ColumnOption::Unique {
is_primary: true,
characteristics: None,
},
}],
};
assert!(has_primary_key_option(&column_def));
@@ -1081,7 +1095,7 @@ mod tests {
let column_def = ColumnDef {
name: "col2".into(),
data_type: SqlDataType::String,
data_type: SqlDataType::String(None),
collation: None,
options: vec![
ColumnOptionDef {

View File

@@ -55,10 +55,10 @@ impl Insert {
match &self.inner {
Statement::Insert {
source:
box Query {
Some(box Query {
body: box SetExpr::Values(Values { rows, .. }),
..
},
}),
..
} => sql_exprs_to_values(rows),
_ => unreachable!(),
@@ -71,10 +71,10 @@ impl Insert {
match &self.inner {
Statement::Insert {
source:
box Query {
Some(box Query {
body: box SetExpr::Values(Values { rows, .. }),
..
},
}),
..
} => rows.iter().all(|es| {
es.iter().all(|expr| match expr {
@@ -100,7 +100,8 @@ impl Insert {
pub fn query_body(&self) -> Result<Option<GtQuery>> {
Ok(match &self.inner {
Statement::Insert {
source: box query, ..
source: Some(box query),
..
} => Some(query.clone().try_into()?),
_ => None,
})

View File

@@ -58,28 +58,56 @@ impl TransformRule for TypeAliasTransformRule {
}
fn visit_expr(&self, expr: &mut Expr) -> ControlFlow<()> {
/// Builds the AST of an `arrow_cast(<expr>, '<cast_type>')` function call.
///
/// `expr` becomes the first unnamed argument; `cast_type` is passed as a
/// single-quoted string literal in the second unnamed argument.
fn cast_expr_to_arrow_cast_func(expr: Expr, cast_type: String) -> Function {
    let value_arg = FunctionArg::Unnamed(FunctionArgExpr::Expr(expr));
    let type_literal = Expr::Value(Value::SingleQuotedString(cast_type));
    let type_arg = FunctionArg::Unnamed(FunctionArgExpr::Expr(type_literal));

    Function {
        name: ObjectName(vec![Ident::new("arrow_cast")]),
        args: vec![value_arg, type_arg],
        // All remaining clauses of the call are left unset / empty.
        filter: None,
        null_treatment: None,
        over: None,
        distinct: false,
        special: false,
        order_by: vec![],
    }
}
match expr {
// In the new sqlparser, "INT64" is no longer parsed as a custom data type.
// The new "Int64" type is not recognized by DataFusion, so we cannot "CAST" to it directly.
// We have to replace the expression with an "arrow_cast" function call here.
// The same applies to "FLOAT64".
Expr::Cast {
expr: cast_expr,
data_type,
..
} if matches!(data_type, DataType::Int64 | DataType::Float64) => {
if let Some(new_type) = get_data_type_by_alias_name(&data_type.to_string()) {
if let Ok(new_type) = sql_data_type_to_concrete_data_type(&new_type) {
*expr = Expr::Function(cast_expr_to_arrow_cast_func(
(**cast_expr).clone(),
new_type.as_arrow_type().to_string(),
));
}
}
}
// Type alias
Expr::Cast {
data_type: DataType::Custom(name, tokens),
expr: cast_expr,
..
} if name.0.len() == 1 && tokens.is_empty() => {
if let Some(new_type) = get_data_type_by_alias_name(name.0[0].value.as_str()) {
if let Ok(concrete_type) = sql_data_type_to_concrete_data_type(&new_type) {
let new_type = concrete_type.as_arrow_type();
*expr = Expr::Function(Function {
name: ObjectName(vec![Ident::new("arrow_cast")]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr((**cast_expr).clone())),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString(new_type.to_string()),
))),
],
over: None,
distinct: false,
special: false,
order_by: vec![],
});
if let Ok(new_type) = sql_data_type_to_concrete_data_type(&new_type) {
*expr = Expr::Function(cast_expr_to_arrow_cast_func(
(**cast_expr).clone(),
new_type.as_arrow_type().to_string(),
));
}
}
}
@@ -89,24 +117,16 @@ impl TransformRule for TypeAliasTransformRule {
Expr::Cast {
data_type: DataType::Timestamp(precision, zone),
expr: cast_expr,
..
} => {
if let Ok(concrete_type) =
sql_data_type_to_concrete_data_type(&DataType::Timestamp(*precision, *zone))
{
let new_type = concrete_type.as_arrow_type();
*expr = Expr::Function(Function {
name: ObjectName(vec![Ident::new("arrow_cast")]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr((**cast_expr).clone())),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString(new_type.to_string()),
))),
],
over: None,
distinct: false,
special: false,
order_by: vec![],
});
*expr = Expr::Function(cast_expr_to_arrow_cast_func(
(**cast_expr).clone(),
new_type.to_string(),
));
}
}
@@ -353,10 +373,10 @@ CREATE TABLE data_types (
tint INT8,
sint SMALLINT,
i INT,
bint BIGINT,
bint INT64,
v VARCHAR,
f FLOAT,
d DOUBLE,
d FLOAT64,
b BOOLEAN,
vb VARBINARY,
dt DATE,

View File

@@ -17,7 +17,7 @@ use std::fmt::{Display, Formatter};
use std::sync::LazyLock;
use regex::Regex;
use sqlparser::ast::{ObjectName, SqlOption, Value};
use sqlparser::ast::{Expr, ObjectName, SqlOption, Value};
static SQL_SECRET_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
vec![
@@ -47,9 +47,11 @@ pub fn format_raw_object_name(name: &ObjectName) -> String {
format!("{}", Inner { name })
}
pub fn parse_option_string(value: Value) -> Option<String> {
pub fn parse_option_string(value: Expr) -> Option<String> {
match value {
Value::SingleQuotedString(v) | Value::DoubleQuotedString(v) => Some(v),
Expr::Value(Value::SingleQuotedString(v)) | Expr::Value(Value::DoubleQuotedString(v)) => {
Some(v)
}
_ => None,
}
}
@@ -60,7 +62,9 @@ pub fn to_lowercase_options_map(opts: &[SqlOption]) -> HashMap<String, String> {
let mut map = HashMap::with_capacity(opts.len());
for SqlOption { name, value } in opts {
let value_str = match value {
Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) => s.clone(),
Expr::Value(Value::SingleQuotedString(s))
| Expr::Value(Value::DoubleQuotedString(s)) => s.clone(),
Expr::Identifier(i) => i.value.clone(),
_ => value.to_string(),
};
let _ = map.insert(name.value.to_lowercase().clone(), value_str);