feat: type alias (#2331)

* fix: remove location from error msg

* feat: adds transformer for sqlparser statements

* feat: supports type alias

* fix: typo

* fix: license header

* test: adds timestamp_types test

* refactor: transform

* fix: rebase develop and fix tests

* fix: compile error

* chore: delete src/datanode/src/sql/create_external.rs
This commit is contained in:
dennis zhuang
2023-09-18 17:43:02 +08:00
committed by GitHub
parent 73af1368bd
commit b0c56a3e23
38 changed files with 1287 additions and 87 deletions

View File

@@ -15,10 +15,12 @@ datafusion-sql.workspace = true
datatypes = { workspace = true }
hex = "0.4"
itertools.workspace = true
lazy_static.workspace = true
once_cell.workspace = true
regex.workspace = true
snafu = { version = "0.7", features = ["backtraces"] }
sqlparser.workspace = true
sqlparser_derive = "0.1"
table = { workspace = true }
[dev-dependencies]

View File

@@ -13,7 +13,7 @@
// limitations under the License.
pub use sqlparser::ast::{
visit_expressions_mut, BinaryOperator, ColumnDef, ColumnOption, ColumnOptionDef, DataType,
Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, SqlOption, TableConstraint,
TimezoneInfo, Value, VisitMut, Visitor,
visit_expressions_mut, visit_statements_mut, BinaryOperator, ColumnDef, ColumnOption,
ColumnOptionDef, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName,
SqlOption, TableConstraint, TimezoneInfo, Value, Visit, VisitMut, Visitor, VisitorMut,
};

View File

@@ -22,6 +22,7 @@ use crate::ast::{Expr, ObjectName};
use crate::error::{self, Result, SyntaxSnafu};
use crate::parsers::tql_parser;
use crate::statements::statement::Statement;
use crate::statements::transform_statements;
/// GrepTime SQL parser context, a simple wrapper for Datafusion SQL parser.
pub struct ParserContext<'a> {
@@ -58,6 +59,8 @@ impl<'a> ParserContext<'a> {
expecting_statement_delimiter = true;
}
transform_statements(&mut stmts)?;
Ok(stmts)
}

View File

@@ -62,8 +62,8 @@ impl<'a> ParserContext<'a> {
let (with, connection, location) = self.parse_copy_to()?;
Ok(CopyDatabaseArgument {
database_name,
with,
connection,
with: with.into(),
connection: connection.into(),
location,
})
}
@@ -82,8 +82,8 @@ impl<'a> ParserContext<'a> {
let (with, connection, location) = self.parse_copy_to()?;
Ok(CopyTable::To(CopyTableArgument {
table_name,
with,
connection,
with: with.into(),
connection: connection.into(),
location,
}))
} else {
@@ -308,7 +308,10 @@ mod tests {
if let Some(expected_pattern) = test.expected_pattern {
assert_eq!(copy_table.pattern().unwrap(), expected_pattern);
}
assert_eq!(copy_table.connection.clone(), test.expected_connection);
assert_eq!(
copy_table.connection.clone(),
test.expected_connection.into()
);
}
_ => unreachable!(),
}
@@ -348,7 +351,10 @@ mod tests {
Statement::Copy(crate::statements::copy::Copy::CopyTable(CopyTable::To(
copy_table,
))) => {
assert_eq!(copy_table.connection.clone(), test.expected_connection);
assert_eq!(
copy_table.connection.clone(),
test.expected_connection.into()
);
}
_ => unreachable!(),
}
@@ -374,7 +380,7 @@ mod tests {
[("format".to_string(), "parquet".to_string())]
.into_iter()
.collect::<HashMap<_, _>>(),
stmt.with
stmt.with.map
);
assert_eq!(
@@ -384,7 +390,7 @@ mod tests {
]
.into_iter()
.collect::<HashMap<_, _>>(),
stmt.connection
stmt.connection.map
);
}
}

View File

@@ -37,7 +37,9 @@ use crate::statements::create::{
CreateDatabase, CreateExternalTable, CreateTable, PartitionEntry, Partitions, TIME_INDEX,
};
use crate::statements::statement::Statement;
use crate::statements::{sql_data_type_to_concrete_data_type, sql_value_to_value};
use crate::statements::{
get_data_type_by_alias_name, sql_data_type_to_concrete_data_type, sql_value_to_value,
};
use crate::util::parse_option_string;
pub const ENGINE: &str = "ENGINE";
@@ -106,7 +108,7 @@ impl<'a> ParserContext<'a> {
name: table_name,
columns,
constraints,
options,
options: options.into(),
if_not_exists,
engine,
}))
@@ -374,8 +376,11 @@ impl<'a> ParserContext<'a> {
msg: "time index column can't be null",
}
);
// The timestamp type may be an alias type, we have to retrieve the actual type.
let data_type = get_real_timestamp_type(&column.data_type);
ensure!(
matches!(column.data_type, DataType::Timestamp(_, _)),
matches!(data_type, DataType::Timestamp(_, _)),
InvalidColumnOptionSnafu {
name: column.name.to_string(),
msg: "time index column data type should be timestamp",
@@ -653,8 +658,9 @@ fn validate_time_index(create_table: &CreateTable) -> Result<()> {
),
})?;
let time_index_data_type = get_real_timestamp_type(&time_index_column.data_type);
ensure!(
matches!(time_index_column.data_type, DataType::Timestamp(_, _)),
matches!(time_index_data_type, DataType::Timestamp(_, _)),
InvalidColumnOptionSnafu {
name: time_index_column.name.to_string(),
msg: "time index column data type should be timestamp",
@@ -664,6 +670,19 @@ fn validate_time_index(create_table: &CreateTable) -> Result<()> {
Ok(())
}
/// Returns the data type a column type actually denotes.
///
/// A single-identifier custom type without modifiers (for example `TimestampMillisecond`)
/// may be a type alias; when it is a known alias the aliased type is returned.
/// Every other type, including unknown custom types, is returned as a clone of the input.
fn get_real_timestamp_type(data_type: &DataType) -> DataType {
    let aliased = match data_type {
        DataType::Custom(name, tokens) if name.0.len() == 1 && tokens.is_empty() => {
            get_data_type_by_alias_name(name.0[0].value.as_str())
        }
        _ => None,
    };

    aliased.unwrap_or_else(|| data_type.clone())
}
fn validate_partitions(columns: &[ColumnDef], partitions: &Partitions) -> Result<()> {
let partition_columns = ensure_partition_columns_defined(columns, partitions)?;
@@ -881,7 +900,7 @@ mod tests {
match &stmts[0] {
Statement::CreateExternalTable(c) => {
assert_eq!(c.name.to_string(), test.expected_table_name.to_string());
assert_eq!(c.options, test.expected_options);
assert_eq!(c.options, test.expected_options.into());
assert_eq!(c.if_not_exists, test.expected_if_not_exist);
assert_eq!(c.engine, test.expected_engine);
}
@@ -895,7 +914,7 @@ mod tests {
let sql = "CREATE EXTERNAL TABLE city (
host string,
ts int64,
cpu float64 default 0,
cpu float32 default 0,
memory float64,
TIME INDEX (ts),
PRIMARY KEY(ts, host)
@@ -911,13 +930,13 @@ mod tests {
match &stmts[0] {
Statement::CreateExternalTable(c) => {
assert_eq!(c.name.to_string(), "city");
assert_eq!(c.options, options);
assert_eq!(c.options, options.into());
let columns = &c.columns;
assert_column_def(&columns[0], "host", "STRING");
assert_column_def(&columns[1], "ts", "int64");
assert_column_def(&columns[2], "cpu", "float64");
assert_column_def(&columns[3], "memory", "float64");
assert_column_def(&columns[1], "ts", "BIGINT");
assert_column_def(&columns[2], "cpu", "FLOAT");
assert_column_def(&columns[3], "memory", "DOUBLE");
let constraints = &c.constraints;
assert_matches!(
@@ -1423,7 +1442,7 @@ ENGINE=mito";
let sql = r"create table demo(
host string,
ts timestamp,
cpu float64 default 0,
cpu float32 default 0,
memory float64,
TIME INDEX (ts),
PRIMARY KEY(ts, host)) engine=mito
@@ -1440,8 +1459,9 @@ ENGINE=mito";
let columns = &c.columns;
assert_column_def(&columns[0], "host", "STRING");
assert_column_def(&columns[1], "ts", "TIMESTAMP");
assert_column_def(&columns[2], "cpu", "float64");
assert_column_def(&columns[3], "memory", "float64");
assert_column_def(&columns[2], "cpu", "FLOAT");
assert_column_def(&columns[3], "memory", "DOUBLE");
let constraints = &c.constraints;
assert_matches!(
&constraints[0],

View File

@@ -20,10 +20,12 @@ pub mod describe;
pub mod drop;
pub mod explain;
pub mod insert;
mod option_map;
pub mod query;
pub mod show;
pub mod statement;
pub mod tql;
mod transform;
pub mod truncate;
use std::str::FromStr;
@@ -38,7 +40,9 @@ use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY};
use datatypes::types::TimestampType;
use datatypes::value::{OrderedF32, OrderedF64, Value};
pub use option_map::OptionMap;
use snafu::{ensure, OptionExt, ResultExt};
pub use transform::{get_data_type_by_alias_name, transform_statements};
use crate::ast::{
ColumnDef, ColumnOption, ColumnOptionDef, DataType as SqlDataType, Expr, TimezoneInfo,

View File

@@ -14,8 +14,9 @@
use common_query::AddColumnLocation;
use sqlparser::ast::{ColumnDef, Ident, ObjectName, TableConstraint};
use sqlparser_derive::{Visit, VisitMut};
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct AlterTable {
table_name: ObjectName,
alter_operation: AlterTableOperation,
@@ -38,7 +39,7 @@ impl AlterTable {
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub enum AlterTableOperation {
/// `ADD <table_constraint>`
AddConstraint(TableConstraint),

View File

@@ -12,35 +12,36 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
#[derive(Debug, Clone, PartialEq, Eq)]
use crate::statements::OptionMap;
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub enum Copy {
CopyTable(CopyTable),
CopyDatabase(CopyDatabaseArgument),
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub enum CopyTable {
To(CopyTableArgument),
From(CopyTableArgument),
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct CopyDatabaseArgument {
pub database_name: ObjectName,
pub with: HashMap<String, String>,
pub connection: HashMap<String, String>,
pub with: OptionMap,
pub connection: OptionMap,
pub location: String,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct CopyTableArgument {
pub table_name: ObjectName,
pub with: HashMap<String, String>,
pub connection: HashMap<String, String>,
pub with: OptionMap,
pub connection: OptionMap,
/// Copy tbl [To|From] 'location'.
pub location: String,
}

View File

@@ -12,13 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use common_catalog::consts::FILE_ENGINE;
use itertools::Itertools;
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{ColumnDef, Ident, ObjectName, SqlOption, TableConstraint, Value as SqlValue};
use crate::statements::OptionMap;
const LINE_SEP: &str = ",\n";
const COMMA_SEP: &str = ", ";
@@ -57,7 +58,7 @@ pub fn is_time_index(constraint: &TableConstraint) -> bool {
} if name.value == TIME_INDEX)
}
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
pub struct CreateTable {
/// Create if not exists
pub if_not_exists: bool,
@@ -124,7 +125,7 @@ impl CreateTable {
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
pub struct Partitions {
pub column_list: Vec<Ident>,
pub entries: Vec<PartitionEntry>,
@@ -139,7 +140,7 @@ impl Partitions {
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
pub struct PartitionEntry {
pub name: Ident,
pub value_list: Vec<SqlValue>,
@@ -197,14 +198,14 @@ impl Display for CreateTable {
}
}
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
pub struct CreateDatabase {
pub name: ObjectName,
/// Create if not exists
pub if_not_exists: bool,
}
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
pub struct CreateExternalTable {
/// Table name
pub name: ObjectName,
@@ -212,7 +213,7 @@ pub struct CreateExternalTable {
pub constraints: Vec<TableConstraint>,
/// Table options in `WITH`.
/// All keys are lowercase.
pub options: HashMap<String, String>,
pub options: OptionMap,
pub if_not_exists: bool,
pub engine: String,
}

View File

@@ -13,8 +13,9 @@
// limitations under the License.
use sqlparser::ast::Statement;
use sqlparser_derive::{Visit, VisitMut};
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct Delete {
pub inner: Statement,
}

View File

@@ -13,9 +13,10 @@
// limitations under the License.
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
/// SQL structure for `DESCRIBE TABLE`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct DescribeTable {
name: ObjectName,
}

View File

@@ -13,9 +13,10 @@
// limitations under the License.
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
/// DROP TABLE statement.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct DropTable {
table_name: ObjectName,
}

View File

@@ -13,11 +13,12 @@
// limitations under the License.
use sqlparser::ast::Statement as SpStatement;
use sqlparser_derive::{Visit, VisitMut};
use crate::error::Error;
/// Explain statement.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct Explain {
pub inner: SpStatement,
}

View File

@@ -13,12 +13,13 @@
// limitations under the License.
use sqlparser::ast::{ObjectName, Query, SetExpr, Statement, UnaryOperator, Values};
use sqlparser::parser::ParserError;
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{Expr, Value};
use crate::error::Result;
use crate::statements::query::Query as GtQuery;
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct Insert {
// Can only be sqlparser::ast::Statement::Insert variant
pub inner: Statement,

View File

@@ -0,0 +1,63 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
mod visit;
mod visit_mut;
use std::borrow::Borrow;
use std::collections::HashMap;
use std::iter::FromIterator;
/// A map of string options, such as the `WITH` or `CONNECTION` options of a statement.
///
/// The traits `Visit` and `VisitMut` are not implemented for `HashMap<String, String>`,
/// so the map is wrapped in this type and the traits are implemented by hand.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct OptionMap {
    /// The wrapped options, keyed by option name.
    pub map: HashMap<String, String>,
}

impl OptionMap {
    /// Sets option `k` to `v`, overwriting any value previously stored under `k`.
    pub fn insert(&mut self, k: String, v: String) {
        self.map.insert(k, v);
    }

    /// Returns the value stored under `k`, or `None` when the option is not set.
    pub fn get(&self, k: &str) -> Option<&String> {
        self.map.get(k)
    }
}

impl From<HashMap<String, String>> for OptionMap {
    /// Wraps an existing map without copying it.
    fn from(map: HashMap<String, String>) -> Self {
        Self { map }
    }
}

impl AsRef<HashMap<String, String>> for OptionMap {
    fn as_ref(&self) -> &HashMap<String, String> {
        &self.map
    }
}

impl Borrow<HashMap<String, String>> for OptionMap {
    fn borrow(&self) -> &HashMap<String, String> {
        &self.map
    }
}

impl FromIterator<(String, String)> for OptionMap {
    /// Collects `(key, value)` pairs; a later pair replaces an earlier one with the same key.
    fn from_iter<I: IntoIterator<Item = (String, String)>>(iter: I) -> Self {
        Self {
            map: iter.into_iter().collect(),
        }
    }
}

View File

@@ -0,0 +1,29 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::ControlFlow;
use sqlparser::ast::{Visit, Visitor};
use crate::statements::OptionMap;
impl Visit for OptionMap {
    /// Visits every key and value of the map, stopping at the first `Break`
    /// returned by the visitor.
    fn visit<V: Visitor>(&self, visitor: &mut V) -> ControlFlow<V::Break> {
        self.map.iter().try_for_each(|(key, value)| {
            key.visit(visitor)?;
            value.visit(visitor)
        })
    }
}

View File

@@ -0,0 +1,28 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::ControlFlow;
use sqlparser::ast::{VisitMut, VisitorMut};
use crate::statements::OptionMap;
impl VisitMut for OptionMap {
    /// Visits every value of the map mutably, stopping at the first `Break`
    /// returned by the visitor.
    ///
    /// Only values are visited: the map's mutable iteration does not hand out
    /// mutable access to its keys.
    fn visit<V: VisitorMut>(&mut self, visitor: &mut V) -> ControlFlow<V::Break> {
        self.map
            .values_mut()
            .try_for_each(|value| value.visit(visitor))
    }
}

View File

@@ -15,11 +15,12 @@
use std::fmt;
use sqlparser::ast::Query as SpQuery;
use sqlparser_derive::{Visit, VisitMut};
use crate::error::Error;
/// Query statement instance.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct Query {
pub inner: SpQuery,
}

View File

@@ -14,10 +14,12 @@
use std::fmt;
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{Expr, Ident, ObjectName};
/// Show kind for SQL expressions like `SHOW DATABASE` or `SHOW TABLE`
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub enum ShowKind {
All,
Like(Ident),
@@ -35,7 +37,7 @@ impl fmt::Display for ShowKind {
}
/// SQL structure for `SHOW DATABASES`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct ShowDatabases {
pub kind: ShowKind,
}
@@ -48,14 +50,14 @@ impl ShowDatabases {
}
/// SQL structure for `SHOW TABLES`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct ShowTables {
pub kind: ShowKind,
pub database: Option<String>,
}
/// SQL structure for `SHOW CREATE TABLE`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct ShowCreateTable {
pub table_name: ObjectName,
}

View File

@@ -14,6 +14,7 @@
use datafusion_sql::parser::Statement as DfStatement;
use sqlparser::ast::Statement as SpStatement;
use sqlparser_derive::{Visit, VisitMut};
use crate::error::{ConvertToDfStatementSnafu, Error};
use crate::statements::alter::AlterTable;
@@ -30,7 +31,7 @@ use crate::statements::truncate::TruncateTable;
/// Tokens parsed by `DFParser` are converted into these values.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub enum Statement {
// Query
Query(Box<Query>),

View File

@@ -11,14 +11,16 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#[derive(Debug, Clone, PartialEq, Eq)]
use sqlparser_derive::{Visit, VisitMut};
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub enum Tql {
Eval(TqlEval),
Explain(TqlExplain),
Analyze(TqlAnalyze),
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct TqlEval {
pub start: String,
pub end: String,
@@ -27,7 +29,7 @@ pub struct TqlEval {
}
/// TQL EXPLAIN (like SQL EXPLAIN): doesn't execute the query but tells how the query would be executed.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct TqlExplain {
pub start: String,
pub end: String,
@@ -36,7 +38,7 @@ pub struct TqlExplain {
}
/// TQL ANALYZE (like SQL ANALYZE): executes the plan and tells the detailed per-step execution time.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct TqlAnalyze {
pub start: String,
pub end: String,

View File

@@ -0,0 +1,64 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::ControlFlow;
use std::sync::Arc;
use lazy_static::lazy_static;
use sqlparser::ast::{visit_expressions_mut, Expr};
use crate::error::Result;
use crate::statements::statement::Statement;
mod type_alias;
pub use type_alias::get_data_type_by_alias_name;
use type_alias::TypeAliasTransformRule;
lazy_static! {
/// Registry of the [TransformRule]s that [transform_statements] applies,
/// in the order they are listed here.
static ref RULES: Vec<Arc<dyn TransformRule>> = vec![
Arc::new(TypeAliasTransformRule{}),
];
}
/// A rewrite rule applied to parsed statements and to the expressions they contain.
///
/// Both hooks default to doing nothing, so a rule only overrides the one it needs.
pub(crate) trait TransformRule: Send + Sync {
    /// Rewrites a whole [Statement] in place.
    fn visit_statement(&self, _stmt: &mut Statement) -> Result<()> {
        Ok(())
    }

    /// Rewrites a single [Expr] in place.
    fn visit_expr(&self, _expr: &mut Expr) -> ControlFlow<()> {
        ControlFlow::Continue(())
    }
}
/// Transform statements by rules
pub fn transform_statements(stmts: &mut Vec<Statement>) -> Result<()> {
for stmt in &mut *stmts {
for rule in RULES.iter() {
rule.visit_statement(stmt)?;
}
}
visit_expressions_mut(stmts, |expr| {
for rule in RULES.iter() {
rule.visit_expr(expr)?;
}
ControlFlow::<()>::Continue(())
});
Ok(())
}

View File

@@ -0,0 +1,353 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::ops::ControlFlow;
use datatypes::data_type::DataType as GreptimeDataType;
use sqlparser::ast::{
ColumnDef, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, Value,
};
use crate::error::Result;
use crate::statements::create::{CreateExternalTable, CreateTable};
use crate::statements::statement::Statement;
use crate::statements::transform::TransformRule;
use crate::statements::{sql_data_type_to_concrete_data_type, TimezoneInfo};
/// SQL data type alias transformer:
/// - `TimestampSecond`, `Timestamp_s`, `Timestamp_sec` for `Timestamp(0)`.
/// - `TimestampMillisecond`, `Timestamp_ms` for `Timestamp(3)`.
/// - `TimestampMicrosecond`, `Timestamp_us` for `Timestamp(6)`.
/// - `TimestampNanosecond`, `Timestamp_ns` for `Timestamp(9)`.
/// - `INT8` for `tinyint`
/// - `INT16` for `smallint`
/// - `INT32` for `int`
/// - `INT64` for `bigint`
/// - And `UINT8`, `UINT16` etc. for `UnsignedTinyint` etc.
/// - `FLOAT32` for `float`
/// - `FLOAT64` for `double`
///
/// Alias names are matched case-insensitively.
pub(crate) struct TypeAliasTransformRule;
impl TransformRule for TypeAliasTransformRule {
fn visit_statement(&self, stmt: &mut Statement) -> Result<()> {
match stmt {
Statement::CreateTable(CreateTable { columns, .. }) => {
columns
.iter_mut()
.for_each(|ColumnDef { data_type, .. }| replace_type_alias(data_type));
}
Statement::CreateExternalTable(CreateExternalTable { columns, .. }) => {
columns
.iter_mut()
.for_each(|ColumnDef { data_type, .. }| replace_type_alias(data_type));
}
_ => {}
}
Ok(())
}
fn visit_expr(&self, expr: &mut Expr) -> ControlFlow<()> {
match expr {
// Type alias
Expr::Cast {
data_type: DataType::Custom(name, tokens),
expr: cast_expr,
} if name.0.len() == 1 && tokens.is_empty() => {
if let Some(new_type) = get_data_type_by_alias_name(name.0[0].value.as_str()) {
if let Ok(concrete_type) = sql_data_type_to_concrete_data_type(&new_type) {
let new_type = concrete_type.as_arrow_type();
*expr = Expr::Function(Function {
name: ObjectName(vec![Ident::new("arrow_cast")]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr((**cast_expr).clone())),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString(new_type.to_string()),
))),
],
over: None,
distinct: false,
special: false,
order_by: vec![],
});
}
}
}
// Timestamp(precision) in cast, datafusion doesn't support Timestamp(9) etc.
// We have to transform it into arrow_cast(expr, type).
Expr::Cast {
data_type: DataType::Timestamp(precision, zone),
expr: cast_expr,
} => {
if let Ok(concrete_type) =
sql_data_type_to_concrete_data_type(&DataType::Timestamp(*precision, *zone))
{
let new_type = concrete_type.as_arrow_type();
*expr = Expr::Function(Function {
name: ObjectName(vec![Ident::new("arrow_cast")]),
args: vec![
FunctionArg::Unnamed(FunctionArgExpr::Expr((**cast_expr).clone())),
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
Value::SingleQuotedString(new_type.to_string()),
))),
],
over: None,
distinct: false,
special: false,
order_by: vec![],
});
}
}
// TODO(dennis): supports try_cast
_ => {}
}
ControlFlow::<()>::Continue(())
}
}
/// Replaces `data_type` in place with the type it aliases.
///
/// Only a single-identifier custom type without modifiers can be an alias;
/// anything else, including an unknown custom type, is left unchanged.
fn replace_type_alias(data_type: &mut DataType) {
    // TODO(dennis): The sqlparser latest version contains the Int8 alias for postgres Bigint.
    // Which means 8 bytes in postgres (not 8 bits). If we upgrade the sqlparser, need to process it.
    // See https://docs.rs/sqlparser/latest/sqlparser/ast/enum.DataType.html#variant.Int8
    let aliased = match data_type {
        DataType::Custom(name, tokens) if name.0.len() == 1 && tokens.is_empty() => {
            get_data_type_by_alias_name(name.0[0].value.as_str())
        }
        _ => None,
    };

    if let Some(real_type) = aliased {
        *data_type = real_type;
    }
}
/// Looks up the SQL data type that the alias `name` stands for.
///
/// The lookup ignores ASCII case, so `float64`, `Float64` and `FLOAT64` are equivalent.
/// Case folding is deliberately ASCII-only: Unicode upper-casing would map look-alike
/// characters onto ASCII letters (for example the dotless `ı` onto `I`) and make
/// identifiers that are not aliases resolve to one.
///
/// Returns `None` when `name` is not a known alias.
pub fn get_data_type_by_alias_name(name: &str) -> Option<DataType> {
    match name.to_ascii_uppercase().as_str() {
        // Timestamp type alias
        "TIMESTAMP_S" | "TIMESTAMP_SEC" | "TIMESTAMPSECOND" => {
            Some(DataType::Timestamp(Some(0), TimezoneInfo::None))
        }
        "TIMESTAMP_MS" | "TIMESTAMPMILLISECOND" => {
            Some(DataType::Timestamp(Some(3), TimezoneInfo::None))
        }
        "TIMESTAMP_US" | "TIMESTAMPMICROSECOND" => {
            Some(DataType::Timestamp(Some(6), TimezoneInfo::None))
        }
        "TIMESTAMP_NS" | "TIMESTAMPNANOSECOND" => {
            Some(DataType::Timestamp(Some(9), TimezoneInfo::None))
        }
        // Number type alias
        "INT8" => Some(DataType::TinyInt(None)),
        "INT16" => Some(DataType::SmallInt(None)),
        "INT32" => Some(DataType::Int(None)),
        "INT64" => Some(DataType::BigInt(None)),
        "UINT8" => Some(DataType::UnsignedTinyInt(None)),
        "UINT16" => Some(DataType::UnsignedSmallInt(None)),
        "UINT32" => Some(DataType::UnsignedInt(None)),
        "UINT64" => Some(DataType::UnsignedBigInt(None)),
        "FLOAT32" => Some(DataType::Float(None)),
        "FLOAT64" => Some(DataType::Double),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
use sqlparser::dialect::GenericDialect;
use super::*;
use crate::parser::ParserContext;
use crate::statements::transform_statements;
#[test]
fn test_get_data_type_by_alias_name() {
    // Each alias, in mixed letter case, paired with the type it must resolve to.
    let cases = [
        ("float64", DataType::Double),
        ("Float64", DataType::Double),
        ("FLOAT64", DataType::Double),
        ("float32", DataType::Float(None)),
        ("int8", DataType::TinyInt(None)),
        ("INT16", DataType::SmallInt(None)),
        ("INT32", DataType::Int(None)),
        ("INT64", DataType::BigInt(None)),
        ("Uint8", DataType::UnsignedTinyInt(None)),
        ("UINT16", DataType::UnsignedSmallInt(None)),
        ("UINT32", DataType::UnsignedInt(None)),
        ("uint64", DataType::UnsignedBigInt(None)),
        (
            "TimestampSecond",
            DataType::Timestamp(Some(0), TimezoneInfo::None),
        ),
        (
            "Timestamp_s",
            DataType::Timestamp(Some(0), TimezoneInfo::None),
        ),
        (
            "Timestamp_sec",
            DataType::Timestamp(Some(0), TimezoneInfo::None),
        ),
        (
            "TimestampMilliSecond",
            DataType::Timestamp(Some(3), TimezoneInfo::None),
        ),
        (
            "Timestamp_ms",
            DataType::Timestamp(Some(3), TimezoneInfo::None),
        ),
        (
            "TimestampMicroSecond",
            DataType::Timestamp(Some(6), TimezoneInfo::None),
        ),
        (
            "Timestamp_us",
            DataType::Timestamp(Some(6), TimezoneInfo::None),
        ),
        (
            "TimestampNanoSecond",
            DataType::Timestamp(Some(9), TimezoneInfo::None),
        ),
        (
            "Timestamp_ns",
            DataType::Timestamp(Some(9), TimezoneInfo::None),
        ),
    ];

    for (alias, expected) in cases {
        assert_eq!(get_data_type_by_alias_name(alias), Some(expected));
    }
}
/// Parses a `SELECT` that casts a timestamp literal to `alias` and checks that the cast
/// has been rewritten into an `arrow_cast` to a timestamp with the `expected` time unit.
fn test_timestamp_alias(alias: &str, expected: &str) {
    let sql = format!("SELECT TIMESTAMP '2020-01-01 01:23:45.12345678'::{alias}");
    let mut stmts = ParserContext::create_with_dialect(&sql, &GenericDialect {}).unwrap();
    transform_statements(&mut stmts).unwrap();

    let expected_sql = format!("SELECT arrow_cast(TIMESTAMP '2020-01-01 01:23:45.12345678', 'Timestamp({expected}, None)')");
    if let Statement::Query(query) = &stmts[0] {
        assert_eq!(expected_sql, query.to_string());
    } else {
        unreachable!();
    }
}
/// Same check as `test_timestamp_alias`, for the explicit `Timestamp(<precision>)` spelling.
fn test_timestamp_precision_type(precision: i32, expected: &str) {
    let type_name = format!("Timestamp({precision})");
    test_timestamp_alias(&type_name, expected);
}
#[test]
fn test_transform_timestamp_alias() {
    // Timestamp[Second | Millisecond | Microsecond | Nanosecond]
    let alias_cases = [
        ("TimestampSecond", "Second"),
        ("Timestamp_s", "Second"),
        ("TimestampMillisecond", "Millisecond"),
        ("Timestamp_ms", "Millisecond"),
        ("TimestampMicrosecond", "Microsecond"),
        ("Timestamp_us", "Microsecond"),
        ("TimestampNanosecond", "Nanosecond"),
        ("Timestamp_ns", "Nanosecond"),
    ];
    for (alias, unit) in alias_cases {
        test_timestamp_alias(alias, unit);
    }

    // Timestamp(precision)
    let precision_cases = [
        (0, "Second"),
        (3, "Millisecond"),
        (6, "Microsecond"),
        (9, "Nanosecond"),
    ];
    for (precision, unit) in precision_cases {
        test_timestamp_precision_type(precision, unit);
    }
}
// Checks that every alias column type in a `CREATE TABLE` statement is replaced by the
// SQL type it stands for, by comparing the statement's rendered text with the expected DDL.
#[test]
fn test_create_sql_with_type_alias() {
// Input DDL: alias types are mixed with regular types that must stay as they are.
let sql = r#"
CREATE TABLE data_types (
s string,
tint int8,
sint int16,
i int32,
bint int64,
v varchar,
f float32,
d float64,
b boolean,
vb varbinary,
dt date,
dtt datetime,
ts0 TimestampSecond,
ts3 TimestampMillisecond,
ts6 TimestampMicrosecond,
ts9 TimestampNanosecond DEFAULT CURRENT_TIMESTAMP TIME INDEX,
PRIMARY KEY(s));"#;
let mut stmts = ParserContext::create_with_dialect(sql, &GenericDialect {}).unwrap();
// NOTE(review): the parser appears to apply the transform rules already, so this
// explicit call looks redundant (or an idempotence check) — confirm.
transform_statements(&mut stmts).unwrap();
match &stmts[0] {
Statement::CreateTable(c) => {
// Expected rendering: aliases resolved, and the inline `TIME INDEX` column option
// shown as a `NOT NULL` column plus a separate `TIME INDEX (ts9)` constraint.
let expected = r#"CREATE TABLE data_types (
s STRING,
tint TINYINT,
sint SMALLINT,
i INT,
bint BIGINT,
v VARCHAR,
f FLOAT,
d DOUBLE,
b BOOLEAN,
vb VARBINARY,
dt DATE,
dtt DATETIME,
ts0 TIMESTAMP(0),
ts3 TIMESTAMP(3),
ts6 TIMESTAMP(6),
ts9 TIMESTAMP(9) DEFAULT CURRENT_TIMESTAMP() NOT NULL,
TIME INDEX (ts9),
PRIMARY KEY (s)
)
ENGINE=mito
"#;
assert_eq!(expected, c.to_string());
}
_ => unreachable!(),
}
}
}

View File

@@ -13,9 +13,10 @@
// limitations under the License.
use sqlparser::ast::ObjectName;
use sqlparser_derive::{Visit, VisitMut};
/// TRUNCATE TABLE statement.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)]
pub struct TruncateTable {
table_name: ObjectName,
}