From cb74f1ac34ba9586c8cdce8ca3fbb2f8a39aa831 Mon Sep 17 00:00:00 2001 From: "Lei, Huang" Date: Mon, 25 Apr 2022 17:01:55 +0800 Subject: [PATCH] feat: Add sql parser definition and trivial SHOW DATABASE implementation (#8) * feat: Add SQL parser definition and SHOW DATABASE implementation * chores: Eliminate clippy warnings and errors. * chores: remove Gt prefix in some structs; rename some mod; remove print(s) in unit tests; refactor crate layout; feat: wrap sqlparser error; * chores: reorder cargo dependency * chores: fix code style * chores: add #[cfg(test)] to unit tests * style: fix test mod style --- .gitignore | 8 + Cargo.lock | 2 + src/query/src/lib.rs | 8 +- src/sql/Cargo.toml | 4 +- src/sql/src/dialect.rs | 1 + src/sql/src/errors.rs | 20 ++ src/sql/src/lib.rs | 9 +- src/sql/src/parser.rs | 223 ++++++++++++++++++ src/sql/src/statements/mod.rs | 3 + src/sql/src/statements/show_kind.rs | 10 + src/sql/src/statements/statement.rs | 18 ++ .../src/statements/statement_show_database.rs | 14 ++ 12 files changed, 314 insertions(+), 6 deletions(-) create mode 100644 src/sql/src/dialect.rs create mode 100644 src/sql/src/errors.rs create mode 100644 src/sql/src/statements/mod.rs create mode 100644 src/sql/src/statements/show_kind.rs create mode 100644 src/sql/src/statements/statement.rs create mode 100644 src/sql/src/statements/statement_show_database.rs diff --git a/.gitignore b/.gitignore index 9aa15774b4..65f4835c0e 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,11 @@ # These are backup files generated by rustfmt **/*.rs.bk + +debug/ + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +# JetBrains IDE config directory +.idea/ \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 64e3fa91ca..9cade898fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,6 +118,8 @@ dependencies = [ name = "sql" version = "0.1.0" dependencies = [ + "query", + "snafu", "sqlparser", ] diff --git a/src/query/src/lib.rs b/src/query/src/lib.rs index c7de888be0..7af252fa23 100644 --- a/src/query/src/lib.rs +++ b/src/query/src/lib.rs @@ -1,4 +1,4 @@ -mod executor; -mod logical_optimizer; -mod physical_optimizer; -mod physical_planner; +pub mod executor; +pub mod logical_optimizer; +pub mod physical_optimizer; +pub mod physical_planner; diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index 01cdc0da6d..70bd188d09 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -6,4 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -sqlparser = "0.16.0" +query = { path = "../query" } +snafu = "0.7.0" +sqlparser = "0.16.0" \ No newline at end of file diff --git a/src/sql/src/dialect.rs b/src/sql/src/dialect.rs new file mode 100644 index 0000000000..7bfbbf52b9 --- /dev/null +++ b/src/sql/src/dialect.rs @@ -0,0 +1 @@ +// todo(hl) wrap sqlparser dialects diff --git a/src/sql/src/errors.rs b/src/sql/src/errors.rs new file mode 100644 index 0000000000..621f9efdc5 --- /dev/null +++ b/src/sql/src/errors.rs @@ -0,0 +1,20 @@ +use snafu::prelude::*; +use sqlparser::parser::ParserError as SpParserError; + +/// SQL parser errors. +#[derive(Debug, Snafu)] +#[snafu(visibility(pub(crate)))] +pub enum ParserError { + #[snafu(display("SQL statement is not supported: {sql}"))] + Unsupported { sql: String }, + + #[snafu(display( + "Unexpected token while parsing SQL statement: {sql}, expected: {expected}, found: {actual}" + ))] + Unexpected { + sql: String, + expected: String, + actual: String, + source: SpParserError, + }, +} diff --git a/src/sql/src/lib.rs b/src/sql/src/lib.rs index c289ce327f..8d9671e9e2 100644 --- a/src/sql/src/lib.rs +++ b/src/sql/src/lib.rs @@ -1,3 +1,10 @@ +#![feature(assert_matches)] + +extern crate core; + mod ast; -mod parser; +mod dialect; +mod errors; +pub mod parser; mod planner; +mod statements; diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 8b13789179..963e21d3c0 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -1 +1,224 @@ +use snafu::ResultExt; +use sqlparser::dialect::Dialect; +use sqlparser::keywords::Keyword; +use sqlparser::parser::Parser; +use sqlparser::tokenizer::{Token, Tokenizer}; +use crate::errors; +use crate::statements::show_kind::ShowKind; +use crate::statements::statement::Statement; +use crate::statements::statement_show_database::SqlShowDatabase; + +/// GrepTime SQL parser context, a simple wrapper for Datafusion SQL parser. +pub struct ParserContext<'a> { + pub(crate) parser: Parser<'a>, + pub(crate) sql: &'a str, +} + +impl<'a> ParserContext<'a> { + /// Parses SQL with given dialect + pub fn create_with_dialect( + sql: &'a str, + dialect: &dyn Dialect, + ) -> Result, errors::ParserError> { + let mut stmts: Vec = Vec::new(); + let mut tokenizer = Tokenizer::new(dialect, sql); + + let tokens: Vec = tokenizer.tokenize().unwrap(); + + let mut parser_ctx = ParserContext { + sql, + parser: Parser::new(tokens, dialect), + }; + + let mut expecting_statement_delimiter = false; + loop { + // ignore empty statements (between successive statement delimiters) + while parser_ctx.parser.consume_token(&Token::SemiColon) { + expecting_statement_delimiter = false; + } + + if parser_ctx.parser.peek_token() == Token::EOF { + break; + } + if expecting_statement_delimiter { + return parser_ctx.unsupported(); + } + + let statement = parser_ctx.parse_statement()?; + stmts.push(statement); + expecting_statement_delimiter = true; + } + + Ok(stmts) + } + + /// Parses parser context to a set of statements. + pub fn parse_statement(&mut self) -> Result { + match self.parser.peek_token() { + Token::Word(w) => { + match w.keyword { + Keyword::CREATE => { + self.parser.next_token(); + self.parse_create() + } + + Keyword::EXPLAIN => { + self.parser.next_token(); + self.parse_explain() + } + + Keyword::SHOW => { + self.parser.next_token(); + self.parse_show() + } + + Keyword::INSERT => self.parse_insert(), + + Keyword::SELECT | Keyword::WITH | Keyword::VALUES => self.parse_query(), + + // todo(hl) support more statements. + _ => self.unsupported(), + } + } + Token::LParen => self.parse_query(), + _ => self.unsupported(), + } + } + + /// Raises an "unsupported statement" error. + pub fn unsupported(&self) -> Result { + Err(errors::ParserError::Unsupported { + sql: self.sql.to_string(), + }) + } + + /// Parses SHOW statements + /// todo(hl) support `show table`/`show settings`/`show create`/`show users` ect. + fn parse_show(&mut self) -> Result { + if self.consume_token("DATABASES") || self.consume_token("SCHEMAS") { + Ok(self.parse_show_databases()?) + } else { + self.unsupported() + } + } + + fn parse_explain(&mut self) -> Result { + todo!() + } + + fn parse_insert(&mut self) -> Result { + todo!() + } + + fn parse_query(&mut self) -> Result { + todo!() + } + + fn parse_create(&mut self) -> Result { + todo!() + } + + pub fn consume_token(&mut self, expected: &str) -> bool { + if self.parser.peek_token().to_string().to_uppercase() == *expected.to_uppercase() { + self.parser.next_token(); + true + } else { + false + } + } + + /// Parses `SHOW DATABASES` statement. + pub fn parse_show_databases(&mut self) -> Result { + let tok = self.parser.next_token(); + match &tok { + Token::EOF | Token::SemiColon => Ok(Statement::ShowDatabases(SqlShowDatabase::new( + ShowKind::All, + ))), + Token::Word(w) => match w.keyword { + Keyword::LIKE => Ok(Statement::ShowDatabases(SqlShowDatabase::new( + ShowKind::Like( + self.parser + .parse_identifier() + .context(errors::UnexpectedSnafu { + sql: self.sql, + expected: "LIKE", + actual: tok.to_string(), + }) + .unwrap(), + ), + ))), + Keyword::WHERE => Ok(Statement::ShowDatabases(SqlShowDatabase::new( + ShowKind::Where(self.parser.parse_expr().context(errors::UnexpectedSnafu { + sql: self.sql.to_string(), + expected: "some valid expression".to_string(), + actual: self.parser.peek_token().to_string(), + })?), + ))), + _ => self.unsupported(), + }, + _ => self.unsupported(), + } + } +} + +#[cfg(test)] +mod tests { + use std::assert_matches::assert_matches; + + use sqlparser::dialect::GenericDialect; + + use super::*; + + #[test] + pub fn test_show_database_all() { + let sql = "SHOW DATABASES"; + let result = ParserContext::create_with_dialect(sql, &GenericDialect {}); + let stmts = result.unwrap(); + assert_eq!(1, stmts.len()); + + assert_matches!( + &stmts[0], + Statement::ShowDatabases(SqlShowDatabase { + kind: ShowKind::All + }) + ); + } + + #[test] + pub fn test_show_database_like() { + let sql = "SHOW DATABASES LIKE test_database"; + let result = ParserContext::create_with_dialect(sql, &GenericDialect {}); + let stmts = result.unwrap(); + assert_eq!(1, stmts.len()); + + assert_matches!( + &stmts[0], + Statement::ShowDatabases(SqlShowDatabase { + kind: ShowKind::Like(sqlparser::ast::Ident { + value: _, + quote_style: None, + }) + }) + ); + } + + #[test] + pub fn test_show_database_where() { + let sql = "SHOW DATABASES WHERE Database LIKE '%whatever1%' OR Database LIKE '%whatever2%'"; + let result = ParserContext::create_with_dialect(sql, &GenericDialect {}); + let stmts = result.unwrap(); + assert_eq!(1, stmts.len()); + + assert_matches!( + &stmts[0], + Statement::ShowDatabases(SqlShowDatabase { + kind: ShowKind::Where(sqlparser::ast::Expr::BinaryOp { + left: _, + right: _, + op: sqlparser::ast::BinaryOperator::Or, + }) + }) + ); + } +} diff --git a/src/sql/src/statements/mod.rs b/src/sql/src/statements/mod.rs new file mode 100644 index 0000000000..2b2ac6579a --- /dev/null +++ b/src/sql/src/statements/mod.rs @@ -0,0 +1,3 @@ +pub mod show_kind; +pub mod statement; +pub mod statement_show_database; diff --git a/src/sql/src/statements/show_kind.rs b/src/sql/src/statements/show_kind.rs new file mode 100644 index 0000000000..228c9e83cf --- /dev/null +++ b/src/sql/src/statements/show_kind.rs @@ -0,0 +1,10 @@ +use sqlparser::ast::Expr; +use sqlparser::ast::Ident; + +/// Show kind for SQL expressions like `SHOW DATABASE` or `SHOW TABLE` +#[derive(Debug, Clone, PartialEq)] +pub enum ShowKind { + All, + Like(Ident), + Where(Expr), +} diff --git a/src/sql/src/statements/statement.rs b/src/sql/src/statements/statement.rs new file mode 100644 index 0000000000..4227e6f626 --- /dev/null +++ b/src/sql/src/statements/statement.rs @@ -0,0 +1,18 @@ +use crate::statements::statement_show_database::SqlShowDatabase; + +/// Tokens parsed by `DFParser` are converted into these values. +#[derive(Debug, Clone, PartialEq)] +pub enum Statement { + // Databases. + ShowDatabases(SqlShowDatabase), +} + +/// Comment hints from SQL. +/// It'll be enabled when using `--comment` in mysql client. +/// Eg: `SELECT * FROM system.number LIMIT 1; -- { ErrorCode 25 }` +#[derive(Debug, Clone, PartialEq)] +pub struct Hint { + pub error_code: Option, + pub comment: String, + pub prefix: String, +} diff --git a/src/sql/src/statements/statement_show_database.rs b/src/sql/src/statements/statement_show_database.rs new file mode 100644 index 0000000000..b792507e6d --- /dev/null +++ b/src/sql/src/statements/statement_show_database.rs @@ -0,0 +1,14 @@ +use crate::statements::show_kind::ShowKind; + +/// SQL structure for `SHOW DATABASES`. +#[derive(Debug, Clone, PartialEq)] +pub struct SqlShowDatabase { + pub kind: ShowKind, +} + +impl SqlShowDatabase { + /// Creates a statement for `SHOW DATABASES` + pub fn new(kind: ShowKind) -> Self { + SqlShowDatabase { kind } + } +}