From d82bc98717bc664b58daf2a102c2e5ee8627fc0f Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Sun, 13 Jul 2025 23:44:56 -0700 Subject: [PATCH] feat(parser): parse TQL in CTE position (#6456) * naive implementation Signed-off-by: Ruihang Xia * clean up Signed-off-by: Ruihang Xia * fix clippy Signed-off-by: Ruihang Xia * refactor to use existing tql parse logic Signed-off-by: Ruihang Xia * refactor display logic Signed-off-by: Ruihang Xia * refactor column list parsing logic Signed-off-by: Ruihang Xia * refactor to remove redundent check logic Signed-off-by: Ruihang Xia * set sql cte into Query Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- src/sql/src/parser.rs | 4 +- src/sql/src/parsers.rs | 1 + src/sql/src/parsers/with_tql_parser.rs | 366 +++++++++++++++++++++++++ src/sql/src/statements/query.rs | 32 ++- 4 files changed, 396 insertions(+), 7 deletions(-) create mode 100644 src/sql/src/parsers/with_tql_parser.rs diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 0da97cbcaf..6535382990 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -151,7 +151,9 @@ impl ParserContext<'_> { Keyword::REPLACE => self.parse_replace(), - Keyword::SELECT | Keyword::WITH | Keyword::VALUES => self.parse_query(), + Keyword::SELECT | Keyword::VALUES => self.parse_query(), + + Keyword::WITH => self.parse_with_tql(), Keyword::ALTER => self.parse_alter(), diff --git a/src/sql/src/parsers.rs b/src/sql/src/parsers.rs index 26f3ae9903..2c1e080e77 100644 --- a/src/sql/src/parsers.rs +++ b/src/sql/src/parsers.rs @@ -32,3 +32,4 @@ pub(crate) mod show_parser; pub(crate) mod tql_parser; pub(crate) mod truncate_parser; pub(crate) mod utils; +pub(crate) mod with_tql_parser; diff --git a/src/sql/src/parsers/with_tql_parser.rs b/src/sql/src/parsers/with_tql_parser.rs new file mode 100644 index 0000000000..b40693becc --- /dev/null +++ b/src/sql/src/parsers/with_tql_parser.rs @@ -0,0 +1,366 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the 
Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt; + +use serde::Serialize; +use snafu::ResultExt; +use sqlparser::ast::helpers::attached_token::AttachedToken; +use sqlparser::ast::{ + Cte, Ident, ObjectName, Query as SpQuery, TableAlias, TableAliasColumnDef, With, +}; +use sqlparser::keywords::Keyword; +use sqlparser::parser::IsOptional; +use sqlparser::tokenizer::Token; +use sqlparser_derive::{Visit, VisitMut}; + +use crate::dialect::GreptimeDbDialect; +use crate::error::{self, Result}; +use crate::parser::{ParseOptions, ParserContext}; +use crate::parsers::tql_parser; +use crate::statements::query::Query; +use crate::statements::statement::Statement; +use crate::statements::tql::Tql; + +/// Content of a CTE - either SQL or TQL +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] +pub enum CteContent { + Sql(Box), + Tql(Tql), +} + +/// A hybrid CTE that can contain either SQL or TQL +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] +pub struct HybridCte { + pub name: Ident, + /// Column aliases for the CTE table. Empty if not specified. 
+ pub columns: Vec, + pub content: CteContent, +} + +/// Extended WITH clause that supports hybrid SQL/TQL CTEs +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] +pub struct HybridCteWith { + pub recursive: bool, + pub cte_tables: Vec, +} + +impl fmt::Display for HybridCteWith { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "WITH ")?; + + if self.recursive { + write!(f, "RECURSIVE ")?; + } + + for (i, cte) in self.cte_tables.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", cte.name)?; + + if !cte.columns.is_empty() { + write!(f, " (")?; + for (j, col) in cte.columns.iter().enumerate() { + if j > 0 { + write!(f, ", ")?; + } + write!(f, "{}", col)?; + } + write!(f, ")")?; + } + + write!(f, " AS (")?; + match &cte.content { + CteContent::Sql(query) => write!(f, "{}", query)?, + CteContent::Tql(tql) => write!(f, "{}", tql)?, + } + write!(f, ")")?; + } + Ok(()) + } +} + +/// Parser implementation for hybrid WITH clauses containing TQL +impl ParserContext<'_> { + /// Parse a WITH clause that may contain TQL CTEs or SQL CTEs. 
+ pub(crate) fn parse_with_tql(&mut self) -> Result { + // Consume the WITH token + self.parser + .expect_keyword(Keyword::WITH) + .context(error::SyntaxSnafu)?; + + // Check for RECURSIVE keyword + let recursive = self.parser.parse_keyword(Keyword::RECURSIVE); + + // Parse the CTE list + let mut tql_cte_tables = Vec::new(); + let mut sql_cte_tables = Vec::new(); + + loop { + let cte = self.parse_hybrid_cte()?; + match cte.content { + CteContent::Sql(body) => sql_cte_tables.push(Cte { + alias: TableAlias { + name: cte.name, + columns: cte + .columns + .into_iter() + .map(|col| TableAliasColumnDef { + name: col.0[0].clone(), + data_type: None, + }) + .collect(), + }, + query: body, + from: None, + materialized: None, + closing_paren_token: AttachedToken::empty(), + }), + CteContent::Tql(_) => tql_cte_tables.push(cte), + } + + if !self.parser.consume_token(&Token::Comma) { + break; + } + } + + // Parse the main query + let main_query = self.parser.parse_query().context(error::SyntaxSnafu)?; + + // Convert the hybrid CTEs to a standard query with hybrid metadata + let hybrid_cte = HybridCteWith { + recursive, + cte_tables: tql_cte_tables, + }; + + // Create a Query statement with hybrid CTE metadata + let mut query = Query::try_from(*main_query)?; + query.hybrid_cte = Some(hybrid_cte); + query.inner.with = Some(With { + recursive, + cte_tables: sql_cte_tables, + with_token: AttachedToken::empty(), + }); + + Ok(Statement::Query(Box::new(query))) + } + + /// Parse a single CTE that can be either SQL or TQL + fn parse_hybrid_cte(&mut self) -> Result { + // Parse CTE name + let name = self.parser.parse_identifier().context(error::SyntaxSnafu)?; + let name = Self::canonicalize_identifier(name); + + // Parse optional column list + let columns = self + .parser + .parse_parenthesized_qualified_column_list(IsOptional::Optional, true) + .context(error::SyntaxSnafu)?; + + // Expect AS keyword + self.parser + .expect_keyword(Keyword::AS) + .context(error::SyntaxSnafu)?; + + // 
Parse the CTE content + self.parser + .expect_token(&Token::LParen) + .context(error::SyntaxSnafu)?; + + let content = self.parse_cte_content()?; + + self.parser + .expect_token(&Token::RParen) + .context(error::SyntaxSnafu)?; + + Ok(HybridCte { + name, + columns, + content, + }) + } + + /// Determine if CTE contains TQL or SQL and parse accordingly + fn parse_cte_content(&mut self) -> Result { + // Check if the next token is TQL + if let Token::Word(w) = &self.parser.peek_token().token { + if w.keyword == Keyword::NoKeyword + && w.quote_style.is_none() + && w.value.to_uppercase() == tql_parser::TQL + { + let tql = self.parse_tql_content_in_cte()?; + return Ok(CteContent::Tql(tql)); + } + } + + // Parse as SQL query + let sql_query = self.parser.parse_query().context(error::SyntaxSnafu)?; + Ok(CteContent::Sql(sql_query)) + } + + /// Parse TQL content within a CTE by reusing the standard TQL parser. + /// + /// This method consumes all tokens that belong to the TQL statement and + /// stops right **before** the closing `)` of the CTE so that the caller + /// can handle it normally. + /// + /// Only `TQL EVAL` is supported inside CTEs. + fn parse_tql_content_in_cte(&mut self) -> Result { + let mut collected: Vec = Vec::new(); + let mut paren_depth = 0usize; + + loop { + let token_with_span = self.parser.peek_token(); + + // Guard against unexpected EOF + if token_with_span.token == Token::EOF { + return Err(error::InvalidSqlSnafu { + msg: "Unexpected end of input while parsing TQL inside CTE".to_string(), + } + .build()); + } + + // Stop **before** the closing parenthesis that ends the CTE + if token_with_span.token == Token::RParen && paren_depth == 0 { + break; + } + + // Consume the token and push it into our buffer + let consumed = self.parser.next_token(); + match consumed.token { + Token::LParen => paren_depth += 1, + Token::RParen => { + // This RParen must belong to a nested expression since + // `paren_depth > 0` here. Decrease depth accordingly. 
+ paren_depth = paren_depth.saturating_sub(1); + } + _ => {} + } + + collected.push(consumed.token); + } + + // Re-construct the SQL string of the isolated TQL statement. + let tql_string = collected + .iter() + .map(|tok| tok.to_string()) + .collect::>() + .join(" "); + + // Use the shared parser to turn it into a `Statement`. + let mut stmts = ParserContext::create_with_dialect( + &tql_string, + &GreptimeDbDialect {}, + ParseOptions::default(), + )?; + + if stmts.len() != 1 { + return Err(error::InvalidSqlSnafu { + msg: "Expected a single TQL statement inside CTE".to_string(), + } + .build()); + } + + match stmts.remove(0) { + Statement::Tql(Tql::Eval(eval)) => Ok(Tql::Eval(eval)), + Statement::Tql(_) => Err(error::InvalidSqlSnafu { + msg: "Only TQL EVAL is supported in CTEs".to_string(), + } + .build()), + _ => Err(error::InvalidSqlSnafu { + msg: "Expected a TQL statement inside CTE".to_string(), + } + .build()), + } + } +} + +#[cfg(test)] +mod tests { + use crate::dialect::GreptimeDbDialect; + use crate::parser::{ParseOptions, ParserContext}; + use crate::parsers::with_tql_parser::CteContent; + use crate::statements::statement::Statement; + use crate::statements::tql::Tql; + + #[test] + fn test_parse_hybrid_cte_with_parentheses_in_query() { + // Test that parentheses within the TQL query don't interfere with CTE parsing + let sql = r#" + WITH tql_cte AS ( + TQL EVAL (0, 100, '5s') + sum(rate(http_requests_total[1m])) + (max(cpu_usage) * (1 + 0.5)) + ) + SELECT * FROM tql_cte + "#; + + let statements = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(statements.len(), 1); + + let Statement::Query(query) = &statements[0] else { + panic!("Expected Query statement"); + }; + let hybrid_cte = query.hybrid_cte.as_ref().unwrap(); + assert_eq!(hybrid_cte.cte_tables.len(), 1); + + // Should be TQL content + assert!(matches!( + hybrid_cte.cte_tables[0].content, + CteContent::Tql(_) + )); + + // Check that 
the query includes the parentheses (spaces are added by tokenizer) + if let CteContent::Tql(Tql::Eval(eval)) = &hybrid_cte.cte_tables[0].content { + // Verify that complex nested parentheses are preserved correctly + assert!(eval + .query + .contains("sum ( rate ( http_requests_total [ 1 m ] ) )")); + assert!(eval.query.contains("( max ( cpu_usage ) * ( 1 + 0.5 ) )")); + // Most importantly, verify the parentheses counting didn't break the parsing + assert!(eval.query.contains("+ ( max")); + } + } + + #[test] + fn test_parse_hybrid_cte_sql_and_tql() { + let sql = r#" + WITH + sql_cte(ts, value, label) AS (SELECT timestamp, val, name FROM metrics), + tql_cte(time, metric_value) AS (TQL EVAL (0, 100, '5s') cpu_usage) + SELECT s.ts, s.value, t.metric_value + FROM sql_cte s JOIN tql_cte t ON s.ts = t.time + "#; + + let statements = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(statements.len(), 1); + + let Statement::Query(query) = &statements[0] else { + panic!("Expected Query statement"); + }; + let hybrid_cte = query.hybrid_cte.as_ref().unwrap(); + assert_eq!(hybrid_cte.cte_tables.len(), 1); // only TQL CTE presents here + + // First CTE should be TQL with column aliases + let second_cte = &hybrid_cte.cte_tables[0]; + assert!(matches!(second_cte.content, CteContent::Tql(_))); + assert_eq!(second_cte.columns.len(), 2); + assert_eq!(second_cte.columns[0].0[0].value, "time"); + assert_eq!(second_cte.columns[1].0[0].value, "metric_value"); + } +} diff --git a/src/sql/src/statements/query.rs b/src/sql/src/statements/query.rs index b5221a2263..85344bfb6d 100644 --- a/src/sql/src/statements/query.rs +++ b/src/sql/src/statements/query.rs @@ -19,19 +19,24 @@ use sqlparser::ast::Query as SpQuery; use sqlparser_derive::{Visit, VisitMut}; use crate::error::Error; +use crate::parsers::with_tql_parser::HybridCteWith; -/// Query statement instance. 
+/// A wrapper around sqlparser's [`SpQuery`] that adds support for hybrid (SQL + TQL) CTEs.
 #[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
 pub struct Query {
     pub inner: SpQuery,
+    /// TQL CTEs parsed from the WITH clause, if any; SQL CTEs are kept in `inner.with`.
+    pub hybrid_cte: Option<HybridCteWith>,
 }
 
-/// Automatically converts from sqlparser Query instance to SqlQuery.
 impl TryFrom<SpQuery> for Query {
     type Error = Error;
 
-    fn try_from(q: SpQuery) -> Result<Self, Self::Error> {
-        Ok(Query { inner: q })
+    fn try_from(inner: SpQuery) -> Result<Self, Self::Error> {
+        Ok(Self {
+            inner,
+            hybrid_cte: None,
+        })
     }
 }
 
@@ -45,8 +50,31 @@ impl TryFrom<Query> for SpQuery {
 
 impl fmt::Display for Query {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "{}", self.inner)?;
-        Ok(())
+        // `hybrid_cte` only carries the TQL CTEs. When there are none, the inner
+        // query (with any plain SQL WITH clause) already renders correctly.
+        let Some(hybrid_cte) = self
+            .hybrid_cte
+            .as_ref()
+            .filter(|with| !with.cte_tables.is_empty())
+        else {
+            return write!(f, "{}", self.inner);
+        };
+
+        // Delegate the `WITH [RECURSIVE] <tql ctes>` prefix to `HybridCteWith`.
+        write!(f, "{}", hybrid_cte)?;
+
+        // Append the SQL CTEs kept in the inner query so they are not dropped.
+        // They follow the TQL CTEs because the two lists are stored separately.
+        if let Some(with) = &self.inner.with {
+            for cte in &with.cte_tables {
+                write!(f, ", {}", cte)?;
+            }
+        }
+
+        // Render the main query without its WITH clause, which is handled above.
+        let mut main_query = self.inner.clone();
+        main_query.with = None;
+        write!(f, " {}", main_query)
     }
 }
 
@@ -88,5 +116,18 @@ mod test {
                 .to_string(),
             "SELECT * FROM abc LEFT JOIN bcd WHERE abc.a = 1 AND bcd.d = 7 AND abc.id = bcd.id"
         );
+        assert_eq!(
+            create_query("WITH tql_cte AS (TQL EVAL (0, 100, '5s') up) SELECT * FROM tql_cte")
+                .unwrap()
+                .to_string(),
+            "WITH tql_cte AS (TQL EVAL (0, 100, '5s') up) SELECT * FROM tql_cte"
+        );
+        // A WITH clause without any TQL CTE must keep its SQL CTEs when displayed.
+        assert_eq!(
+            create_query("WITH sql_cte AS (SELECT 1) SELECT * FROM sql_cte")
+                .unwrap()
+                .to_string(),
+            "WITH sql_cte AS (SELECT 1) SELECT * FROM sql_cte"
+        );
     }
 }