feat(parser): parse TQL in CTE position (#6456)

* naive implementation

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor to use existing tql parse logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor display logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor column list parsing logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* refactor to remove redundant check logic

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* set sql cte into Query

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-07-13 23:44:56 -07:00
committed by GitHub
parent 582bcc3b14
commit d82bc98717
4 changed files with 396 additions and 7 deletions

View File

@@ -151,7 +151,9 @@ impl ParserContext<'_> {
Keyword::REPLACE => self.parse_replace(),
Keyword::SELECT | Keyword::WITH | Keyword::VALUES => self.parse_query(),
Keyword::SELECT | Keyword::VALUES => self.parse_query(),
Keyword::WITH => self.parse_with_tql(),
Keyword::ALTER => self.parse_alter(),

View File

@@ -32,3 +32,4 @@ pub(crate) mod show_parser;
pub(crate) mod tql_parser;
pub(crate) mod truncate_parser;
pub(crate) mod utils;
pub(crate) mod with_tql_parser;

View File

@@ -0,0 +1,366 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::fmt;
use serde::Serialize;
use snafu::ResultExt;
use sqlparser::ast::helpers::attached_token::AttachedToken;
use sqlparser::ast::{
Cte, Ident, ObjectName, Query as SpQuery, TableAlias, TableAliasColumnDef, With,
};
use sqlparser::keywords::Keyword;
use sqlparser::parser::IsOptional;
use sqlparser::tokenizer::Token;
use sqlparser_derive::{Visit, VisitMut};
use crate::dialect::GreptimeDbDialect;
use crate::error::{self, Result};
use crate::parser::{ParseOptions, ParserContext};
use crate::parsers::tql_parser;
use crate::statements::query::Query;
use crate::statements::statement::Statement;
use crate::statements::tql::Tql;
/// Content of a CTE - either SQL or TQL.
///
/// This is the body found between the parentheses that follow `AS` in a
/// `WITH name AS ( ... )` entry.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub enum CteContent {
/// A regular SQL query, kept as the boxed sqlparser query AST.
Sql(Box<SpQuery>),
/// A TQL statement. The parser in this file only ever produces
/// `Tql::Eval` here; other TQL statements are rejected with an error.
Tql(Tql),
}
/// A hybrid CTE that can contain either SQL or TQL.
///
/// Represents one `name [(columns)] AS ( content )` entry of a `WITH` list.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct HybridCte {
/// Name of the CTE. The parser passes the parsed identifier through
/// `canonicalize_identifier` before storing it here.
pub name: Ident,
/// Column aliases for the CTE table. Empty if not specified.
pub columns: Vec<ObjectName>,
/// The parsed body of the CTE (SQL query or TQL statement).
pub content: CteContent,
}
/// Extended WITH clause that supports hybrid SQL/TQL CTEs.
///
/// Its `Display` implementation renders
/// `WITH [RECURSIVE] name [(cols)] AS (body), ...`.
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct HybridCteWith {
/// Whether the `RECURSIVE` keyword followed `WITH`.
pub recursive: bool,
/// The CTEs of this clause. `parse_with_tql` stores only the TQL-backed
/// CTEs here; SQL CTEs are placed on the inner sqlparser query's `with`.
pub cte_tables: Vec<HybridCte>,
}
/// Renders the clause as `WITH [RECURSIVE] name [(c1, c2)] AS (body), ...`.
///
/// No trailing space is emitted; the caller decides what follows the clause.
impl fmt::Display for HybridCteWith {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("WITH ")?;
        if self.recursive {
            f.write_str("RECURSIVE ")?;
        }

        // Empty before the first entry, ", " before every later one.
        let mut separator = "";
        for cte in &self.cte_tables {
            write!(f, "{separator}{}", cte.name)?;
            separator = ", ";

            // The column alias list is printed only when aliases were given.
            if !cte.columns.is_empty() {
                let column_list = cte
                    .columns
                    .iter()
                    .map(ToString::to_string)
                    .collect::<Vec<_>>()
                    .join(", ");
                write!(f, " ({column_list})")?;
            }

            match &cte.content {
                CteContent::Sql(query) => write!(f, " AS ({query})")?,
                CteContent::Tql(tql) => write!(f, " AS ({tql})")?,
            }
        }
        Ok(())
    }
}
/// Parser implementation for hybrid WITH clauses containing TQL
impl ParserContext<'_> {
/// Parse a `WITH` clause that may contain TQL CTEs or SQL CTEs, followed by
/// the main query.
///
/// SQL CTEs are converted to sqlparser [`Cte`]s and stored in the main
/// query's `with`; TQL CTEs are stored in `Query::hybrid_cte`. Each of the
/// two is attached only when it has at least one entry, so that:
///
/// * a pure-SQL `WITH` yields `hybrid_cte == None` and keeps being rendered
///   through the inner query (an empty hybrid clause would otherwise make
///   `Query`'s `Display` print `WITH  SELECT ...` and drop every CTE);
/// * a TQL-only `WITH` does not leave an empty `With` on the inner query,
///   which would render as a dangling `WITH `.
///
/// NOTE(review): when SQL and TQL CTEs are mixed, `Query`'s `Display` prints
/// only the hybrid clause and clears `inner.with`, so the SQL CTEs are not
/// rendered. That has to be addressed in the `Display` impl of `Query`.
///
/// # Errors
///
/// Returns a syntax error when the `WITH` keyword, a CTE entry or the main
/// query cannot be parsed, and propagates errors from parsing TQL bodies.
pub(crate) fn parse_with_tql(&mut self) -> Result<Statement> {
    // Consume the WITH token.
    self.parser
        .expect_keyword(Keyword::WITH)
        .context(error::SyntaxSnafu)?;

    // Optional RECURSIVE keyword.
    let recursive = self.parser.parse_keyword(Keyword::RECURSIVE);

    // Parse the comma separated CTE list, splitting it by content kind.
    let mut tql_cte_tables = Vec::new();
    let mut sql_cte_tables = Vec::new();
    loop {
        let cte = self.parse_hybrid_cte()?;
        match cte.content {
            CteContent::Sql(body) => sql_cte_tables.push(Cte {
                alias: TableAlias {
                    name: cte.name,
                    columns: cte
                        .columns
                        .into_iter()
                        .map(|col| TableAliasColumnDef {
                            // Only the first part of a (possibly qualified)
                            // column name is kept as the alias.
                            name: col.0[0].clone(),
                            data_type: None,
                        })
                        .collect(),
                },
                query: body,
                from: None,
                materialized: None,
                closing_paren_token: AttachedToken::empty(),
            }),
            CteContent::Tql(_) => tql_cte_tables.push(cte),
        }

        if !self.parser.consume_token(&Token::Comma) {
            break;
        }
    }

    // Parse the main query that follows the CTE list.
    let main_query = self.parser.parse_query().context(error::SyntaxSnafu)?;
    let mut query = Query::try_from(*main_query)?;

    // Attach the hybrid metadata only when there really is a TQL CTE.
    if !tql_cte_tables.is_empty() {
        query.hybrid_cte = Some(HybridCteWith {
            recursive,
            cte_tables: tql_cte_tables,
        });
    }

    // Attach the SQL CTEs to the inner query; never leave an empty `With`.
    query.inner.with = if sql_cte_tables.is_empty() {
        None
    } else {
        Some(With {
            recursive,
            cte_tables: sql_cte_tables,
            with_token: AttachedToken::empty(),
        })
    };

    Ok(Statement::Query(Box::new(query)))
}
/// Parse a single CTE that can be either SQL or TQL
fn parse_hybrid_cte(&mut self) -> Result<HybridCte> {
// Parse CTE name
let name = self.parser.parse_identifier().context(error::SyntaxSnafu)?;
let name = Self::canonicalize_identifier(name);
// Parse optional column list
let columns = self
.parser
.parse_parenthesized_qualified_column_list(IsOptional::Optional, true)
.context(error::SyntaxSnafu)?;
// Expect AS keyword
self.parser
.expect_keyword(Keyword::AS)
.context(error::SyntaxSnafu)?;
// Parse the CTE content
self.parser
.expect_token(&Token::LParen)
.context(error::SyntaxSnafu)?;
let content = self.parse_cte_content()?;
self.parser
.expect_token(&Token::RParen)
.context(error::SyntaxSnafu)?;
Ok(HybridCte {
name,
columns,
content,
})
}
/// Determine if CTE contains TQL or SQL and parse accordingly
fn parse_cte_content(&mut self) -> Result<CteContent> {
// Check if the next token is TQL
if let Token::Word(w) = &self.parser.peek_token().token {
if w.keyword == Keyword::NoKeyword
&& w.quote_style.is_none()
&& w.value.to_uppercase() == tql_parser::TQL
{
let tql = self.parse_tql_content_in_cte()?;
return Ok(CteContent::Tql(tql));
}
}
// Parse as SQL query
let sql_query = self.parser.parse_query().context(error::SyntaxSnafu)?;
Ok(CteContent::Sql(sql_query))
}
/// Parse TQL content within a CTE by reusing the standard TQL parser.
///
/// This method consumes all tokens that belong to the TQL statement and
/// stops right **before** the closing `)` of the CTE so that the caller
/// can handle it normally.
///
/// Only `TQL EVAL` is supported inside CTEs.
fn parse_tql_content_in_cte(&mut self) -> Result<Tql> {
let mut collected: Vec<Token> = Vec::new();
let mut paren_depth = 0usize;
loop {
let token_with_span = self.parser.peek_token();
// Guard against unexpected EOF
if token_with_span.token == Token::EOF {
return Err(error::InvalidSqlSnafu {
msg: "Unexpected end of input while parsing TQL inside CTE".to_string(),
}
.build());
}
// Stop **before** the closing parenthesis that ends the CTE
if token_with_span.token == Token::RParen && paren_depth == 0 {
break;
}
// Consume the token and push it into our buffer
let consumed = self.parser.next_token();
match consumed.token {
Token::LParen => paren_depth += 1,
Token::RParen => {
// This RParen must belong to a nested expression since
// `paren_depth > 0` here. Decrease depth accordingly.
paren_depth = paren_depth.saturating_sub(1);
}
_ => {}
}
collected.push(consumed.token);
}
// Re-construct the SQL string of the isolated TQL statement.
let tql_string = collected
.iter()
.map(|tok| tok.to_string())
.collect::<Vec<_>>()
.join(" ");
// Use the shared parser to turn it into a `Statement`.
let mut stmts = ParserContext::create_with_dialect(
&tql_string,
&GreptimeDbDialect {},
ParseOptions::default(),
)?;
if stmts.len() != 1 {
return Err(error::InvalidSqlSnafu {
msg: "Expected a single TQL statement inside CTE".to_string(),
}
.build());
}
match stmts.remove(0) {
Statement::Tql(Tql::Eval(eval)) => Ok(Tql::Eval(eval)),
Statement::Tql(_) => Err(error::InvalidSqlSnafu {
msg: "Only TQL EVAL is supported in CTEs".to_string(),
}
.build()),
_ => Err(error::InvalidSqlSnafu {
msg: "Expected a TQL statement inside CTE".to_string(),
}
.build()),
}
}
}
#[cfg(test)]
mod tests {
use crate::dialect::GreptimeDbDialect;
use crate::parser::{ParseOptions, ParserContext};
use crate::parsers::with_tql_parser::CteContent;
use crate::statements::statement::Statement;
use crate::statements::tql::Tql;
#[test]
fn test_parse_hybrid_cte_with_parentheses_in_query() {
    // Parentheses inside the TQL query must not be mistaken for the
    // parenthesis that closes the CTE.
    let sql = r#"
WITH tql_cte AS (
TQL EVAL (0, 100, '5s')
sum(rate(http_requests_total[1m])) + (max(cpu_usage) * (1 + 0.5))
)
SELECT * FROM tql_cte
"#;
    let statements =
        ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
            .unwrap();
    assert_eq!(statements.len(), 1);

    let Statement::Query(query) = &statements[0] else {
        panic!("Expected Query statement");
    };
    let hybrid_cte = query.hybrid_cte.as_ref().unwrap();
    assert_eq!(hybrid_cte.cte_tables.len(), 1);

    // Fail loudly if the content is not `TQL EVAL`; an `if let` here would
    // silently skip every assertion below.
    let CteContent::Tql(Tql::Eval(eval)) = &hybrid_cte.cte_tables[0].content else {
        panic!("Expected TQL EVAL content");
    };

    // The query keeps its nested parentheses (tokens are re-joined with
    // single spaces by the parser).
    assert!(eval
        .query
        .contains("sum ( rate ( http_requests_total [ 1 m ] ) )"));
    assert!(eval.query.contains("( max ( cpu_usage ) * ( 1 + 0.5 ) )"));
    // Most importantly, the parenthesis counting did not cut the query short.
    assert!(eval.query.contains("+ ( max"));
}
#[test]
fn test_parse_hybrid_cte_sql_and_tql() {
    // A WITH list mixing one SQL CTE and one TQL CTE.
    let sql = r#"
WITH
sql_cte(ts, value, label) AS (SELECT timestamp, val, name FROM metrics),
tql_cte(time, metric_value) AS (TQL EVAL (0, 100, '5s') cpu_usage)
SELECT s.ts, s.value, t.metric_value
FROM sql_cte s JOIN tql_cte t ON s.ts = t.time
"#;
    let statements =
        ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
            .unwrap();
    assert_eq!(statements.len(), 1);

    let Statement::Query(query) = &statements[0] else {
        panic!("Expected Query statement");
    };

    // Only the TQL CTE is kept in the hybrid metadata.
    let hybrid_cte = query.hybrid_cte.as_ref().unwrap();
    assert_eq!(hybrid_cte.cte_tables.len(), 1);
    let tql_cte = &hybrid_cte.cte_tables[0];
    assert!(matches!(tql_cte.content, CteContent::Tql(_)));
    assert_eq!(tql_cte.columns.len(), 2);
    assert_eq!(tql_cte.columns[0].0[0].value, "time");
    assert_eq!(tql_cte.columns[1].0[0].value, "metric_value");

    // The SQL CTE is carried by the inner query's WITH clause.
    let sql_with = query.inner.with.as_ref().unwrap();
    assert_eq!(sql_with.cte_tables.len(), 1);
    assert_eq!(sql_with.cte_tables[0].alias.name.value, "sql_cte");
    assert_eq!(sql_with.cte_tables[0].alias.columns.len(), 3);
}
}

View File

@@ -19,19 +19,24 @@ use sqlparser::ast::Query as SpQuery;
use sqlparser_derive::{Visit, VisitMut};
use crate::error::Error;
use crate::parsers::with_tql_parser::HybridCteWith;
/// Query statement instance.
/// A wrapper around [`Query`] from sqlparser-rs to add support for hybrid CTEs
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
pub struct Query {
pub inner: SpQuery,
/// Hybrid CTE containing both SQL and TQL CTEs
pub hybrid_cte: Option<HybridCteWith>,
}
/// Automatically converts from sqlparser Query instance to SqlQuery.
impl TryFrom<SpQuery> for Query {
type Error = Error;
fn try_from(q: SpQuery) -> Result<Self, Self::Error> {
Ok(Query { inner: q })
fn try_from(inner: SpQuery) -> Result<Self, Self::Error> {
Ok(Self {
inner,
hybrid_cte: None,
})
}
}
@@ -45,8 +50,17 @@ impl TryFrom<Query> for SpQuery {
impl fmt::Display for Query {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.inner)?;
Ok(())
if let Some(hybrid_cte) = &self.hybrid_cte {
// Delegate the WITH clause rendering to `HybridCteWith`
write!(f, "{} ", hybrid_cte)?;
// Display the main query without its WITH clause since we handled it above
let mut main_query = self.inner.clone();
main_query.with = None;
write!(f, "{}", main_query)
} else {
write!(f, "{}", self.inner)
}
}
}
@@ -88,5 +102,11 @@ mod test {
.to_string(),
"SELECT * FROM abc LEFT JOIN bcd WHERE abc.a = 1 AND bcd.d = 7 AND abc.id = bcd.id"
);
assert_eq!(
create_query("WITH tql_cte AS (TQL EVAL (0, 100, '5s') up) SELECT * FROM tql_cte")
.unwrap()
.to_string(),
"WITH tql_cte AS (TQL EVAL (0, 100, '5s') up) SELECT * FROM tql_cte"
);
}
}