diff --git a/src/sql/src/parsers/with_tql_parser.rs b/src/sql/src/parsers/with_tql_parser.rs index b40693becc..0c467b2502 100644 --- a/src/sql/src/parsers/with_tql_parser.rs +++ b/src/sql/src/parsers/with_tql_parser.rs @@ -32,6 +32,7 @@ use crate::parsers::tql_parser; use crate::statements::query::Query; use crate::statements::statement::Statement; use crate::statements::tql::Tql; +use crate::util::location_to_index; /// Content of a CTE - either SQL or TQL #[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] @@ -210,7 +211,7 @@ impl ParserContext<'_> { Ok(CteContent::Sql(sql_query)) } - /// Parse TQL content within a CTE by reusing the standard TQL parser. + /// Parse TQL content within a CTE by extracting the raw query string. /// /// This method consumes all tokens that belong to the TQL statement and /// stops right **before** the closing `)` of the CTE so that the caller @@ -218,8 +219,20 @@ impl ParserContext<'_> { /// /// Only `TQL EVAL` is supported inside CTEs. fn parse_tql_content_in_cte(&mut self) -> Result { - let mut collected: Vec = Vec::new(); + // Consume and get the position of the TQL keyword + let tql_token = self.parser.next_token(); + if tql_token.token == Token::EOF { + return Err(error::InvalidSqlSnafu { + msg: "Unexpected end of input while parsing TQL inside CTE".to_string(), + } + .build()); + } + + let start_location = tql_token.span.start; + + // Track parentheses depth to find the end of the CTE let mut paren_depth = 0usize; + let end_location; loop { let token_with_span = self.parser.peek_token(); @@ -234,10 +247,11 @@ impl ParserContext<'_> { // Stop **before** the closing parenthesis that ends the CTE if token_with_span.token == Token::RParen && paren_depth == 0 { + end_location = token_with_span.span.start; break; } - // Consume the token and push it into our buffer + // Consume the token and track parentheses depth let consumed = self.parser.next_token(); match consumed.token { Token::LParen => paren_depth += 1, @@ -248,20 +262,17 @@ impl ParserContext<'_> { } _ => {} } - - collected.push(consumed.token); } - // Re-construct the SQL string of the isolated TQL statement. - let tql_string = collected - .iter() - .map(|tok| tok.to_string()) - .collect::>() - .join(" "); + // Extract the TQL query string directly from the original SQL + let start_index = location_to_index(self.sql, &start_location); + let end_index = location_to_index(self.sql, &end_location); + let tql_string = &self.sql[start_index..end_index]; + let tql_string = tql_string.trim(); - // Use the shared parser to turn it into a `Statement`. + // Parse the TQL string using the standard TQL parser let mut stmts = ParserContext::create_with_dialect( - &tql_string, + tql_string, &GreptimeDbDialect {}, ParseOptions::default(), )?; @@ -323,15 +334,14 @@ mod tests { CteContent::Tql(_) )); - // Check that the query includes the parentheses (spaces are added by tokenizer) + // Check that the query includes the parentheses if let CteContent::Tql(Tql::Eval(eval)) = &hybrid_cte.cte_tables[0].content { // Verify that complex nested parentheses are preserved correctly - assert!(eval - .query - .contains("sum ( rate ( http_requests_total [ 1 m ] ) )")); - assert!(eval.query.contains("( max ( cpu_usage ) * ( 1 + 0.5 ) )")); + // The new approach preserves original spacing, so no extra spaces between tokens + assert!(eval.query.contains("sum(rate(http_requests_total[1m]))")); + assert!(eval.query.contains("(max(cpu_usage) * (1 + 0.5))")); // Most importantly, verify the parentheses counting didn't break the parsing - assert!(eval.query.contains("+ ( max")); + assert!(eval.query.contains("+ (max")); } } diff --git a/tests/cases/standalone/common/tql/tql-cte.result b/tests/cases/standalone/common/tql/tql-cte.result index 59943b92f8..943e7e30a3 100644 --- a/tests/cases/standalone/common/tql/tql-cte.result +++ b/tests/cases/standalone/common/tql/tql-cte.result @@ -73,17 +73,30 @@ SELECT count(*) FROM filtered; | 2 | +----------+ --- TODO(ruihang): The following tests are not supported yet, need to fix parser first. -- TQL CTE with complex PromQL expressions --- WITH --- tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])), --- filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0) --- SELECT sum(val) FROM filtered; +WITH + tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])), + filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0) +SELECT sum(val) FROM filtered; + ++-------------------+ +| sum(filtered.val) | ++-------------------+ +| 1.05 | ++-------------------+ + -- TQL CTE with aggregation functions --- WITH tql_agg AS ( --- TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"}) --- ) --- SELECT avg(val) as avg_sum FROM tql_agg; +WITH tql_agg(ts, summary) AS ( + TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"}) +) +SELECT round(avg(summary)) as avg_sum FROM tql_agg; + ++---------+ +| avg_sum | ++---------+ +| 1.0 | ++---------+ + -- TQL CTE with label selectors WITH host_metrics AS ( TQL EVAL (0, 40, '10s') labels{host="host1"} @@ -209,6 +222,42 @@ SELECT count(*) as high_values FROM final; | 3 | +-------------+ +-- TQL CTE with time-based functions +WITH time_shifted AS ( + TQL EVAL (0, 40, '10s') metric offset 50s +) +SELECT * FROM time_shifted; + ++----+-----+ +| ts | val | ++----+-----+ ++----+-----+ + +-- TQL CTE with JOIN between TQL and regular table +-- SQLNESS SORT_RESULT 3 1 +WITH tql_summary(ts, host, cpu) AS ( + TQL EVAL (0, 40, '10s') avg_over_time(labels[30s]) +) +SELECT + t.ts, + t.cpu as avg_value, + l.host +FROM tql_summary t +JOIN labels l ON DATE_TRUNC('second', t.ts) = DATE_TRUNC('second', l.ts) +WHERE l.host = 'host1' +ORDER BY t.ts, l.host, avg_value +LIMIT 5; + ++---------------------+-----------+-------+ +| ts | avg_value | host | ++---------------------+-----------+-------+ +| 1970-01-01T00:00:00 | host1 | host1 | +| 1970-01-01T00:00:00 | host2 | host1 | +| 1970-01-01T00:00:10 | host1 | host1 | +| 1970-01-01T00:00:10 | host2 | host1 | +| 1970-01-01T00:00:20 | host1 | host1 | ++---------------------+-----------+-------+ + -- Error case - TQL ANALYZE should fail WITH tql_analyze AS ( TQL ANALYZE (0, 40, '10s') metric diff --git a/tests/cases/standalone/common/tql/tql-cte.sql b/tests/cases/standalone/common/tql/tql-cte.sql index de5cc6cead..46879b5075 100644 --- a/tests/cases/standalone/common/tql/tql-cte.sql +++ b/tests/cases/standalone/common/tql/tql-cte.sql @@ -38,18 +38,17 @@ WITH filtered AS (SELECT * FROM tql_data WHERE val > 5) SELECT count(*) FROM filtered; --- TODO(ruihang): The following tests are not supported yet, need to fix parser first. -- TQL CTE with complex PromQL expressions --- WITH --- tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])), --- filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0) --- SELECT sum(val) FROM filtered; +WITH + tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])), + filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0) +SELECT sum(val) FROM filtered; -- TQL CTE with aggregation functions --- WITH tql_agg AS ( --- TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"}) --- ) --- SELECT avg(val) as avg_sum FROM tql_agg; +WITH tql_agg(ts, summary) AS ( + TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"}) +) +SELECT round(avg(summary)) as avg_sum FROM tql_agg; -- TQL CTE with label selectors WITH host_metrics AS ( @@ -119,6 +118,27 @@ WITH ) SELECT count(*) as high_values FROM final; +-- TQL CTE with time-based functions +WITH time_shifted AS ( + TQL EVAL (0, 40, '10s') metric offset 50s +) +SELECT * FROM time_shifted; + +-- TQL CTE with JOIN between TQL and regular table +-- SQLNESS SORT_RESULT 3 1 +WITH tql_summary(ts, host, cpu) AS ( + TQL EVAL (0, 40, '10s') avg_over_time(labels[30s]) +) +SELECT + t.ts, + t.cpu as avg_value, + l.host +FROM tql_summary t +JOIN labels l ON DATE_TRUNC('second', t.ts) = DATE_TRUNC('second', l.ts) +WHERE l.host = 'host1' +ORDER BY t.ts, l.host, avg_value +LIMIT 5; + -- Error case - TQL ANALYZE should fail WITH tql_analyze AS ( TQL ANALYZE (0, 40, '10s') metric