fix: TQL CTE parser takes raw query string (#6671)

* take raw TQL part

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sort sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add order by

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* more order by

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add comment back

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-08-07 23:22:56 -07:00
committed by GitHub
parent 0215b39f61
commit 3b1f172ab8
3 changed files with 116 additions and 37 deletions

View File

@@ -32,6 +32,7 @@ use crate::parsers::tql_parser;
use crate::statements::query::Query;
use crate::statements::statement::Statement;
use crate::statements::tql::Tql;
use crate::util::location_to_index;
/// Content of a CTE - either SQL or TQL
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
@@ -210,7 +211,7 @@ impl ParserContext<'_> {
Ok(CteContent::Sql(sql_query))
}
/// Parse TQL content within a CTE by reusing the standard TQL parser.
/// Parse TQL content within a CTE by extracting the raw query string.
///
/// This method consumes all tokens that belong to the TQL statement and
/// stops right **before** the closing `)` of the CTE so that the caller
@@ -218,8 +219,20 @@ impl ParserContext<'_> {
///
/// Only `TQL EVAL` is supported inside CTEs.
fn parse_tql_content_in_cte(&mut self) -> Result<Tql> {
let mut collected: Vec<Token> = Vec::new();
// Consume and get the position of the TQL keyword
let tql_token = self.parser.next_token();
if tql_token.token == Token::EOF {
return Err(error::InvalidSqlSnafu {
msg: "Unexpected end of input while parsing TQL inside CTE".to_string(),
}
.build());
}
let start_location = tql_token.span.start;
// Track parentheses depth to find the end of the CTE
let mut paren_depth = 0usize;
let end_location;
loop {
let token_with_span = self.parser.peek_token();
@@ -234,10 +247,11 @@ impl ParserContext<'_> {
// Stop **before** the closing parenthesis that ends the CTE
if token_with_span.token == Token::RParen && paren_depth == 0 {
end_location = token_with_span.span.start;
break;
}
// Consume the token and push it into our buffer
// Consume the token and track parentheses depth
let consumed = self.parser.next_token();
match consumed.token {
Token::LParen => paren_depth += 1,
@@ -248,20 +262,17 @@ impl ParserContext<'_> {
}
_ => {}
}
collected.push(consumed.token);
}
// Re-construct the SQL string of the isolated TQL statement.
let tql_string = collected
.iter()
.map(|tok| tok.to_string())
.collect::<Vec<_>>()
.join(" ");
// Extract the TQL query string directly from the original SQL
let start_index = location_to_index(self.sql, &start_location);
let end_index = location_to_index(self.sql, &end_location);
let tql_string = &self.sql[start_index..end_index];
let tql_string = tql_string.trim();
// Use the shared parser to turn it into a `Statement`.
// Parse the TQL string using the standard TQL parser
let mut stmts = ParserContext::create_with_dialect(
&tql_string,
tql_string,
&GreptimeDbDialect {},
ParseOptions::default(),
)?;
@@ -323,15 +334,14 @@ mod tests {
CteContent::Tql(_)
));
// Check that the query includes the parentheses (spaces are added by tokenizer)
// Check that the query includes the parentheses
if let CteContent::Tql(Tql::Eval(eval)) = &hybrid_cte.cte_tables[0].content {
// Verify that complex nested parentheses are preserved correctly
assert!(eval
.query
.contains("sum ( rate ( http_requests_total [ 1 m ] ) )"));
assert!(eval.query.contains("( max ( cpu_usage ) * ( 1 + 0.5 ) )"));
// The new approach preserves original spacing, so no extra spaces between tokens
assert!(eval.query.contains("sum(rate(http_requests_total[1m]))"));
assert!(eval.query.contains("(max(cpu_usage) * (1 + 0.5))"));
// Most importantly, verify the parentheses counting didn't break the parsing
assert!(eval.query.contains("+ ( max"));
assert!(eval.query.contains("+ (max"));
}
}

View File

@@ -73,17 +73,30 @@ SELECT count(*) FROM filtered;
| 2 |
+----------+
-- TODO(ruihang): The following tests are not supported yet, need to fix parser first.
-- TQL CTE with complex PromQL expressions
-- WITH
-- tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
-- filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
-- SELECT sum(val) FROM filtered;
WITH
tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
SELECT sum(val) FROM filtered;
+-------------------+
| sum(filtered.val) |
+-------------------+
| 1.05 |
+-------------------+
-- TQL CTE with aggregation functions
-- WITH tql_agg AS (
-- TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
-- )
-- SELECT avg(val) as avg_sum FROM tql_agg;
WITH tql_agg(ts, summary) AS (
TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
)
SELECT round(avg(summary)) as avg_sum FROM tql_agg;
+---------+
| avg_sum |
+---------+
| 1.0 |
+---------+
-- TQL CTE with label selectors
WITH host_metrics AS (
TQL EVAL (0, 40, '10s') labels{host="host1"}
@@ -209,6 +222,42 @@ SELECT count(*) as high_values FROM final;
| 3 |
+-------------+
-- TQL CTE with time-based functions
WITH time_shifted AS (
TQL EVAL (0, 40, '10s') metric offset 50s
)
SELECT * FROM time_shifted;
+----+-----+
| ts | val |
+----+-----+
+----+-----+
-- TQL CTE with JOIN between TQL and regular table
-- SQLNESS SORT_RESULT 3 1
WITH tql_summary(ts, host, cpu) AS (
TQL EVAL (0, 40, '10s') avg_over_time(labels[30s])
)
SELECT
t.ts,
t.cpu as avg_value,
l.host
FROM tql_summary t
JOIN labels l ON DATE_TRUNC('second', t.ts) = DATE_TRUNC('second', l.ts)
WHERE l.host = 'host1'
ORDER BY t.ts, l.host, avg_value
LIMIT 5;
+---------------------+-----------+-------+
| ts | avg_value | host |
+---------------------+-----------+-------+
| 1970-01-01T00:00:00 | host1 | host1 |
| 1970-01-01T00:00:00 | host2 | host1 |
| 1970-01-01T00:00:10 | host1 | host1 |
| 1970-01-01T00:00:10 | host2 | host1 |
| 1970-01-01T00:00:20 | host1 | host1 |
+---------------------+-----------+-------+
-- Error case - TQL ANALYZE should fail
WITH tql_analyze AS (
TQL ANALYZE (0, 40, '10s') metric

View File

@@ -38,18 +38,17 @@ WITH
filtered AS (SELECT * FROM tql_data WHERE val > 5)
SELECT count(*) FROM filtered;
-- TODO(ruihang): The following tests are not supported yet, need to fix parser first.
-- TQL CTE with complex PromQL expressions
-- WITH
-- tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
-- filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
-- SELECT sum(val) FROM filtered;
WITH
tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
SELECT sum(val) FROM filtered;
-- TQL CTE with aggregation functions
-- WITH tql_agg AS (
-- TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
-- )
-- SELECT avg(val) as avg_sum FROM tql_agg;
WITH tql_agg(ts, summary) AS (
TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
)
SELECT round(avg(summary)) as avg_sum FROM tql_agg;
-- TQL CTE with label selectors
WITH host_metrics AS (
@@ -119,6 +118,27 @@ WITH
)
SELECT count(*) as high_values FROM final;
-- TQL CTE with time-based functions
WITH time_shifted AS (
TQL EVAL (0, 40, '10s') metric offset 50s
)
SELECT * FROM time_shifted;
-- TQL CTE with JOIN between TQL and regular table
-- SQLNESS SORT_RESULT 3 1
WITH tql_summary(ts, host, cpu) AS (
TQL EVAL (0, 40, '10s') avg_over_time(labels[30s])
)
SELECT
t.ts,
t.cpu as avg_value,
l.host
FROM tql_summary t
JOIN labels l ON DATE_TRUNC('second', t.ts) = DATE_TRUNC('second', l.ts)
WHERE l.host = 'host1'
ORDER BY t.ts, l.host, avg_value
LIMIT 5;
-- Error case - TQL ANALYZE should fail
WITH tql_analyze AS (
TQL ANALYZE (0, 40, '10s') metric