mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-25 09:20:40 +00:00
fix: TQL CTE parser take raw query string (#6671)
* take raw TQL part
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* more tests
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* sort sqlness result
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* add order by
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* more order by
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* add comment back
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
---------
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -32,6 +32,7 @@ use crate::parsers::tql_parser;
|
||||
use crate::statements::query::Query;
|
||||
use crate::statements::statement::Statement;
|
||||
use crate::statements::tql::Tql;
|
||||
use crate::util::location_to_index;
|
||||
|
||||
/// Content of a CTE - either SQL or TQL
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)]
|
||||
@@ -210,7 +211,7 @@ impl ParserContext<'_> {
|
||||
Ok(CteContent::Sql(sql_query))
|
||||
}
|
||||
|
||||
/// Parse TQL content within a CTE by reusing the standard TQL parser.
|
||||
/// Parse TQL content within a CTE by extracting the raw query string.
|
||||
///
|
||||
/// This method consumes all tokens that belong to the TQL statement and
|
||||
/// stops right **before** the closing `)` of the CTE so that the caller
|
||||
@@ -218,8 +219,20 @@ impl ParserContext<'_> {
|
||||
///
|
||||
/// Only `TQL EVAL` is supported inside CTEs.
|
||||
fn parse_tql_content_in_cte(&mut self) -> Result<Tql> {
|
||||
let mut collected: Vec<Token> = Vec::new();
|
||||
// Consume and get the position of the TQL keyword
|
||||
let tql_token = self.parser.next_token();
|
||||
if tql_token.token == Token::EOF {
|
||||
return Err(error::InvalidSqlSnafu {
|
||||
msg: "Unexpected end of input while parsing TQL inside CTE".to_string(),
|
||||
}
|
||||
.build());
|
||||
}
|
||||
|
||||
let start_location = tql_token.span.start;
|
||||
|
||||
// Track parentheses depth to find the end of the CTE
|
||||
let mut paren_depth = 0usize;
|
||||
let end_location;
|
||||
|
||||
loop {
|
||||
let token_with_span = self.parser.peek_token();
|
||||
@@ -234,10 +247,11 @@ impl ParserContext<'_> {
|
||||
|
||||
// Stop **before** the closing parenthesis that ends the CTE
|
||||
if token_with_span.token == Token::RParen && paren_depth == 0 {
|
||||
end_location = token_with_span.span.start;
|
||||
break;
|
||||
}
|
||||
|
||||
// Consume the token and push it into our buffer
|
||||
// Consume the token and track parentheses depth
|
||||
let consumed = self.parser.next_token();
|
||||
match consumed.token {
|
||||
Token::LParen => paren_depth += 1,
|
||||
@@ -248,20 +262,17 @@ impl ParserContext<'_> {
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
collected.push(consumed.token);
|
||||
}
|
||||
|
||||
// Re-construct the SQL string of the isolated TQL statement.
|
||||
let tql_string = collected
|
||||
.iter()
|
||||
.map(|tok| tok.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
// Extract the TQL query string directly from the original SQL
|
||||
let start_index = location_to_index(self.sql, &start_location);
|
||||
let end_index = location_to_index(self.sql, &end_location);
|
||||
let tql_string = &self.sql[start_index..end_index];
|
||||
let tql_string = tql_string.trim();
|
||||
|
||||
// Use the shared parser to turn it into a `Statement`.
|
||||
// Parse the TQL string using the standard TQL parser
|
||||
let mut stmts = ParserContext::create_with_dialect(
|
||||
&tql_string,
|
||||
tql_string,
|
||||
&GreptimeDbDialect {},
|
||||
ParseOptions::default(),
|
||||
)?;
|
||||
@@ -323,15 +334,14 @@ mod tests {
|
||||
CteContent::Tql(_)
|
||||
));
|
||||
|
||||
// Check that the query includes the parentheses (spaces are added by tokenizer)
|
||||
// Check that the query includes the parentheses
|
||||
if let CteContent::Tql(Tql::Eval(eval)) = &hybrid_cte.cte_tables[0].content {
|
||||
// Verify that complex nested parentheses are preserved correctly
|
||||
assert!(eval
|
||||
.query
|
||||
.contains("sum ( rate ( http_requests_total [ 1 m ] ) )"));
|
||||
assert!(eval.query.contains("( max ( cpu_usage ) * ( 1 + 0.5 ) )"));
|
||||
// The new approach preserves original spacing, so no extra spaces between tokens
|
||||
assert!(eval.query.contains("sum(rate(http_requests_total[1m]))"));
|
||||
assert!(eval.query.contains("(max(cpu_usage) * (1 + 0.5))"));
|
||||
// Most importantly, verify the parentheses counting didn't break the parsing
|
||||
assert!(eval.query.contains("+ ( max"));
|
||||
assert!(eval.query.contains("+ (max"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -73,17 +73,30 @@ SELECT count(*) FROM filtered;
|
||||
| 2 |
|
||||
+----------+
|
||||
|
||||
-- TODO(ruihang): The following tests are not supported yet, need to fix parser first.
|
||||
-- TQL CTE with complex PromQL expressions
|
||||
-- WITH
|
||||
-- tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
|
||||
-- filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
|
||||
-- SELECT sum(val) FROM filtered;
|
||||
WITH
|
||||
tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
|
||||
filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
|
||||
SELECT sum(val) FROM filtered;
|
||||
|
||||
+-------------------+
|
||||
| sum(filtered.val) |
|
||||
+-------------------+
|
||||
| 1.05 |
|
||||
+-------------------+
|
||||
|
||||
-- TQL CTE with aggregation functions
|
||||
-- WITH tql_agg AS (
|
||||
-- TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
|
||||
-- )
|
||||
-- SELECT avg(val) as avg_sum FROM tql_agg;
|
||||
WITH tql_agg(ts, summary) AS (
|
||||
TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
|
||||
)
|
||||
SELECT round(avg(summary)) as avg_sum FROM tql_agg;
|
||||
|
||||
+---------+
|
||||
| avg_sum |
|
||||
+---------+
|
||||
| 1.0 |
|
||||
+---------+
|
||||
|
||||
-- TQL CTE with label selectors
|
||||
WITH host_metrics AS (
|
||||
TQL EVAL (0, 40, '10s') labels{host="host1"}
|
||||
@@ -209,6 +222,42 @@ SELECT count(*) as high_values FROM final;
|
||||
| 3 |
|
||||
+-------------+
|
||||
|
||||
-- TQL CTE with time-based functions
|
||||
WITH time_shifted AS (
|
||||
TQL EVAL (0, 40, '10s') metric offset 50s
|
||||
)
|
||||
SELECT * FROM time_shifted;
|
||||
|
||||
+----+-----+
|
||||
| ts | val |
|
||||
+----+-----+
|
||||
+----+-----+
|
||||
|
||||
-- TQL CTE with JOIN between TQL and regular table
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
WITH tql_summary(ts, host, cpu) AS (
|
||||
TQL EVAL (0, 40, '10s') avg_over_time(labels[30s])
|
||||
)
|
||||
SELECT
|
||||
t.ts,
|
||||
t.cpu as avg_value,
|
||||
l.host
|
||||
FROM tql_summary t
|
||||
JOIN labels l ON DATE_TRUNC('second', t.ts) = DATE_TRUNC('second', l.ts)
|
||||
WHERE l.host = 'host1'
|
||||
ORDER BY t.ts, l.host, avg_value
|
||||
LIMIT 5;
|
||||
|
||||
+---------------------+-----------+-------+
|
||||
| ts | avg_value | host |
|
||||
+---------------------+-----------+-------+
|
||||
| 1970-01-01T00:00:00 | host1 | host1 |
|
||||
| 1970-01-01T00:00:00 | host2 | host1 |
|
||||
| 1970-01-01T00:00:10 | host1 | host1 |
|
||||
| 1970-01-01T00:00:10 | host2 | host1 |
|
||||
| 1970-01-01T00:00:20 | host1 | host1 |
|
||||
+---------------------+-----------+-------+
|
||||
|
||||
-- Error case - TQL ANALYZE should fail
|
||||
WITH tql_analyze AS (
|
||||
TQL ANALYZE (0, 40, '10s') metric
|
||||
|
||||
@@ -38,18 +38,17 @@ WITH
|
||||
filtered AS (SELECT * FROM tql_data WHERE val > 5)
|
||||
SELECT count(*) FROM filtered;
|
||||
|
||||
-- TODO(ruihang): The following tests are not supported yet, need to fix parser first.
|
||||
-- TQL CTE with complex PromQL expressions
|
||||
-- WITH
|
||||
-- tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
|
||||
-- filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
|
||||
-- SELECT sum(val) FROM filtered;
|
||||
WITH
|
||||
tql_data (ts, val) AS (TQL EVAL (0, 40, '10s') rate(metric[20s])),
|
||||
filtered (ts, val) AS (SELECT * FROM tql_data WHERE val > 0)
|
||||
SELECT sum(val) FROM filtered;
|
||||
|
||||
-- TQL CTE with aggregation functions
|
||||
-- WITH tql_agg AS (
|
||||
-- TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
|
||||
-- )
|
||||
-- SELECT avg(val) as avg_sum FROM tql_agg;
|
||||
WITH tql_agg(ts, summary) AS (
|
||||
TQL EVAL (0, 40, '10s') sum(labels{host=~"host.*"})
|
||||
)
|
||||
SELECT round(avg(summary)) as avg_sum FROM tql_agg;
|
||||
|
||||
-- TQL CTE with label selectors
|
||||
WITH host_metrics AS (
|
||||
@@ -119,6 +118,27 @@ WITH
|
||||
)
|
||||
SELECT count(*) as high_values FROM final;
|
||||
|
||||
-- TQL CTE with time-based functions
|
||||
WITH time_shifted AS (
|
||||
TQL EVAL (0, 40, '10s') metric offset 50s
|
||||
)
|
||||
SELECT * FROM time_shifted;
|
||||
|
||||
-- TQL CTE with JOIN between TQL and regular table
|
||||
-- SQLNESS SORT_RESULT 3 1
|
||||
WITH tql_summary(ts, host, cpu) AS (
|
||||
TQL EVAL (0, 40, '10s') avg_over_time(labels[30s])
|
||||
)
|
||||
SELECT
|
||||
t.ts,
|
||||
t.cpu as avg_value,
|
||||
l.host
|
||||
FROM tql_summary t
|
||||
JOIN labels l ON DATE_TRUNC('second', t.ts) = DATE_TRUNC('second', l.ts)
|
||||
WHERE l.host = 'host1'
|
||||
ORDER BY t.ts, l.host, avg_value
|
||||
LIMIT 5;
|
||||
|
||||
-- Error case - TQL ANALYZE should fail
|
||||
WITH tql_analyze AS (
|
||||
TQL ANALYZE (0, 40, '10s') metric
|
||||
|
||||
Reference in New Issue
Block a user