feat(flow): support prom ql(in tql) in flow (#6063)

* feat: support parse prom ql in create flow

* refactor

* fix: just run tql unmodified

* refactor: determine type faster

* fix: pass original query

* tests: sqlness

* test: fix format&chore

* fix: get raw query

* test: fix sqlness randomness

* chore: what's the box for?

* test: location_to_index

* test: make sqlness more deterministic

* fix: tmp add sleep 1s after flush_flow

* undo test sleep 1s&rm done todo

* chore: more tests
This commit is contained in:
discord9
2025-05-22 11:06:09 +08:00
committed by GitHub
parent f55af5838c
commit fc6300a2ba
18 changed files with 704 additions and 146 deletions

View File

@@ -195,6 +195,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Invalid flow query: {}", reason))]
InvalidFlowQuery {
reason: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Invalid default constraint, column: {}", column))]
InvalidDefault {
column: String,
@@ -390,6 +397,7 @@ impl ErrorExt for Error {
| ColumnTypeMismatch { .. }
| InvalidTableName { .. }
| InvalidFlowName { .. }
| InvalidFlowQuery { .. }
| InvalidSqlValue { .. }
| TimestampOverflow { .. }
| InvalidTableOption { .. }

View File

@@ -40,12 +40,12 @@ use crate::parsers::utils::{
};
use crate::statements::create::{
Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable,
CreateTableLike, CreateView, Partitions, TableConstraint, VECTOR_OPT_DIM,
CreateTableLike, CreateView, Partitions, SqlOrTql, TableConstraint, VECTOR_OPT_DIM,
};
use crate::statements::statement::Statement;
use crate::statements::transform::type_alias::get_data_type_by_alias_name;
use crate::statements::{sql_data_type_to_concrete_data_type, OptionMap};
use crate::util::parse_option_string;
use crate::util::{location_to_index, parse_option_string};
pub const ENGINE: &str = "ENGINE";
pub const MAXVALUE: &str = "MAXVALUE";
@@ -282,12 +282,13 @@ impl<'a> ParserContext<'a> {
.consume_tokens(&[Token::make_keyword(EXPIRE), Token::make_keyword(AFTER)])
{
let expire_after_expr = self.parser.parse_expr().context(error::SyntaxSnafu)?;
let expire_after_lit = utils::parser_expr_to_scalar_value(expire_after_expr.clone())?
.cast_to(&ArrowDataType::Interval(IntervalUnit::MonthDayNano))
.ok()
.with_context(|| InvalidIntervalSnafu {
reason: format!("cannot cast {} to interval type", expire_after_expr),
})?;
let expire_after_lit =
utils::parser_expr_to_scalar_value_literal(expire_after_expr.clone())?
.cast_to(&ArrowDataType::Interval(IntervalUnit::MonthDayNano))
.ok()
.with_context(|| InvalidIntervalSnafu {
reason: format!("cannot cast {} to interval type", expire_after_expr),
})?;
if let ScalarValue::IntervalMonthDayNano(Some(interval)) = expire_after_lit {
Some(
interval.nanoseconds / 1_000_000_000
@@ -324,7 +325,22 @@ impl<'a> ParserContext<'a> {
.expect_keyword(Keyword::AS)
.context(SyntaxSnafu)?;
let query = self.parser.parse_query().context(error::SyntaxSnafu)?;
let start_loc = self.parser.peek_token().span.start;
let start_index = location_to_index(self.sql, &start_loc);
let query = self.parse_statement()?;
let end_token = self.parser.peek_token();
let raw_query = if end_token == Token::EOF {
&self.sql[start_index..]
} else {
let end_loc = end_token.span.end;
let end_index = location_to_index(self.sql, &end_loc);
&self.sql[start_index..end_index.min(self.sql.len())]
};
let raw_query = raw_query.trim_end_matches(";");
let query = Box::new(SqlOrTql::try_from_statement(query, raw_query)?);
Ok(Statement::CreateFlow(CreateFlow {
flow_name,

View File

@@ -23,6 +23,7 @@ use crate::parser::ParserContext;
use crate::parsers::utils;
use crate::statements::statement::Statement;
use crate::statements::tql::{Tql, TqlAnalyze, TqlEval, TqlExplain, TqlParameters};
use crate::util::location_to_index;
pub const TQL: &str = "TQL";
const EVAL: &str = "EVAL";
@@ -159,7 +160,7 @@ impl ParserContext<'_> {
let value = match tokens[0].clone() {
Token::Number(n, _) => n,
Token::DoubleQuotedString(s) | Token::SingleQuotedString(s) => s,
Token::Word(_) => Self::parse_tokens(tokens)?,
Token::Word(_) => Self::parse_tokens_to_ts(tokens)?,
unexpected => {
return Err(ParserError::ParserError(format!(
"Expected number, string or word, but have {unexpected:?}"
@@ -169,7 +170,7 @@ impl ParserContext<'_> {
};
Ok(value)
}
_ => Self::parse_tokens(tokens),
_ => Self::parse_tokens_to_ts(tokens),
};
for token in delimiter_tokens {
if parser.consume_token(token) {
@@ -182,9 +183,10 @@ impl ParserContext<'_> {
.context(ParserSnafu)
}
fn parse_tokens(tokens: Vec<Token>) -> std::result::Result<String, TQLError> {
/// Parse the tokens to seconds and convert to string.
fn parse_tokens_to_ts(tokens: Vec<Token>) -> std::result::Result<String, TQLError> {
let parser_expr = Self::parse_to_expr(tokens)?;
let lit = utils::parser_expr_to_scalar_value(parser_expr)
let lit = utils::parser_expr_to_scalar_value_literal(parser_expr)
.map_err(Box::new)
.context(ConvertToLogicalExpressionSnafu)?;
@@ -217,11 +219,15 @@ impl ParserContext<'_> {
while matches!(parser.peek_token().token, Token::Comma) {
let _skip_token = parser.next_token();
}
let index = parser.next_token().span.start.column as usize;
if index == 0 {
let start_tql = parser.next_token();
if start_tql == Token::EOF {
return Err(ParserError::ParserError("empty TQL query".to_string()));
}
let start_location = start_tql.span.start;
// translate the start location to the index in the sql string
let index = location_to_index(sql, &start_location);
let query = &sql[index - 1..];
while parser.next_token() != Token::EOF {
// consume all tokens

View File

@@ -41,7 +41,7 @@ use crate::error::{
/// Convert a parser expression to a scalar value. This function will try the
/// best to resolve and reduce constants. Exprs like `1 + 1` or `now()` can be
/// handled properly.
pub fn parser_expr_to_scalar_value(expr: sqlparser::ast::Expr) -> Result<ScalarValue> {
pub fn parser_expr_to_scalar_value_literal(expr: sqlparser::ast::Expr) -> Result<ScalarValue> {
// 1. convert parser expr to logical expr
let empty_df_schema = DFSchema::empty();
let logical_expr = SqlToRel::new(&StubContextProvider::default())

View File

@@ -24,8 +24,11 @@ use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue};
use crate::error::{Result, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu};
use crate::error::{
InvalidFlowQuerySnafu, Result, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu,
};
use crate::statements::statement::Statement;
use crate::statements::tql::Tql;
use crate::statements::OptionMap;
const LINE_SEP: &str = ",\n";
@@ -374,7 +377,41 @@ pub struct CreateFlow {
/// Comment string
pub comment: Option<String>,
/// SQL statement
pub query: Box<Query>,
pub query: Box<SqlOrTql>,
}
/// Either a sql query or a tql query
///
/// Each variant pairs the parsed statement with a `String`; the [`std::fmt::Display`]
/// implementation prints that string and ignores the parsed statement.
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)]
pub enum SqlOrTql {
/// A parsed SQL query and the query text it was created from.
Sql(Query, String),
/// A parsed TQL statement and the query text it was created from.
Tql(Tql, String),
}
/// Displays the query text stored alongside the parsed statement; the parsed
/// statement itself is never re-rendered.
impl std::fmt::Display for SqlOrTql {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Both variants keep their text in the second field.
        let (Self::Sql(_, text) | Self::Tql(_, text)) = self;
        f.write_str(text)
    }
}
impl SqlOrTql {
    /// Build a [`SqlOrTql`] from an already parsed statement.
    ///
    /// `original_query` is stored next to the parsed statement and is what
    /// `Display` prints later on.
    ///
    /// # Errors
    ///
    /// Fails with `InvalidFlowQuery` when `value` is neither a query nor a TQL
    /// statement, and propagates the error of converting the parsed query.
    pub fn try_from_statement(
        value: Statement,
        original_query: &str,
    ) -> std::result::Result<Self, crate::error::Error> {
        let flow_query = match value {
            Statement::Query(query) => {
                Self::Sql((*query).try_into()?, original_query.to_string())
            }
            Statement::Tql(tql) => Self::Tql(tql, original_query.to_string()),
            unsupported => {
                // Anything else cannot be used as the body of a flow.
                return InvalidFlowQuerySnafu {
                    reason: format!(
                        "Expect either sql query or promql query, found {:?}",
                        unsupported
                    ),
                }
                .fail();
            }
        };
        Ok(flow_query)
    }
}
impl Display for CreateFlow {
@@ -741,7 +778,7 @@ WITH(
r#"
CREATE FLOW filter_numbers
SINK TO out_num_cnt
AS SELECT number FROM numbers_input WHERE number > 10"#,
AS SELECT number FROM numbers_input where number > 10"#,
&new_sql
);

View File

@@ -42,7 +42,7 @@ fn format_tql(
lookback: Option<&str>,
query: &str,
) -> std::fmt::Result {
write!(f, "({start}, {end}, {step}")?;
write!(f, "({start}, {end}, '{step}'")?;
if let Some(lookback) = lookback {
write!(f, ", {lookback}")?;
}

View File

@@ -15,9 +15,10 @@
use std::collections::HashSet;
use std::fmt::{Display, Formatter};
use sqlparser::ast::{Expr, ObjectName, Query, SetExpr, SqlOption, TableFactor, Value};
use sqlparser::ast::{Expr, ObjectName, SetExpr, SqlOption, TableFactor, Value};
use crate::error::{InvalidSqlSnafu, InvalidTableOptionValueSnafu, Result};
use crate::statements::create::SqlOrTql;
/// Format an [ObjectName] without any quote of its idents.
pub fn format_raw_object_name(name: &ObjectName) -> String {
@@ -58,14 +59,36 @@ pub fn parse_option_string(option: SqlOption) -> Result<(String, String)> {
}
/// Walk through a [Query] and extract all the tables referenced in it.
pub fn extract_tables_from_query(query: &Query) -> impl Iterator<Item = ObjectName> {
pub fn extract_tables_from_query(query: &SqlOrTql) -> impl Iterator<Item = ObjectName> {
let mut names = HashSet::new();
extract_tables_from_set_expr(&query.body, &mut names);
match query {
SqlOrTql::Sql(query, _) => extract_tables_from_set_expr(&query.body, &mut names),
SqlOrTql::Tql(_tql, _) => {
// TQL queries use a sliding time window, so there is no need to extract tables from them
// (the query is going to be evaluated in full anyway)
}
}
names.into_iter()
}
/// Translate a tokenizer location into a byte index into `sql`.
///
/// `location` is 1-based in both line and column, and the column counts
/// characters rather than bytes. The returned value is a 0-based byte offset
/// that always lies on a character boundary and never exceeds `sql.len()`, so it
/// is safe to use for slicing `sql`.
///
/// Lines are delimited by `\n` only; a `\r` preceding it is treated as an
/// ordinary character of the line, so CRLF input maps correctly. An empty
/// location (line `0` / column `0`) maps to index `0`, and a location past the
/// end of the input maps to `sql.len()`.
pub fn location_to_index(sql: &str, location: &sqlparser::tokenizer::Location) -> usize {
    // Byte offset at which the target line starts.
    let mut line_start = 0;
    for _ in 1..location.line {
        match sql[line_start..].find('\n') {
            // `+ 1` skips the newline itself.
            Some(newline) => line_start += newline + 1,
            // Fewer lines than requested: clamp to the end of the input.
            None => return sql.len(),
        }
    }

    // Walk characters (not bytes) to reach the requested column.
    let mut chars_to_skip = location.column.saturating_sub(1);
    for (offset, _) in sql[line_start..].char_indices() {
        if chars_to_skip == 0 {
            return line_start + offset;
        }
        chars_to_skip -= 1;
    }

    // The column points at (or beyond) the end of the input.
    sql.len()
}
/// Helper function for [extract_tables_from_query].
///
/// Handle [SetExpr].
@@ -98,3 +121,53 @@ fn table_factor_to_object_name(table_factor: &TableFactor, names: &mut HashSet<O
names.insert(name.to_owned());
}
}
#[cfg(test)]
mod tests {
use sqlparser::tokenizer::Token;
use super::*;
use crate::dialect::GreptimeDbDialect;
use crate::parser::ParserContext;
/// Checks that slicing the input between the indexes computed for a token's
/// span start and span end yields exactly the token's text, for every token of
/// each test case.
#[test]
fn test_location_to_index() {
let testcases = vec![
"SELECT * FROM t WHERE a = 1",
// start or end with newline
r"
SELECT *
FROM
t
WHERE a =
1
",
r"SELECT *
FROM
t
WHERE a =
1
",
r"
SELECT *
FROM
t
WHERE a =
1",
];
for sql in testcases {
let mut parser = ParserContext::new(&GreptimeDbDialect {}, sql).unwrap();
// Walk every token until EOF and compare it with the slice of the input
// that its span maps to.
loop {
let token = parser.parser.next_token();
if token == Token::EOF {
break;
}
let span = token.span;
let subslice =
&sql[location_to_index(sql, &span.start)..location_to_index(sql, &span.end)];
assert_eq!(token.to_string(), subslice);
}
}
}
}