1use std::any::Any;
16use std::borrow::Cow;
17use std::collections::{HashMap, HashSet};
18use std::str::FromStr;
19use std::sync::Arc;
20
21use arrow_schema::DataType;
22use async_trait::async_trait;
23use catalog::table_source::DfTableSourceProvider;
24use common_error::ext::BoxedError;
25use common_telemetry::tracing;
26use datafusion::common::{DFSchema, plan_err};
27use datafusion::execution::context::SessionState;
28use datafusion::sql::planner::PlannerContext;
29use datafusion_common::ToDFSchema;
30use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
31use datafusion_expr::expr::{Exists, InSubquery};
32use datafusion_expr::{
33 Analyze, Explain, ExplainFormat, Expr as DfExpr, LogicalPlan, LogicalPlanBuilder, PlanType,
34 ToStringifiedPlan, col,
35};
36use datafusion_sql::planner::{ParserOptions, SqlToRel};
37use log_query::LogQuery;
38use promql_parser::parser::EvalStmt;
39use session::context::QueryContextRef;
40use snafu::{ResultExt, ensure};
41use sql::CteContent;
42use sql::ast::Expr as SqlExpr;
43use sql::statements::explain::ExplainStatement;
44use sql::statements::query::Query;
45use sql::statements::statement::Statement;
46use sql::statements::tql::Tql;
47
48use crate::error::{
49 CteColumnSchemaMismatchSnafu, PlanSqlSnafu, QueryPlanSnafu, Result, SqlSnafu,
50 UnimplementedSnafu,
51};
52use crate::log_query::planner::LogQueryPlanner;
53use crate::parser::{DEFAULT_LOOKBACK_STRING, PromQuery, QueryLanguageParser, QueryStatement};
54use crate::promql::planner::PromPlanner;
55use crate::query_engine::{DefaultPlanDecoder, QueryEngineState};
56use crate::range_select::plan_rewrite::RangePlanRewriter;
57use crate::{DfContextProviderAdapter, QueryEngineContext};
58
59#[async_trait]
60pub trait LogicalPlanner: Send + Sync {
61 async fn plan(&self, stmt: &QueryStatement, query_ctx: QueryContextRef) -> Result<LogicalPlan>;
62
63 async fn plan_logs_query(
64 &self,
65 query: LogQuery,
66 query_ctx: QueryContextRef,
67 ) -> Result<LogicalPlan>;
68
69 fn optimize(&self, plan: LogicalPlan) -> Result<LogicalPlan>;
70
71 fn as_any(&self) -> &dyn Any;
72}
73
74pub struct DfLogicalPlanner {
75 engine_state: Arc<QueryEngineState>,
76 session_state: SessionState,
77}
78
79impl DfLogicalPlanner {
80 pub fn new(engine_state: Arc<QueryEngineState>) -> Self {
81 let session_state = engine_state.session_state();
82 Self {
83 engine_state,
84 session_state,
85 }
86 }
87
88 async fn explain_to_plan(
91 &self,
92 explain: &ExplainStatement,
93 query_ctx: QueryContextRef,
94 ) -> Result<LogicalPlan> {
95 let plan = self.plan_sql(&explain.statement, query_ctx).await?;
96 if matches!(plan, LogicalPlan::Explain(_)) {
97 return plan_err!("Nested EXPLAINs are not supported").context(PlanSqlSnafu);
98 }
99
100 let verbose = explain.verbose;
101 let analyze = explain.analyze;
102 let format = explain.format.map(|f| f.to_string());
103
104 let plan = Arc::new(plan);
105 let schema = LogicalPlan::explain_schema();
106 let schema = ToDFSchema::to_dfschema_ref(schema)?;
107
108 if verbose && format.is_some() {
109 return plan_err!("EXPLAIN VERBOSE with FORMAT is not supported").context(PlanSqlSnafu);
110 }
111
112 if analyze {
113 Ok(LogicalPlan::Analyze(Analyze {
115 verbose,
116 input: plan,
117 schema,
118 }))
119 } else {
120 let stringified_plans = vec![plan.to_stringified(PlanType::InitialLogicalPlan)];
121
122 let options = self.session_state.config().options();
124 let format = format
125 .map(|x| ExplainFormat::from_str(&x))
126 .transpose()?
127 .unwrap_or_else(|| options.explain.format.clone());
128
129 Ok(LogicalPlan::Explain(Explain {
130 verbose,
131 explain_format: format,
132 plan,
133 stringified_plans,
134 schema,
135 logical_optimization_succeeded: false,
136 }))
137 }
138 }
139
140 #[tracing::instrument(skip_all)]
141 #[async_recursion::async_recursion]
142 async fn plan_sql(&self, stmt: &Statement, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
143 let mut planner_context = PlannerContext::new();
144 let mut stmt = Cow::Borrowed(stmt);
145 let mut is_tql_cte = false;
146
147 if let Statement::Explain(explain) = stmt.as_ref() {
149 return self.explain_to_plan(explain, query_ctx).await;
150 }
151
152 if self.has_hybrid_ctes(stmt.as_ref()) {
154 let stmt_owned = stmt.into_owned();
155 let mut query = match stmt_owned {
156 Statement::Query(query) => query.as_ref().clone(),
157 _ => unreachable!("has_hybrid_ctes should only return true for Query statements"),
158 };
159 self.plan_query_with_hybrid_ctes(&query, query_ctx.clone(), &mut planner_context)
160 .await?;
161
162 query.hybrid_cte = None;
164 stmt = Cow::Owned(Statement::Query(Box::new(query)));
165 is_tql_cte = true;
166 }
167
168 let mut df_stmt = stmt.as_ref().try_into().context(SqlSnafu)?;
169
170 if let datafusion::sql::parser::Statement::Statement(
172 box datafusion::sql::sqlparser::ast::Statement::Explain { .. },
173 ) = &mut df_stmt
174 {
175 UnimplementedSnafu {
176 operation: "EXPLAIN with FORMAT using raw datafusion planner",
177 }
178 .fail()?;
179 }
180
181 let table_provider = DfTableSourceProvider::new(
182 self.engine_state.catalog_manager().clone(),
183 self.engine_state.disallow_cross_catalog_query(),
184 query_ctx.clone(),
185 Arc::new(DefaultPlanDecoder::new(
186 self.session_state.clone(),
187 &query_ctx,
188 )?),
189 self.session_state
190 .config_options()
191 .sql_parser
192 .enable_ident_normalization,
193 );
194
195 let context_provider = DfContextProviderAdapter::try_new(
196 self.engine_state.clone(),
197 self.session_state.clone(),
198 Some(&df_stmt),
199 query_ctx.clone(),
200 )
201 .await?;
202
203 let config_options = self.session_state.config().options();
204 let parser_options = &config_options.sql_parser;
205 let parser_options = ParserOptions {
206 map_string_types_to_utf8view: false,
207 ..parser_options.into()
208 };
209
210 let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
211
212 let result = if is_tql_cte {
214 let Statement::Query(query) = stmt.into_owned() else {
215 unreachable!("is_tql_cte should only be true for Query statements");
216 };
217 let sqlparser_stmt = sqlparser::ast::Statement::Query(Box::new(query.inner));
218 sql_to_rel
219 .sql_statement_to_plan_with_context(sqlparser_stmt, &mut planner_context)
220 .context(PlanSqlSnafu)?
221 } else {
222 sql_to_rel
223 .statement_to_plan(df_stmt)
224 .context(PlanSqlSnafu)?
225 };
226
227 common_telemetry::debug!("Logical planner, statement to plan result: {result}");
228 let plan = RangePlanRewriter::new(table_provider, query_ctx.clone())
229 .rewrite(result)
230 .await?;
231
232 let context = QueryEngineContext::new(self.session_state.clone(), query_ctx);
234 let plan = self
235 .engine_state
236 .optimize_by_extension_rules(plan, &context)?;
237 common_telemetry::debug!("Logical planner, optimize result: {plan}");
238
239 Ok(plan)
240 }
241
242 #[tracing::instrument(skip_all)]
244 pub(crate) async fn sql_to_expr(
245 &self,
246 sql: SqlExpr,
247 schema: &DFSchema,
248 normalize_ident: bool,
249 query_ctx: QueryContextRef,
250 ) -> Result<DfExpr> {
251 let context_provider = DfContextProviderAdapter::try_new(
252 self.engine_state.clone(),
253 self.session_state.clone(),
254 None,
255 query_ctx,
256 )
257 .await?;
258
259 let config_options = self.session_state.config().options();
260 let parser_options = &config_options.sql_parser;
261 let parser_options: ParserOptions = ParserOptions {
262 map_string_types_to_utf8view: false,
263 enable_ident_normalization: normalize_ident,
264 ..parser_options.into()
265 };
266
267 let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
268
269 Ok(sql_to_rel.sql_to_expr(sql, schema, &mut PlannerContext::new())?)
270 }
271
272 #[tracing::instrument(skip_all)]
273 async fn plan_pql(&self, stmt: &EvalStmt, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
274 let plan_decoder = Arc::new(DefaultPlanDecoder::new(
275 self.session_state.clone(),
276 &query_ctx,
277 )?);
278 let table_provider = DfTableSourceProvider::new(
279 self.engine_state.catalog_manager().clone(),
280 self.engine_state.disallow_cross_catalog_query(),
281 query_ctx,
282 plan_decoder,
283 self.session_state
284 .config_options()
285 .sql_parser
286 .enable_ident_normalization,
287 );
288 PromPlanner::stmt_to_plan(table_provider, stmt, &self.engine_state)
289 .await
290 .map_err(BoxedError::new)
291 .context(QueryPlanSnafu)
292 }
293
294 #[tracing::instrument(skip_all)]
295 fn optimize_logical_plan(&self, plan: LogicalPlan) -> Result<LogicalPlan> {
296 Ok(self.engine_state.optimize_logical_plan(plan)?)
297 }
298
299 fn has_hybrid_ctes(&self, stmt: &Statement) -> bool {
301 if let Statement::Query(query) = stmt {
302 query
303 .hybrid_cte
304 .as_ref()
305 .map(|hybrid_cte| !hybrid_cte.cte_tables.is_empty())
306 .unwrap_or(false)
307 } else {
308 false
309 }
310 }
311
312 async fn plan_query_with_hybrid_ctes(
314 &self,
315 query: &Query,
316 query_ctx: QueryContextRef,
317 planner_context: &mut PlannerContext,
318 ) -> Result<()> {
319 let hybrid_cte = query.hybrid_cte.as_ref().unwrap();
320
321 for cte in &hybrid_cte.cte_tables {
322 match &cte.content {
323 CteContent::Tql(tql) => {
324 let mut logical_plan = self.tql_to_logical_plan(tql, query_ctx.clone()).await?;
326 if !cte.columns.is_empty() {
327 let schema = logical_plan.schema();
328 let schema_fields = schema.fields().to_vec();
329 ensure!(
330 schema_fields.len() == cte.columns.len(),
331 CteColumnSchemaMismatchSnafu {
332 cte_name: cte.name.value.clone(),
333 original: schema_fields
334 .iter()
335 .map(|field| field.name().clone())
336 .collect::<Vec<_>>(),
337 expected: cte
338 .columns
339 .iter()
340 .map(|column| column.to_string())
341 .collect::<Vec<_>>(),
342 }
343 );
344 let aliases = cte
345 .columns
346 .iter()
347 .zip(schema_fields.iter())
348 .map(|(column, field)| col(field.name()).alias(column.to_string()));
349 logical_plan = LogicalPlanBuilder::from(logical_plan)
350 .project(aliases)
351 .context(PlanSqlSnafu)?
352 .build()
353 .context(PlanSqlSnafu)?;
354 }
355
356 logical_plan = LogicalPlan::SubqueryAlias(
358 datafusion_expr::SubqueryAlias::try_new(
359 Arc::new(logical_plan),
360 cte.name.value.clone(),
361 )
362 .context(PlanSqlSnafu)?,
363 );
364
365 planner_context.insert_cte(&cte.name.value, logical_plan);
366 }
367 CteContent::Sql(_) => {
368 unreachable!("SQL CTEs should not be in hybrid_cte.cte_tables");
371 }
372 }
373 }
374
375 Ok(())
376 }
377
378 async fn tql_to_logical_plan(
380 &self,
381 tql: &Tql,
382 query_ctx: QueryContextRef,
383 ) -> Result<LogicalPlan> {
384 match tql {
385 Tql::Eval(eval) => {
386 let prom_query = PromQuery {
388 query: eval.query.clone(),
389 start: eval.start.clone(),
390 end: eval.end.clone(),
391 step: eval.step.clone(),
392 lookback: eval
393 .lookback
394 .clone()
395 .unwrap_or_else(|| DEFAULT_LOOKBACK_STRING.to_string()),
396 alias: eval.alias.clone(),
397 };
398 let stmt = QueryLanguageParser::parse_promql(&prom_query, &query_ctx)?;
399
400 self.plan(&stmt, query_ctx).await
401 }
402 Tql::Explain(_) => UnimplementedSnafu {
403 operation: "TQL EXPLAIN in CTEs",
404 }
405 .fail(),
406 Tql::Analyze(_) => UnimplementedSnafu {
407 operation: "TQL ANALYZE in CTEs",
408 }
409 .fail(),
410 }
411 }
412
413 fn extract_placeholder_cast_types(
423 plan: &LogicalPlan,
424 ) -> Result<HashMap<String, Option<DataType>>> {
425 let mut placeholder_types = HashMap::new();
426 let mut casted_placeholders = HashSet::new();
427
428 Self::extract_from_plan(plan, &mut placeholder_types, &mut casted_placeholders)?;
429
430 Ok(placeholder_types)
431 }
432
433 fn extract_from_plan(
434 plan: &LogicalPlan,
435 placeholder_types: &mut HashMap<String, Option<DataType>>,
436 casted_placeholders: &mut HashSet<String>,
437 ) -> Result<()> {
438 plan.apply(|node| {
439 for expr in node.expressions() {
440 let _ = expr.apply(|e| {
441 if let DfExpr::Cast(cast) = e
443 && let DfExpr::Placeholder(ph) = &*cast.expr
444 {
445 placeholder_types.insert(ph.id.clone(), Some(cast.data_type.clone()));
446 casted_placeholders.insert(ph.id.clone());
447 }
448
449 if let DfExpr::Placeholder(ph) = e
451 && !casted_placeholders.contains(&ph.id)
452 && !placeholder_types.contains_key(&ph.id)
453 {
454 placeholder_types.insert(ph.id.clone(), None);
455 }
456
457 match e {
459 DfExpr::Exists(Exists { subquery, .. })
460 | DfExpr::InSubquery(InSubquery { subquery, .. })
461 | DfExpr::ScalarSubquery(subquery) => {
462 Self::extract_from_plan(
463 &subquery.subquery,
464 placeholder_types,
465 casted_placeholders,
466 )?;
467 }
468 _ => {}
469 }
470
471 Ok(TreeNodeRecursion::Continue)
472 });
473 }
474 Ok(TreeNodeRecursion::Continue)
475 })?;
476 Ok(())
477 }
478
479 pub fn get_inferred_parameter_types(
493 plan: &LogicalPlan,
494 ) -> Result<HashMap<String, Option<DataType>>> {
495 let param_types = plan.get_parameter_types().context(PlanSqlSnafu)?;
496
497 let has_none = param_types.values().any(|v| v.is_none());
498
499 if !has_none {
500 Ok(param_types)
501 } else {
502 let cast_types = Self::extract_placeholder_cast_types(plan)?;
503
504 let mut merged = param_types;
505
506 for (id, opt_type) in cast_types {
507 merged
508 .entry(id)
509 .and_modify(|existing| {
510 if existing.is_none() {
511 *existing = opt_type.clone();
512 }
513 })
514 .or_insert(opt_type);
515 }
516
517 Ok(merged)
518 }
519 }
520}
521
522#[async_trait]
523impl LogicalPlanner for DfLogicalPlanner {
524 #[tracing::instrument(skip_all)]
525 async fn plan(&self, stmt: &QueryStatement, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
526 match stmt {
527 QueryStatement::Sql(stmt) => self.plan_sql(stmt, query_ctx).await,
528 QueryStatement::Promql(stmt, _alias) => self.plan_pql(stmt, query_ctx).await,
529 }
530 }
531
532 async fn plan_logs_query(
533 &self,
534 query: LogQuery,
535 query_ctx: QueryContextRef,
536 ) -> Result<LogicalPlan> {
537 let plan_decoder = Arc::new(DefaultPlanDecoder::new(
538 self.session_state.clone(),
539 &query_ctx,
540 )?);
541 let table_provider = DfTableSourceProvider::new(
542 self.engine_state.catalog_manager().clone(),
543 self.engine_state.disallow_cross_catalog_query(),
544 query_ctx,
545 plan_decoder,
546 self.session_state
547 .config_options()
548 .sql_parser
549 .enable_ident_normalization,
550 );
551
552 let mut planner = LogQueryPlanner::new(table_provider, self.session_state.clone());
553 planner
554 .query_to_plan(query)
555 .await
556 .map_err(BoxedError::new)
557 .context(QueryPlanSnafu)
558 }
559
560 fn optimize(&self, plan: LogicalPlan) -> Result<LogicalPlan> {
561 self.optimize_logical_plan(plan)
562 }
563
564 fn as_any(&self) -> &dyn Any {
565 self
566 }
567}
568
569#[cfg(test)]
570mod tests {
571 use std::sync::Arc;
572
573 use arrow_schema::DataType;
574 use datatypes::prelude::ConcreteDataType;
575 use datatypes::schema::{ColumnSchema, Schema};
576 use session::context::QueryContext;
577 use table::metadata::{TableInfoBuilder, TableMetaBuilder};
578 use table::test_util::EmptyTable;
579
580 use super::*;
581 use crate::QueryEngineRef;
582 use crate::parser::QueryLanguageParser;
583
584 async fn create_test_engine() -> QueryEngineRef {
585 let columns = vec![
586 ColumnSchema::new("id", ConcreteDataType::int32_datatype(), false),
587 ColumnSchema::new("name", ConcreteDataType::string_datatype(), true),
588 ];
589 let schema = Arc::new(Schema::new(columns));
590 let table_meta = TableMetaBuilder::empty()
591 .schema(schema)
592 .primary_key_indices(vec![0])
593 .value_indices(vec![1])
594 .next_column_id(1024)
595 .build()
596 .unwrap();
597 let table_info = TableInfoBuilder::new("test", table_meta).build().unwrap();
598 let table = EmptyTable::from_table_info(&table_info);
599
600 crate::tests::new_query_engine_with_table(table)
601 }
602
603 async fn parse_sql_to_plan(sql: &str) -> LogicalPlan {
604 let stmt = QueryLanguageParser::parse_sql(sql, &QueryContext::arc()).unwrap();
605 let engine = create_test_engine().await;
606 engine
607 .planner()
608 .plan(&stmt, QueryContext::arc())
609 .await
610 .unwrap()
611 }
612
613 #[tokio::test]
614 async fn test_extract_placeholder_cast_types_multiple() {
615 let plan = parse_sql_to_plan(
616 "SELECT $1::INT, $2::TEXT, $3, $4::INTEGER FROM test WHERE $5::FLOAT > 0",
617 )
618 .await;
619 let types = DfLogicalPlanner::extract_placeholder_cast_types(&plan).unwrap();
620
621 assert_eq!(types.len(), 5);
622 assert_eq!(types.get("$1"), Some(&Some(DataType::Int32)));
623 assert_eq!(types.get("$2"), Some(&Some(DataType::Utf8)));
624 assert_eq!(types.get("$3"), Some(&None));
625 assert_eq!(types.get("$4"), Some(&Some(DataType::Int32)));
626 assert_eq!(types.get("$5"), Some(&Some(DataType::Float32)));
627 }
628
629 #[tokio::test]
630 async fn test_get_inferred_parameter_types_fallback_for_udf_args() {
631 let plan = parse_sql_to_plan(
633 "SELECT parse_ident($1), parse_ident($2::TEXT) FROM test WHERE id > $3",
634 )
635 .await;
636 let types = DfLogicalPlanner::get_inferred_parameter_types(&plan).unwrap();
637
638 assert_eq!(types.len(), 3);
639
640 let type_1 = types.get("$1").unwrap();
641 let type_2 = types.get("$2").unwrap();
642 let type_3 = types.get("$3").unwrap();
643
644 assert!(type_1.is_none(), "Expected $1 to be None");
645 assert_eq!(type_2, &Some(DataType::Utf8));
646 assert_eq!(type_3, &Some(DataType::Int32));
647 }
648
649 #[tokio::test]
650 async fn test_get_inferred_parameter_types_subquery() {
651 let plan = parse_sql_to_plan(
652 r#"SELECT * FROM test WHERE id = (SELECT id FROM test CROSS JOIN (SELECT parse_ident($1::TEXT) AS parts) p LIMIT 1)"#,
653 ).await;
654 let types = DfLogicalPlanner::get_inferred_parameter_types(&plan).unwrap();
655
656 assert_eq!(types.len(), 1);
657 let type_1 = types.get("$1").unwrap();
658 assert_eq!(type_1, &Some(DataType::Utf8));
659 }
660}