query/
planner.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::any::Any;
16use std::borrow::Cow;
17use std::collections::{HashMap, HashSet};
18use std::str::FromStr;
19use std::sync::Arc;
20
21use arrow_schema::DataType;
22use async_trait::async_trait;
23use catalog::table_source::DfTableSourceProvider;
24use common_error::ext::BoxedError;
25use common_telemetry::tracing;
26use datafusion::common::{DFSchema, plan_err};
27use datafusion::execution::context::SessionState;
28use datafusion::sql::planner::PlannerContext;
29use datafusion_common::ToDFSchema;
30use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
31use datafusion_expr::expr::{Exists, InSubquery};
32use datafusion_expr::{
33    Analyze, Explain, ExplainFormat, Expr as DfExpr, LogicalPlan, LogicalPlanBuilder, PlanType,
34    ToStringifiedPlan, col,
35};
36use datafusion_sql::planner::{ParserOptions, SqlToRel};
37use log_query::LogQuery;
38use promql_parser::parser::EvalStmt;
39use session::context::QueryContextRef;
40use snafu::{ResultExt, ensure};
41use sql::CteContent;
42use sql::ast::Expr as SqlExpr;
43use sql::statements::explain::ExplainStatement;
44use sql::statements::query::Query;
45use sql::statements::statement::Statement;
46use sql::statements::tql::Tql;
47
48use crate::error::{
49    CteColumnSchemaMismatchSnafu, PlanSqlSnafu, QueryPlanSnafu, Result, SqlSnafu,
50    UnimplementedSnafu,
51};
52use crate::log_query::planner::LogQueryPlanner;
53use crate::parser::{DEFAULT_LOOKBACK_STRING, PromQuery, QueryLanguageParser, QueryStatement};
54use crate::promql::planner::PromPlanner;
55use crate::query_engine::{DefaultPlanDecoder, QueryEngineState};
56use crate::range_select::plan_rewrite::RangePlanRewriter;
57use crate::{DfContextProviderAdapter, QueryEngineContext};
58
59#[async_trait]
60pub trait LogicalPlanner: Send + Sync {
61    async fn plan(&self, stmt: &QueryStatement, query_ctx: QueryContextRef) -> Result<LogicalPlan>;
62
63    async fn plan_logs_query(
64        &self,
65        query: LogQuery,
66        query_ctx: QueryContextRef,
67    ) -> Result<LogicalPlan>;
68
69    fn optimize(&self, plan: LogicalPlan) -> Result<LogicalPlan>;
70
71    fn as_any(&self) -> &dyn Any;
72}
73
74pub struct DfLogicalPlanner {
75    engine_state: Arc<QueryEngineState>,
76    session_state: SessionState,
77}
78
79impl DfLogicalPlanner {
80    pub fn new(engine_state: Arc<QueryEngineState>) -> Self {
81        let session_state = engine_state.session_state();
82        Self {
83            engine_state,
84            session_state,
85        }
86    }
87
88    /// Basically the same with `explain_to_plan` in DataFusion, but adapted to Greptime's
89    /// `plan_sql` to support Greptime Statements.
90    async fn explain_to_plan(
91        &self,
92        explain: &ExplainStatement,
93        query_ctx: QueryContextRef,
94    ) -> Result<LogicalPlan> {
95        let plan = self.plan_sql(&explain.statement, query_ctx).await?;
96        if matches!(plan, LogicalPlan::Explain(_)) {
97            return plan_err!("Nested EXPLAINs are not supported").context(PlanSqlSnafu);
98        }
99
100        let verbose = explain.verbose;
101        let analyze = explain.analyze;
102        let format = explain.format.map(|f| f.to_string());
103
104        let plan = Arc::new(plan);
105        let schema = LogicalPlan::explain_schema();
106        let schema = ToDFSchema::to_dfschema_ref(schema)?;
107
108        if verbose && format.is_some() {
109            return plan_err!("EXPLAIN VERBOSE with FORMAT is not supported").context(PlanSqlSnafu);
110        }
111
112        if analyze {
113            // notice format is already set in query context, so can be ignore here
114            Ok(LogicalPlan::Analyze(Analyze {
115                verbose,
116                input: plan,
117                schema,
118            }))
119        } else {
120            let stringified_plans = vec![plan.to_stringified(PlanType::InitialLogicalPlan)];
121
122            // default to configuration value
123            let options = self.session_state.config().options();
124            let format = format
125                .map(|x| ExplainFormat::from_str(&x))
126                .transpose()?
127                .unwrap_or_else(|| options.explain.format.clone());
128
129            Ok(LogicalPlan::Explain(Explain {
130                verbose,
131                explain_format: format,
132                plan,
133                stringified_plans,
134                schema,
135                logical_optimization_succeeded: false,
136            }))
137        }
138    }
139
140    #[tracing::instrument(skip_all)]
141    #[async_recursion::async_recursion]
142    async fn plan_sql(&self, stmt: &Statement, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
143        let mut planner_context = PlannerContext::new();
144        let mut stmt = Cow::Borrowed(stmt);
145        let mut is_tql_cte = false;
146
147        // handle explain before normal processing so we can explain Greptime Statements
148        if let Statement::Explain(explain) = stmt.as_ref() {
149            return self.explain_to_plan(explain, query_ctx).await;
150        }
151
152        // Check for hybrid CTEs before normal processing
153        if self.has_hybrid_ctes(stmt.as_ref()) {
154            let stmt_owned = stmt.into_owned();
155            let mut query = match stmt_owned {
156                Statement::Query(query) => query.as_ref().clone(),
157                _ => unreachable!("has_hybrid_ctes should only return true for Query statements"),
158            };
159            self.plan_query_with_hybrid_ctes(&query, query_ctx.clone(), &mut planner_context)
160                .await?;
161
162            // remove the processed TQL CTEs from the query
163            query.hybrid_cte = None;
164            stmt = Cow::Owned(Statement::Query(Box::new(query)));
165            is_tql_cte = true;
166        }
167
168        let mut df_stmt = stmt.as_ref().try_into().context(SqlSnafu)?;
169
170        // TODO(LFC): Remove this when Datafusion supports **both** the syntax and implementation of "explain with format".
171        if let datafusion::sql::parser::Statement::Statement(
172            box datafusion::sql::sqlparser::ast::Statement::Explain { .. },
173        ) = &mut df_stmt
174        {
175            UnimplementedSnafu {
176                operation: "EXPLAIN with FORMAT using raw datafusion planner",
177            }
178            .fail()?;
179        }
180
181        let table_provider = DfTableSourceProvider::new(
182            self.engine_state.catalog_manager().clone(),
183            self.engine_state.disallow_cross_catalog_query(),
184            query_ctx.clone(),
185            Arc::new(DefaultPlanDecoder::new(
186                self.session_state.clone(),
187                &query_ctx,
188            )?),
189            self.session_state
190                .config_options()
191                .sql_parser
192                .enable_ident_normalization,
193        );
194
195        let context_provider = DfContextProviderAdapter::try_new(
196            self.engine_state.clone(),
197            self.session_state.clone(),
198            Some(&df_stmt),
199            query_ctx.clone(),
200        )
201        .await?;
202
203        let config_options = self.session_state.config().options();
204        let parser_options = &config_options.sql_parser;
205        let parser_options = ParserOptions {
206            map_string_types_to_utf8view: false,
207            ..parser_options.into()
208        };
209
210        let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
211
212        // this IF is to handle different version of ASTs
213        let result = if is_tql_cte {
214            let Statement::Query(query) = stmt.into_owned() else {
215                unreachable!("is_tql_cte should only be true for Query statements");
216            };
217            let sqlparser_stmt = sqlparser::ast::Statement::Query(Box::new(query.inner));
218            sql_to_rel
219                .sql_statement_to_plan_with_context(sqlparser_stmt, &mut planner_context)
220                .context(PlanSqlSnafu)?
221        } else {
222            sql_to_rel
223                .statement_to_plan(df_stmt)
224                .context(PlanSqlSnafu)?
225        };
226
227        common_telemetry::debug!("Logical planner, statement to plan result: {result}");
228        let plan = RangePlanRewriter::new(table_provider, query_ctx.clone())
229            .rewrite(result)
230            .await?;
231
232        // Optimize logical plan by extension rules
233        let context = QueryEngineContext::new(self.session_state.clone(), query_ctx);
234        let plan = self
235            .engine_state
236            .optimize_by_extension_rules(plan, &context)?;
237        common_telemetry::debug!("Logical planner, optimize result: {plan}");
238
239        Ok(plan)
240    }
241
242    /// Generate a relational expression from a SQL expression
243    #[tracing::instrument(skip_all)]
244    pub(crate) async fn sql_to_expr(
245        &self,
246        sql: SqlExpr,
247        schema: &DFSchema,
248        normalize_ident: bool,
249        query_ctx: QueryContextRef,
250    ) -> Result<DfExpr> {
251        let context_provider = DfContextProviderAdapter::try_new(
252            self.engine_state.clone(),
253            self.session_state.clone(),
254            None,
255            query_ctx,
256        )
257        .await?;
258
259        let config_options = self.session_state.config().options();
260        let parser_options = &config_options.sql_parser;
261        let parser_options: ParserOptions = ParserOptions {
262            map_string_types_to_utf8view: false,
263            enable_ident_normalization: normalize_ident,
264            ..parser_options.into()
265        };
266
267        let sql_to_rel = SqlToRel::new_with_options(&context_provider, parser_options);
268
269        Ok(sql_to_rel.sql_to_expr(sql, schema, &mut PlannerContext::new())?)
270    }
271
272    #[tracing::instrument(skip_all)]
273    async fn plan_pql(&self, stmt: &EvalStmt, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
274        let plan_decoder = Arc::new(DefaultPlanDecoder::new(
275            self.session_state.clone(),
276            &query_ctx,
277        )?);
278        let table_provider = DfTableSourceProvider::new(
279            self.engine_state.catalog_manager().clone(),
280            self.engine_state.disallow_cross_catalog_query(),
281            query_ctx,
282            plan_decoder,
283            self.session_state
284                .config_options()
285                .sql_parser
286                .enable_ident_normalization,
287        );
288        PromPlanner::stmt_to_plan(table_provider, stmt, &self.engine_state)
289            .await
290            .map_err(BoxedError::new)
291            .context(QueryPlanSnafu)
292    }
293
294    #[tracing::instrument(skip_all)]
295    fn optimize_logical_plan(&self, plan: LogicalPlan) -> Result<LogicalPlan> {
296        Ok(self.engine_state.optimize_logical_plan(plan)?)
297    }
298
299    /// Check if a statement contains hybrid CTEs (mix of SQL and TQL)
300    fn has_hybrid_ctes(&self, stmt: &Statement) -> bool {
301        if let Statement::Query(query) = stmt {
302            query
303                .hybrid_cte
304                .as_ref()
305                .map(|hybrid_cte| !hybrid_cte.cte_tables.is_empty())
306                .unwrap_or(false)
307        } else {
308            false
309        }
310    }
311
312    /// Plan a query with hybrid CTEs using DataFusion's native PlannerContext
313    async fn plan_query_with_hybrid_ctes(
314        &self,
315        query: &Query,
316        query_ctx: QueryContextRef,
317        planner_context: &mut PlannerContext,
318    ) -> Result<()> {
319        let hybrid_cte = query.hybrid_cte.as_ref().unwrap();
320
321        for cte in &hybrid_cte.cte_tables {
322            match &cte.content {
323                CteContent::Tql(tql) => {
324                    // Plan TQL and register in PlannerContext
325                    let mut logical_plan = self.tql_to_logical_plan(tql, query_ctx.clone()).await?;
326                    if !cte.columns.is_empty() {
327                        let schema = logical_plan.schema();
328                        let schema_fields = schema.fields().to_vec();
329                        ensure!(
330                            schema_fields.len() == cte.columns.len(),
331                            CteColumnSchemaMismatchSnafu {
332                                cte_name: cte.name.value.clone(),
333                                original: schema_fields
334                                    .iter()
335                                    .map(|field| field.name().clone())
336                                    .collect::<Vec<_>>(),
337                                expected: cte
338                                    .columns
339                                    .iter()
340                                    .map(|column| column.to_string())
341                                    .collect::<Vec<_>>(),
342                            }
343                        );
344                        let aliases = cte
345                            .columns
346                            .iter()
347                            .zip(schema_fields.iter())
348                            .map(|(column, field)| col(field.name()).alias(column.to_string()));
349                        logical_plan = LogicalPlanBuilder::from(logical_plan)
350                            .project(aliases)
351                            .context(PlanSqlSnafu)?
352                            .build()
353                            .context(PlanSqlSnafu)?;
354                    }
355
356                    // Wrap in SubqueryAlias to ensure proper table qualification for CTE
357                    logical_plan = LogicalPlan::SubqueryAlias(
358                        datafusion_expr::SubqueryAlias::try_new(
359                            Arc::new(logical_plan),
360                            cte.name.value.clone(),
361                        )
362                        .context(PlanSqlSnafu)?,
363                    );
364
365                    planner_context.insert_cte(&cte.name.value, logical_plan);
366                }
367                CteContent::Sql(_) => {
368                    // SQL CTEs should have been moved to the main query's WITH clause
369                    // during parsing, so we shouldn't encounter them here
370                    unreachable!("SQL CTEs should not be in hybrid_cte.cte_tables");
371                }
372            }
373        }
374
375        Ok(())
376    }
377
378    /// Convert TQL to LogicalPlan directly
379    async fn tql_to_logical_plan(
380        &self,
381        tql: &Tql,
382        query_ctx: QueryContextRef,
383    ) -> Result<LogicalPlan> {
384        match tql {
385            Tql::Eval(eval) => {
386                // Convert TqlEval to PromQuery then to QueryStatement::Promql
387                let prom_query = PromQuery {
388                    query: eval.query.clone(),
389                    start: eval.start.clone(),
390                    end: eval.end.clone(),
391                    step: eval.step.clone(),
392                    lookback: eval
393                        .lookback
394                        .clone()
395                        .unwrap_or_else(|| DEFAULT_LOOKBACK_STRING.to_string()),
396                    alias: eval.alias.clone(),
397                };
398                let stmt = QueryLanguageParser::parse_promql(&prom_query, &query_ctx)?;
399
400                self.plan(&stmt, query_ctx).await
401            }
402            Tql::Explain(_) => UnimplementedSnafu {
403                operation: "TQL EXPLAIN in CTEs",
404            }
405            .fail(),
406            Tql::Analyze(_) => UnimplementedSnafu {
407                operation: "TQL ANALYZE in CTEs",
408            }
409            .fail(),
410        }
411    }
412
413    /// Extracts cast types for all placeholders in a logical plan.
414    /// Returns a map where each placeholder ID is mapped to:
415    /// - Some(DataType) if the placeholder is cast to a specific type
416    /// - None if the placeholder exists but has no cast
417    ///
418    /// Example: `$1::TEXT` returns `{"$1": Some(DataType::Utf8)}`
419    ///
420    /// This function walks through all expressions in the logical plan,
421    /// including subqueries, to identify placeholders and their cast types.
422    fn extract_placeholder_cast_types(
423        plan: &LogicalPlan,
424    ) -> Result<HashMap<String, Option<DataType>>> {
425        let mut placeholder_types = HashMap::new();
426        let mut casted_placeholders = HashSet::new();
427
428        Self::extract_from_plan(plan, &mut placeholder_types, &mut casted_placeholders)?;
429
430        Ok(placeholder_types)
431    }
432
433    fn extract_from_plan(
434        plan: &LogicalPlan,
435        placeholder_types: &mut HashMap<String, Option<DataType>>,
436        casted_placeholders: &mut HashSet<String>,
437    ) -> Result<()> {
438        plan.apply(|node| {
439            for expr in node.expressions() {
440                let _ = expr.apply(|e| {
441                    // Handle casted placeholders
442                    if let DfExpr::Cast(cast) = e
443                        && let DfExpr::Placeholder(ph) = &*cast.expr
444                    {
445                        placeholder_types.insert(ph.id.clone(), Some(cast.data_type.clone()));
446                        casted_placeholders.insert(ph.id.clone());
447                    }
448
449                    // Handle bare (non-casted) placeholders
450                    if let DfExpr::Placeholder(ph) = e
451                        && !casted_placeholders.contains(&ph.id)
452                        && !placeholder_types.contains_key(&ph.id)
453                    {
454                        placeholder_types.insert(ph.id.clone(), None);
455                    }
456
457                    // Recurse into subquery plans embedded in expressions
458                    match e {
459                        DfExpr::Exists(Exists { subquery, .. })
460                        | DfExpr::InSubquery(InSubquery { subquery, .. })
461                        | DfExpr::ScalarSubquery(subquery) => {
462                            Self::extract_from_plan(
463                                &subquery.subquery,
464                                placeholder_types,
465                                casted_placeholders,
466                            )?;
467                        }
468                        _ => {}
469                    }
470
471                    Ok(TreeNodeRecursion::Continue)
472                });
473            }
474            Ok(TreeNodeRecursion::Continue)
475        })?;
476        Ok(())
477    }
478
479    /// Gets inferred parameter types from a logical plan.
480    /// Returns a map where each parameter ID is mapped to:
481    /// - Some(DataType) if the parameter type could be inferred
482    /// - None if the parameter type could not be inferred
483    ///
484    /// This function first uses DataFusion's `get_parameter_types()` to infer types.
485    /// If any parameters have `None` values (i.e., DataFusion couldn't infer their types),
486    /// it falls back to using `extract_placeholder_cast_types()` to detect explicit casts.
487    ///
488    /// This is because datafusion can only infer types for a limited cases.
489    ///
490    /// Example: For query `WHERE $1::TEXT AND $2`, DataFusion may not infer `$2`'s type,
491    /// but this function will return `{"$1": Some(DataType::Utf8), "$2": None}`.
492    pub fn get_inferred_parameter_types(
493        plan: &LogicalPlan,
494    ) -> Result<HashMap<String, Option<DataType>>> {
495        let param_types = plan.get_parameter_types().context(PlanSqlSnafu)?;
496
497        let has_none = param_types.values().any(|v| v.is_none());
498
499        if !has_none {
500            Ok(param_types)
501        } else {
502            let cast_types = Self::extract_placeholder_cast_types(plan)?;
503
504            let mut merged = param_types;
505
506            for (id, opt_type) in cast_types {
507                merged
508                    .entry(id)
509                    .and_modify(|existing| {
510                        if existing.is_none() {
511                            *existing = opt_type.clone();
512                        }
513                    })
514                    .or_insert(opt_type);
515            }
516
517            Ok(merged)
518        }
519    }
520}
521
522#[async_trait]
523impl LogicalPlanner for DfLogicalPlanner {
524    #[tracing::instrument(skip_all)]
525    async fn plan(&self, stmt: &QueryStatement, query_ctx: QueryContextRef) -> Result<LogicalPlan> {
526        match stmt {
527            QueryStatement::Sql(stmt) => self.plan_sql(stmt, query_ctx).await,
528            QueryStatement::Promql(stmt, _alias) => self.plan_pql(stmt, query_ctx).await,
529        }
530    }
531
532    async fn plan_logs_query(
533        &self,
534        query: LogQuery,
535        query_ctx: QueryContextRef,
536    ) -> Result<LogicalPlan> {
537        let plan_decoder = Arc::new(DefaultPlanDecoder::new(
538            self.session_state.clone(),
539            &query_ctx,
540        )?);
541        let table_provider = DfTableSourceProvider::new(
542            self.engine_state.catalog_manager().clone(),
543            self.engine_state.disallow_cross_catalog_query(),
544            query_ctx,
545            plan_decoder,
546            self.session_state
547                .config_options()
548                .sql_parser
549                .enable_ident_normalization,
550        );
551
552        let mut planner = LogQueryPlanner::new(table_provider, self.session_state.clone());
553        planner
554            .query_to_plan(query)
555            .await
556            .map_err(BoxedError::new)
557            .context(QueryPlanSnafu)
558    }
559
560    fn optimize(&self, plan: LogicalPlan) -> Result<LogicalPlan> {
561        self.optimize_logical_plan(plan)
562    }
563
564    fn as_any(&self) -> &dyn Any {
565        self
566    }
567}
568
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow_schema::DataType;
    use datatypes::prelude::ConcreteDataType;
    use datatypes::schema::{ColumnSchema, Schema};
    use session::context::QueryContext;
    use table::metadata::{TableInfoBuilder, TableMetaBuilder};
    use table::test_util::EmptyTable;

    use super::*;
    use crate::QueryEngineRef;
    use crate::parser::QueryLanguageParser;

    /// Builds a query engine holding a single empty table named `test` with an int32
    /// `id` column and a string `name` column.
    async fn create_test_engine() -> QueryEngineRef {
        let schema = Schema::new(vec![
            ColumnSchema::new("id", ConcreteDataType::int32_datatype(), false),
            ColumnSchema::new("name", ConcreteDataType::string_datatype(), true),
        ]);
        let meta = TableMetaBuilder::empty()
            .schema(Arc::new(schema))
            .primary_key_indices(vec![0])
            .value_indices(vec![1])
            .next_column_id(1024)
            .build()
            .unwrap();
        let info = TableInfoBuilder::new("test", meta).build().unwrap();

        crate::tests::new_query_engine_with_table(EmptyTable::from_table_info(&info))
    }

    /// Parses `sql` and plans it against the test engine, panicking on any failure.
    async fn parse_sql_to_plan(sql: &str) -> LogicalPlan {
        let stmt = QueryLanguageParser::parse_sql(sql, &QueryContext::arc()).unwrap();
        let engine = create_test_engine().await;
        let planner = engine.planner();
        planner.plan(&stmt, QueryContext::arc()).await.unwrap()
    }

    #[tokio::test]
    async fn test_extract_placeholder_cast_types_multiple() {
        let plan = parse_sql_to_plan(
            "SELECT $1::INT, $2::TEXT, $3, $4::INTEGER FROM test WHERE $5::FLOAT > 0",
        )
        .await;
        let types = DfLogicalPlanner::extract_placeholder_cast_types(&plan).unwrap();

        // Every placeholder must be present; only `$3` has no explicit cast.
        let expected = [
            ("$1", Some(DataType::Int32)),
            ("$2", Some(DataType::Utf8)),
            ("$3", None),
            ("$4", Some(DataType::Int32)),
            ("$5", Some(DataType::Float32)),
        ];

        assert_eq!(types.len(), expected.len());
        for (id, data_type) in &expected {
            assert_eq!(types.get(*id), Some(data_type));
        }
    }

    #[tokio::test]
    async fn test_get_inferred_parameter_types_fallback_for_udf_args() {
        // datafusion is not able to infer type for scalar function arguments
        let plan = parse_sql_to_plan(
            "SELECT parse_ident($1), parse_ident($2::TEXT) FROM test WHERE id > $3",
        )
        .await;
        let types = DfLogicalPlanner::get_inferred_parameter_types(&plan).unwrap();

        assert_eq!(types.len(), 3);

        let first = types.get("$1").unwrap();
        let second = types.get("$2").unwrap();
        let third = types.get("$3").unwrap();

        assert!(first.is_none(), "Expected $1 to be None");
        assert_eq!(second, &Some(DataType::Utf8));
        assert_eq!(third, &Some(DataType::Int32));
    }

    #[tokio::test]
    async fn test_get_inferred_parameter_types_subquery() {
        let sql = r#"SELECT * FROM test WHERE id = (SELECT id FROM test CROSS JOIN (SELECT parse_ident($1::TEXT) AS parts) p LIMIT 1)"#;
        let plan = parse_sql_to_plan(sql).await;
        let types = DfLogicalPlanner::get_inferred_parameter_types(&plan).unwrap();

        assert_eq!(types.len(), 1);
        assert_eq!(types.get("$1").unwrap(), &Some(DataType::Utf8));
    }
}
660}