Skip to main content

query/dist_plan/
predicate_extractor.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Predicate extraction for partition pruning.
16//!
//! [`PredicateExtractor`] extracts a list of [`PartitionExpr`] from a given [`LogicalPlan`].
18
19use std::collections::HashSet;
20
21use arrow::datatypes::DataType;
22use common_telemetry::debug;
23use datafusion_common::Result as DfResult;
24use datafusion_expr::{Expr, LogicalPlan, Operator};
25use datatypes::value::Value;
26use partition::expr::{Operand, PartitionExpr, RestrictedOp};
27
/// Extracts a list of [`PartitionExpr`] from a given [`LogicalPlan`].
///
/// This is a stateless unit struct; its functionality is exposed through
/// associated functions only.
pub struct PredicateExtractor;
30
31impl PredicateExtractor {
32    /// Extract partition expressions for partition columns from logical plan  
33    /// This method returns PartitionExpr objects suitable for ConstraintPruner
34    pub fn extract_partition_expressions(
35        plan: &LogicalPlan,
36        partition_columns: &[String],
37    ) -> DfResult<Vec<PartitionExpr>> {
38        // Collect all filter expressions from the logical plan
39        let mut filter_exprs = Vec::new();
40        Self::collect_filter_expressions(plan, &mut filter_exprs)?;
41
42        if filter_exprs.is_empty() {
43            return Ok(Vec::new());
44        }
45
46        // Convert each DataFusion filter expression to PartitionExpr
47        let mut partition_exprs = Vec::with_capacity(filter_exprs.len());
48        let partition_set: HashSet<String> = partition_columns.iter().cloned().collect();
49
50        for filter_expr in filter_exprs {
51            match DataFusionExprConverter::convert(&filter_expr) {
52                Ok(partition_expr) => {
53                    // Check expression for safe partition pruning
54                    match ExpressionChecker::check_expression_for_pruning(
55                        &partition_expr,
56                        &partition_set,
57                    ) {
58                        ExpressionCheckResult::UseAsIs(expr) => {
59                            partition_exprs.push(expr);
60                        }
61                        ExpressionCheckResult::UsePartial(exprs) => {
62                            partition_exprs.extend(exprs);
63                        }
64                        ExpressionCheckResult::Drop => {
65                            debug!(
66                                "Dropping mixed expression for correctness: {}",
67                                partition_expr
68                            );
69                        }
70                    }
71                }
72                Err(err) => {
73                    debug!(
74                        "Failed to convert filter expression to PartitionExpr: {}, skipping",
75                        err
76                    );
77                    continue;
78                }
79            }
80        }
81
82        debug!(
83            "Extracted {} partition expressions from logical plan for partition columns: {:?}",
84            partition_exprs.len(),
85            partition_columns
86        );
87
88        Ok(partition_exprs)
89    }
90
91    /// Collect all filter expressions from a logical plan.
92    ///
93    /// Besides explicit [`LogicalPlan::Filter`] nodes, this must also collect
94    /// predicates already stored in [`LogicalPlan::TableScan`] filters. The
95    /// distributed planner runs a focused DataFusion `PushDownFilter` pass
96    /// before `MergeScan` wrapping, so partition predicates may no longer exist
97    /// as standalone `Filter` nodes by the time region pruning calls this
98    /// extractor. If we ignored `TableScan.filters`, region pruning would miss
99    /// predicates that were successfully pushed down for scan-level pruning.
100    fn collect_filter_expressions(plan: &LogicalPlan, expressions: &mut Vec<Expr>) -> DfResult<()> {
101        if let LogicalPlan::Filter(filter) = plan {
102            expressions.push(filter.predicate.clone());
103        }
104
105        // Collect filters that DataFusion's PushDownFilter stored in TableScan.
106        // `TableScan.filters` is conjunctive: DataFusion passes scan filters as
107        // a list but the table scan must satisfy all of them. Preserve that AND
108        // semantics for partition pruning instead of returning the filters as
109        // independent top-level expressions.
110        if let LogicalPlan::TableScan(table_scan) = plan
111            && let Some(expr) = Self::conjunction(table_scan.filters.iter().cloned())
112        {
113            expressions.push(expr);
114        }
115
116        // Recursively visit children
117        for child in plan.inputs() {
118            Self::collect_filter_expressions(child, expressions)?;
119        }
120
121        // TODO(ruihang): support plans involve multiple relations.
122        if plan.inputs().len() > 1 {
123            expressions.clear();
124        }
125
126        Ok(())
127    }
128
129    fn conjunction(mut expressions: impl Iterator<Item = Expr>) -> Option<Expr> {
130        let first = expressions.next()?;
131        Some(expressions.fold(first, |acc, expr| acc.and(expr)))
132    }
133}
134
/// Result of analyzing an expression for partition pruning safety.
///
/// Produced by `ExpressionChecker::check_expression_for_pruning` and consumed by
/// `PredicateExtractor::extract_partition_expressions`.
#[derive(Debug, Clone)]
enum ExpressionCheckResult {
    /// Expression is safe to use as-is for pruning (only involves partition columns).
    UseAsIs(PartitionExpr),
    /// Only these sub-expressions of a mixed `AND` expression may be used; each
    /// of them involves partition columns only.
    UsePartial(Vec<PartitionExpr>),
    /// Drop the entire expression (unsafe for pruning, e.g., OR with non-partition columns).
    Drop,
}
145
/// Checks expressions to determine safe pruning strategies for mixed
/// partition/non-partition expressions.
///
/// Stateless unit struct; used purely as a namespace for associated functions.
struct ExpressionChecker;
148
149impl ExpressionChecker {
150    /// Check a partition expression to determine how it should be handled for safe pruning
151    fn check_expression_for_pruning(
152        expr: &PartitionExpr,
153        partition_columns: &HashSet<String>,
154    ) -> ExpressionCheckResult {
155        match expr.op() {
156            RestrictedOp::And => {
157                // For AND expressions, we can extract only the partition-related parts
158                // because if any part fails, the entire AND fails
159                let mut partition_constraints = Vec::new();
160                Self::extract_and_constraints(expr, partition_columns, &mut partition_constraints);
161
162                if partition_constraints.is_empty() {
163                    ExpressionCheckResult::Drop
164                } else if Self::expr_only_involves_partition_columns(expr, partition_columns) {
165                    // If the entire expression only involves partition columns, use as-is
166                    ExpressionCheckResult::UseAsIs(expr.clone())
167                } else {
168                    // Mixed expression: return only the partition parts
169                    ExpressionCheckResult::UsePartial(partition_constraints)
170                }
171            }
172            RestrictedOp::Or => {
173                // For OR expressions, we can only use them if ALL branches involve only partition columns
174                // because if any branch involves non-partition columns, we cannot safely prune
175                if Self::expr_only_involves_partition_columns(expr, partition_columns) {
176                    ExpressionCheckResult::UseAsIs(expr.clone())
177                } else {
178                    // Mixed OR expression: must drop entirely for safety
179                    ExpressionCheckResult::Drop
180                }
181            }
182            _ => {
183                // For comparison operations (=, <, >, etc.), check if they only involve partition columns
184                if Self::expr_only_involves_partition_columns(expr, partition_columns) {
185                    ExpressionCheckResult::UseAsIs(expr.clone())
186                } else {
187                    ExpressionCheckResult::Drop
188                }
189            }
190        }
191    }
192
193    /// Extract partition-related constraints from AND expressions recursively
194    fn extract_and_constraints(
195        expr: &PartitionExpr,
196        partition_columns: &HashSet<String>,
197        result: &mut Vec<PartitionExpr>,
198    ) {
199        if let RestrictedOp::And = expr.op() {
200            // Recursively process both sides of AND
201            Self::extract_constraints_from_operand(expr.lhs(), partition_columns, result);
202            Self::extract_constraints_from_operand(expr.rhs(), partition_columns, result);
203        } else {
204            // Non-AND expression: check if it involves partition columns
205            if Self::expr_only_involves_partition_columns(expr, partition_columns) {
206                result.push(expr.clone());
207            }
208        }
209    }
210
211    /// Extract constraints from an operand (which might be a column, value, or nested expression)
212    fn extract_constraints_from_operand(
213        operand: &Operand,
214        partition_columns: &HashSet<String>,
215        result: &mut Vec<PartitionExpr>,
216    ) {
217        match operand {
218            Operand::Column(_) | Operand::Value(_) => {
219                // This shouldn't happen in well-formed expressions
220            }
221            Operand::Expr(expr) => {
222                Self::extract_and_constraints(expr, partition_columns, result);
223            }
224        }
225    }
226
227    /// Check if an expression involves ONLY partition columns
228    fn expr_only_involves_partition_columns(
229        expr: &PartitionExpr,
230        partition_columns: &HashSet<String>,
231    ) -> bool {
232        Self::operand_only_involves_partition_columns(expr.lhs(), partition_columns)
233            && Self::operand_only_involves_partition_columns(expr.rhs(), partition_columns)
234    }
235
236    /// Check if an operand involves ONLY partition columns or values
237    fn operand_only_involves_partition_columns(
238        operand: &Operand,
239        partition_columns: &HashSet<String>,
240    ) -> bool {
241        match operand {
242            Operand::Column(col) => partition_columns.contains(col),
243            Operand::Value(_) => true, // Values are always safe
244            Operand::Expr(expr) => {
245                Self::expr_only_involves_partition_columns(expr, partition_columns)
246            }
247        }
248    }
249}
250
/// Converts DataFusion expressions to [`PartitionExpr`].
///
/// Stateless unit struct; used purely as a namespace for associated functions.
struct DataFusionExprConverter;
253
254impl DataFusionExprConverter {
255    /// Convert DataFusion Expr to PartitionExpr
256    pub fn convert(expr: &Expr) -> DfResult<PartitionExpr> {
257        match expr {
258            Expr::BinaryExpr(binary_expr) => {
259                let lhs = Self::convert_to_operand(&binary_expr.left)?;
260                let rhs = Self::convert_to_operand(&binary_expr.right)?;
261                let op = Self::convert_operator(&binary_expr.op)?;
262
263                Ok(PartitionExpr::new(lhs, op, rhs))
264            }
265            Expr::InList(inlist_expr) => {
266                // Convert col IN (val1, val2, val3) to col = val1 OR col = val2 OR col = val3
267                // Handle negation: col NOT IN (val1, val2) to col != val1 AND col != val2
268                let column_operand = Self::convert_to_operand(&inlist_expr.expr)?;
269
270                if inlist_expr.list.is_empty() {
271                    return Err(datafusion_common::DataFusionError::Plan(
272                        "InList with empty list is not supported".to_string(),
273                    ));
274                }
275
276                let op = if inlist_expr.negated {
277                    RestrictedOp::NotEq
278                } else {
279                    RestrictedOp::Eq
280                };
281
282                let connector_op = if inlist_expr.negated {
283                    RestrictedOp::And // NOT IN becomes col != val1 AND col != val2
284                } else {
285                    RestrictedOp::Or // IN becomes col = val1 OR col = val2
286                };
287
288                // Convert each value in the list to an equality/inequality expression
289                let mut expressions = Vec::new();
290                for value_expr in &inlist_expr.list {
291                    let value_operand = Self::convert_to_operand(value_expr)?;
292                    expressions.push(PartitionExpr::new(
293                        column_operand.clone(),
294                        op.clone(),
295                        value_operand,
296                    ));
297                }
298
299                // Chain expressions with OR/AND
300                let mut expr_iter = expressions.into_iter();
301                let mut result = expr_iter.next().unwrap();
302                for expr in expr_iter {
303                    result = PartitionExpr::new(
304                        Operand::Expr(result),
305                        connector_op.clone(),
306                        Operand::Expr(expr),
307                    );
308                }
309
310                Ok(result)
311            }
312            Expr::Between(between_expr) => {
313                // Convert col BETWEEN low AND high to col >= low AND col <= high
314                // Handle negation: col NOT BETWEEN low AND high to col < low OR col > high
315                let column_operand = Self::convert_to_operand(&between_expr.expr)?;
316                let low_operand = Self::convert_to_operand(&between_expr.low)?;
317                let high_operand = Self::convert_to_operand(&between_expr.high)?;
318
319                if between_expr.negated {
320                    // NOT BETWEEN: col < low OR col > high
321                    let left_expr =
322                        PartitionExpr::new(column_operand.clone(), RestrictedOp::Lt, low_operand);
323                    let right_expr =
324                        PartitionExpr::new(column_operand, RestrictedOp::Gt, high_operand);
325                    Ok(PartitionExpr::new(
326                        Operand::Expr(left_expr),
327                        RestrictedOp::Or,
328                        Operand::Expr(right_expr),
329                    ))
330                } else {
331                    // BETWEEN: col >= low AND col <= high
332                    let left_expr =
333                        PartitionExpr::new(column_operand.clone(), RestrictedOp::GtEq, low_operand);
334                    let right_expr =
335                        PartitionExpr::new(column_operand, RestrictedOp::LtEq, high_operand);
336                    Ok(PartitionExpr::new(
337                        Operand::Expr(left_expr),
338                        RestrictedOp::And,
339                        Operand::Expr(right_expr),
340                    ))
341                }
342            }
343            Expr::IsNull(expr) => {
344                // Convert col IS NULL to a PartitionExpr
345                let column_operand = Self::convert_to_operand(expr)?;
346                Ok(PartitionExpr::new(
347                    column_operand,
348                    RestrictedOp::Eq,
349                    Operand::Value(Value::Null),
350                ))
351            }
352            Expr::IsNotNull(expr) => {
353                // Convert col IS NOT NULL to a PartitionExpr
354                let column_operand = Self::convert_to_operand(expr)?;
355                Ok(PartitionExpr::new(
356                    column_operand,
357                    RestrictedOp::NotEq,
358                    Operand::Value(Value::Null),
359                ))
360            }
361            Expr::Not(expr) => {
362                // Handle NOT expressions by inverting the inner expression
363                match expr.as_ref() {
364                    Expr::BinaryExpr(binary_expr) => {
365                        let lhs = Self::convert_to_operand(&binary_expr.left)?;
366                        let rhs = Self::convert_to_operand(&binary_expr.right)?;
367                        let inverted_op = Self::invert_operator(&binary_expr.op)?;
368
369                        Ok(PartitionExpr::new(lhs, inverted_op, rhs))
370                    }
371                    Expr::IsNull(inner_expr) => {
372                        // NOT (col IS NULL) becomes col IS NOT NULL
373                        let column_operand = Self::convert_to_operand(inner_expr)?;
374                        Ok(PartitionExpr::new(
375                            column_operand,
376                            RestrictedOp::NotEq,
377                            Operand::Value(Value::Null),
378                        ))
379                    }
380                    Expr::IsNotNull(inner_expr) => {
381                        // NOT (col IS NOT NULL) becomes col IS NULL
382                        let column_operand = Self::convert_to_operand(inner_expr)?;
383                        Ok(PartitionExpr::new(
384                            column_operand,
385                            RestrictedOp::Eq,
386                            Operand::Value(Value::Null),
387                        ))
388                    }
389                    _ => {
390                        debug!(
391                            "Unsupported NOT expression for partition pruning: {:?}",
392                            expr
393                        );
394                        Err(datafusion_common::DataFusionError::Plan(format!(
395                            "NOT expression with inner type {:?} not supported for partition pruning",
396                            expr
397                        )))
398                    }
399                }
400            }
401            _ => Err(datafusion_common::DataFusionError::Plan(format!(
402                "Unsupported expression type for conversion: {:?}",
403                expr
404            ))),
405        }
406    }
407
408    /// Convert DataFusion Expr to Operand
409    fn convert_to_operand(expr: &Expr) -> DfResult<Operand> {
410        match expr {
411            Expr::Column(col) => {
412                // Handle qualified column names (table.column) by extracting just the column name
413                // For partition pruning, we typically only care about the column name itself
414                let column_name = if let Some(relation) = &col.relation {
415                    debug!(
416                        "Using qualified column reference: {}.{}",
417                        relation, col.name
418                    );
419                    col.name.clone()
420                } else {
421                    col.name.clone()
422                };
423                Ok(Operand::Column(column_name))
424            }
425            Expr::Literal(scalar_value, _) => {
426                let value = Value::try_from(scalar_value.clone()).unwrap();
427                Ok(Operand::Value(value))
428            }
429            Expr::Alias(alias_expr) => {
430                // Unwrap alias to get the actual expression
431                Self::convert_to_operand(&alias_expr.expr)
432            }
433            Expr::Cast(cast_expr) => {
434                // For safe casts, unwrap to the inner expression
435                // For unsafe casts, skip with debug logging
436                if Self::is_safe_cast_for_partition_pruning(&cast_expr.data_type) {
437                    Self::convert_to_operand(&cast_expr.expr)
438                } else {
439                    debug!(
440                        "Skipping unsafe cast for partition pruning: {:?}",
441                        cast_expr.data_type
442                    );
443                    Err(datafusion_common::DataFusionError::Plan(format!(
444                        "Cast to {:?} not supported for partition pruning",
445                        cast_expr.data_type
446                    )))
447                }
448            }
449            other => {
450                let partition_expr = Self::convert(other)?;
451                Ok(Operand::Expr(partition_expr))
452            }
453        }
454    }
455
456    /// Convert DataFusion Operator to RestrictedOp
457    fn convert_operator(op: &Operator) -> DfResult<RestrictedOp> {
458        match op {
459            Operator::Eq => Ok(RestrictedOp::Eq),
460            Operator::NotEq => Ok(RestrictedOp::NotEq),
461            Operator::Lt => Ok(RestrictedOp::Lt),
462            Operator::LtEq => Ok(RestrictedOp::LtEq),
463            Operator::Gt => Ok(RestrictedOp::Gt),
464            Operator::GtEq => Ok(RestrictedOp::GtEq),
465            Operator::And => Ok(RestrictedOp::And),
466            Operator::Or => Ok(RestrictedOp::Or),
467            _ => Err(datafusion_common::DataFusionError::Plan(format!(
468                "Unsupported operator: {:?}",
469                op
470            ))),
471        }
472    }
473
474    /// Invert a DataFusion Operator for NOT expressions
475    fn invert_operator(op: &Operator) -> DfResult<RestrictedOp> {
476        let Some(negated) = op.negate() else {
477            return Err(datafusion_common::DataFusionError::Plan(format!(
478                "Cannot invert operator: {:?}",
479                op
480            )));
481        };
482        Self::convert_operator(&negated)
483    }
484
485    /// Determine if a cast is safe for partition pruning
486    /// Safe casts don't change the logical meaning of constraints
487    fn is_safe_cast_for_partition_pruning(data_type: &DataType) -> bool {
488        match data_type {
489            // Integer widening casts are generally safe
490            DataType::Int8 => true,
491            DataType::Int16 => true,
492            DataType::Int32 => true,
493            DataType::Int64 => true,
494            DataType::UInt8 => true,
495            DataType::UInt16 => true,
496            DataType::UInt32 => true,
497            DataType::UInt64 => true,
498
499            // Float casts are generally safe for equality/inequality comparisons
500            DataType::Float32 => true,
501            DataType::Float64 => true,
502
503            // String casts might be safe in some cases
504            DataType::Utf8 => true,
505            DataType::LargeUtf8 => true,
506
507            // Date/time casts might be safe if they don't change precision significantly
508            DataType::Date32 => true,
509            DataType::Date64 => true,
510            DataType::Timestamp(_, _) => true,
511
512            // Boolean casts are straightforward
513            DataType::Boolean => true,
514
515            // For other types, be conservative and skip
516            _ => false,
517        }
518    }
519}
520
521#[cfg(test)]
522mod tests {
523    use std::sync::Arc;
524
525    use datafusion::arrow::datatypes::{DataType, Field, Schema};
526    use datafusion::common::Column;
527    use datafusion::datasource::DefaultTableSource;
528    use datafusion_expr::{LogicalPlanBuilder, col, lit};
529    use datatypes::value::Value;
530    use partition::expr::{Operand, PartitionExpr, RestrictedOp};
531
532    use super::*;
533
534    fn create_test_table_scan() -> LogicalPlan {
535        let schema = Arc::new(Schema::new(vec![
536            Field::new(
537                "timestamp",
538                DataType::Timestamp(datafusion::arrow::datatypes::TimeUnit::Millisecond, None),
539                false,
540            ),
541            Field::new("user_id", DataType::Int64, false),
542            Field::new("value", DataType::Int64, false),
543        ]));
544
545        let empty_table = datafusion::datasource::empty::EmptyTable::new(schema);
546        let table_source = Arc::new(DefaultTableSource::new(Arc::new(empty_table)));
547
548        LogicalPlanBuilder::scan("test", table_source, None)
549            .unwrap()
550            .build()
551            .unwrap()
552    }
553
    /// One table-driven scenario for `check_partition_expressions`.
    struct FilterTestCase {
        /// Label included in the assertion message on failure.
        name: &'static str,
        /// Predicate placed in a `Filter` node above the test table scan.
        filter_expr: Expr,
        /// Partition expressions the extractor is expected to return.
        expected_partition_exprs: Vec<PartitionExpr>,
        /// Names of the columns treated as partition columns.
        partition_columns: Vec<&'static str>,
    }
560
    impl FilterTestCase {
        /// Creates a test case; each argument is stored in the field of the
        /// same name.
        fn new(
            name: &'static str,
            filter_expr: Expr,
            expected_partition_exprs: Vec<PartitionExpr>,
            partition_columns: Vec<&'static str>,
        ) -> Self {
            Self {
                name,
                filter_expr,
                expected_partition_exprs,
                partition_columns,
            }
        }
    }
576
577    /// Helper to check partition expressions for a set of test cases.
578    fn check_partition_expressions(cases: Vec<FilterTestCase>) {
579        for case in cases {
580            let table_scan = create_test_table_scan();
581            let filter = case.filter_expr.clone();
582
583            let plan = LogicalPlanBuilder::from(table_scan)
584                .filter(filter)
585                .unwrap()
586                .build()
587                .unwrap();
588
589            let partition_columns: Vec<String> = case
590                .partition_columns
591                .iter()
592                .map(|s| s.to_string())
593                .collect();
594            let partition_exprs =
595                PredicateExtractor::extract_partition_expressions(&plan, &partition_columns)
596                    .unwrap();
597            let expected = case.expected_partition_exprs.clone();
598            assert_eq!(
599                partition_exprs, expected,
600                "Test case '{}': expected partition expressions {:?}, got {:?}",
601                case.name, expected, partition_exprs
602            );
603        }
604    }
605
606    #[test]
607    fn test_extracts_table_scan_filters() {
608        let table_scan = create_test_table_scan();
609        let filter = col("user_id").gt_eq(lit(100i64));
610        let LogicalPlan::TableScan(scan) = table_scan else {
611            panic!("expected test table scan");
612        };
613        let plan = LogicalPlan::TableScan(datafusion_expr::logical_plan::TableScan {
614            filters: vec![filter],
615            ..scan
616        });
617
618        let partition_exprs =
619            PredicateExtractor::extract_partition_expressions(&plan, &["user_id".to_string()])
620                .unwrap();
621
622        assert_eq!(
623            partition_exprs,
624            vec![PartitionExpr::new(
625                Operand::Column("user_id".to_string()),
626                RestrictedOp::GtEq,
627                Operand::Value(Value::Int64(100)),
628            )]
629        );
630    }
631
632    #[test]
633    fn test_combines_table_scan_filters_as_conjunction() {
634        let table_scan = create_test_table_scan();
635        let filter_a = col("user_id").eq(lit(10i64));
636        let filter_b = col("value").eq(lit(20i64));
637        let LogicalPlan::TableScan(scan) = table_scan else {
638            panic!("expected test table scan");
639        };
640        let plan = LogicalPlan::TableScan(datafusion_expr::logical_plan::TableScan {
641            filters: vec![filter_a, filter_b],
642            ..scan
643        });
644
645        let partition_exprs = PredicateExtractor::extract_partition_expressions(
646            &plan,
647            &["user_id".to_string(), "value".to_string()],
648        )
649        .unwrap();
650
651        assert_eq!(
652            partition_exprs,
653            vec![PartitionExpr::new(
654                Operand::Expr(PartitionExpr::new(
655                    Operand::Column("user_id".to_string()),
656                    RestrictedOp::Eq,
657                    Operand::Value(Value::Int64(10)),
658                )),
659                RestrictedOp::And,
660                Operand::Expr(PartitionExpr::new(
661                    Operand::Column("value".to_string()),
662                    RestrictedOp::Eq,
663                    Operand::Value(Value::Int64(20)),
664                )),
665            )]
666        );
667    }
668
669    #[test]
670    fn test_basic_constraints_extraction() {
671        let cases = vec![
672            FilterTestCase::new(
673                "non_partition_column_ignored",
674                col("value").gt_eq(lit(100i64)),
675                vec![],
676                vec!["user_id"],
677            ),
678            FilterTestCase::new(
679                "simple_constraint",
680                col("user_id").gt_eq(lit(100i64)),
681                vec![PartitionExpr::new(
682                    Operand::Column("user_id".to_string()),
683                    RestrictedOp::GtEq,
684                    Operand::Value(Value::Int64(100)),
685                )],
686                vec!["user_id"],
687            ),
688            FilterTestCase::new(
689                "or_expression",
690                col("user_id")
691                    .eq(lit(100i64))
692                    .or(col("user_id").eq(lit(200i64))),
693                vec![PartitionExpr::new(
694                    Operand::Expr(PartitionExpr::new(
695                        Operand::Column("user_id".to_string()),
696                        RestrictedOp::Eq,
697                        Operand::Value(Value::Int64(100)),
698                    )),
699                    RestrictedOp::Or,
700                    Operand::Expr(PartitionExpr::new(
701                        Operand::Column("user_id".to_string()),
702                        RestrictedOp::Eq,
703                        Operand::Value(Value::Int64(200)),
704                    )),
705                )],
706                vec!["user_id"],
707            ),
708            FilterTestCase::new(
709                "complex_and_or",
710                col("user_id")
711                    .gt_eq(lit(100i64))
712                    .and(col("user_id").lt(lit(200i64)))
713                    .or(col("user_id")
714                        .gt_eq(lit(300i64))
715                        .and(col("user_id").lt(lit(400i64)))),
716                vec![PartitionExpr::new(
717                    Operand::Expr(PartitionExpr::new(
718                        Operand::Expr(PartitionExpr::new(
719                            Operand::Column("user_id".to_string()),
720                            RestrictedOp::GtEq,
721                            Operand::Value(Value::Int64(100)),
722                        )),
723                        RestrictedOp::And,
724                        Operand::Expr(PartitionExpr::new(
725                            Operand::Column("user_id".to_string()),
726                            RestrictedOp::Lt,
727                            Operand::Value(Value::Int64(200)),
728                        )),
729                    )),
730                    RestrictedOp::Or,
731                    Operand::Expr(PartitionExpr::new(
732                        Operand::Expr(PartitionExpr::new(
733                            Operand::Column("user_id".to_string()),
734                            RestrictedOp::GtEq,
735                            Operand::Value(Value::Int64(300)),
736                        )),
737                        RestrictedOp::And,
738                        Operand::Expr(PartitionExpr::new(
739                            Operand::Column("user_id".to_string()),
740                            RestrictedOp::Lt,
741                            Operand::Value(Value::Int64(400)),
742                        )),
743                    )),
744                )],
745                vec!["user_id"],
746            ),
747        ];
748        check_partition_expressions(cases);
749    }
750
751    #[test]
752    fn test_alias_expressions() {
753        let cases = vec![
754            FilterTestCase::new(
755                "simple_alias",
756                col("user_id").alias("uid").eq(lit(100i64)),
757                vec![PartitionExpr::new(
758                    Operand::Column("user_id".to_string()),
759                    RestrictedOp::Eq,
760                    Operand::Value(Value::Int64(100)),
761                )],
762                vec!["user_id"],
763            ),
764            FilterTestCase::new(
765                "nested_alias",
766                col("user_id").alias("uid").alias("u").gt_eq(lit(50i64)),
767                vec![PartitionExpr::new(
768                    Operand::Column("user_id".to_string()),
769                    RestrictedOp::GtEq,
770                    Operand::Value(Value::Int64(50)),
771                )],
772                vec!["user_id"],
773            ),
774            FilterTestCase::new(
775                "complex_alias_with_and_or",
776                col("user_id")
777                    .alias("uid")
778                    .gt_eq(lit(100i64))
779                    .and(col("user_id").alias("u").lt(lit(200i64)))
780                    .or(col("user_id").alias("id").eq(lit(300i64))),
781                vec![PartitionExpr::new(
782                    Operand::Expr(PartitionExpr::new(
783                        Operand::Expr(PartitionExpr::new(
784                            Operand::Column("user_id".to_string()),
785                            RestrictedOp::GtEq,
786                            Operand::Value(Value::Int64(100)),
787                        )),
788                        RestrictedOp::And,
789                        Operand::Expr(PartitionExpr::new(
790                            Operand::Column("user_id".to_string()),
791                            RestrictedOp::Lt,
792                            Operand::Value(Value::Int64(200)),
793                        )),
794                    )),
795                    RestrictedOp::Or,
796                    Operand::Expr(PartitionExpr::new(
797                        Operand::Column("user_id".to_string()),
798                        RestrictedOp::Eq,
799                        Operand::Value(Value::Int64(300)),
800                    )),
801                )],
802                vec!["user_id"],
803            ),
804        ];
805        check_partition_expressions(cases);
806    }
807
808    #[test]
809    fn test_inlist_expressions() {
810        let cases = vec![
811            FilterTestCase::new(
812                "simple_inlist",
813                col("user_id").in_list(vec![lit(100i64), lit(200i64), lit(300i64)], false),
814                vec![PartitionExpr::new(
815                    Operand::Expr(PartitionExpr::new(
816                        Operand::Expr(PartitionExpr::new(
817                            Operand::Column("user_id".to_string()),
818                            RestrictedOp::Eq,
819                            Operand::Value(Value::Int64(100)),
820                        )),
821                        RestrictedOp::Or,
822                        Operand::Expr(PartitionExpr::new(
823                            Operand::Column("user_id".to_string()),
824                            RestrictedOp::Eq,
825                            Operand::Value(Value::Int64(200)),
826                        )),
827                    )),
828                    RestrictedOp::Or,
829                    Operand::Expr(PartitionExpr::new(
830                        Operand::Column("user_id".to_string()),
831                        RestrictedOp::Eq,
832                        Operand::Value(Value::Int64(300)),
833                    )),
834                )],
835                vec!["user_id"],
836            ),
837            FilterTestCase::new(
838                "negated_inlist",
839                col("user_id").in_list(vec![lit(100i64), lit(200i64)], true),
840                vec![PartitionExpr::new(
841                    Operand::Expr(PartitionExpr::new(
842                        Operand::Column("user_id".to_string()),
843                        RestrictedOp::NotEq,
844                        Operand::Value(Value::Int64(100)),
845                    )),
846                    RestrictedOp::And,
847                    Operand::Expr(PartitionExpr::new(
848                        Operand::Column("user_id".to_string()),
849                        RestrictedOp::NotEq,
850                        Operand::Value(Value::Int64(200)),
851                    )),
852                )],
853                vec!["user_id"],
854            ),
855            FilterTestCase::new(
856                "inlist_with_alias",
857                col("user_id")
858                    .alias("uid")
859                    .in_list(vec![lit(100i64), lit(200i64)], false),
860                vec![PartitionExpr::new(
861                    Operand::Expr(PartitionExpr::new(
862                        Operand::Column("user_id".to_string()),
863                        RestrictedOp::Eq,
864                        Operand::Value(Value::Int64(100)),
865                    )),
866                    RestrictedOp::Or,
867                    Operand::Expr(PartitionExpr::new(
868                        Operand::Column("user_id".to_string()),
869                        RestrictedOp::Eq,
870                        Operand::Value(Value::Int64(200)),
871                    )),
872                )],
873                vec!["user_id"],
874            ),
875        ];
876        check_partition_expressions(cases);
877    }
878
879    #[test]
880    fn test_between_expressions() {
881        let cases = vec![
882            FilterTestCase::new(
883                "simple_between",
884                col("user_id").between(lit(100i64), lit(200i64)),
885                vec![PartitionExpr::new(
886                    Operand::Expr(PartitionExpr::new(
887                        Operand::Column("user_id".to_string()),
888                        RestrictedOp::GtEq,
889                        Operand::Value(Value::Int64(100)),
890                    )),
891                    RestrictedOp::And,
892                    Operand::Expr(PartitionExpr::new(
893                        Operand::Column("user_id".to_string()),
894                        RestrictedOp::LtEq,
895                        Operand::Value(Value::Int64(200)),
896                    )),
897                )],
898                vec!["user_id"],
899            ),
900            FilterTestCase::new(
901                "negated_between",
902                Expr::Between(datafusion_expr::Between {
903                    expr: Box::new(col("user_id")),
904                    negated: true,
905                    low: Box::new(lit(100i64)),
906                    high: Box::new(lit(200i64)),
907                }),
908                vec![PartitionExpr::new(
909                    Operand::Expr(PartitionExpr::new(
910                        Operand::Column("user_id".to_string()),
911                        RestrictedOp::Lt,
912                        Operand::Value(Value::Int64(100)),
913                    )),
914                    RestrictedOp::Or,
915                    Operand::Expr(PartitionExpr::new(
916                        Operand::Column("user_id".to_string()),
917                        RestrictedOp::Gt,
918                        Operand::Value(Value::Int64(200)),
919                    )),
920                )],
921                vec!["user_id"],
922            ),
923            FilterTestCase::new(
924                "between_with_alias",
925                col("user_id")
926                    .alias("uid")
927                    .between(lit(100i64), lit(200i64)),
928                vec![PartitionExpr::new(
929                    Operand::Expr(PartitionExpr::new(
930                        Operand::Column("user_id".to_string()),
931                        RestrictedOp::GtEq,
932                        Operand::Value(Value::Int64(100)),
933                    )),
934                    RestrictedOp::And,
935                    Operand::Expr(PartitionExpr::new(
936                        Operand::Column("user_id".to_string()),
937                        RestrictedOp::LtEq,
938                        Operand::Value(Value::Int64(200)),
939                    )),
940                )],
941                vec!["user_id"],
942            ),
943        ];
944        check_partition_expressions(cases);
945    }
946
947    #[test]
948    fn test_null_expressions() {
949        let cases = vec![
950            FilterTestCase::new(
951                "is_null",
952                col("user_id").is_null(),
953                vec![PartitionExpr::new(
954                    Operand::Column("user_id".to_string()),
955                    RestrictedOp::Eq,
956                    Operand::Value(Value::Null),
957                )],
958                vec!["user_id"],
959            ),
960            FilterTestCase::new(
961                "is_not_null",
962                col("user_id").is_not_null(),
963                vec![PartitionExpr::new(
964                    Operand::Column("user_id".to_string()),
965                    RestrictedOp::NotEq,
966                    Operand::Value(Value::Null),
967                )],
968                vec!["user_id"],
969            ),
970            FilterTestCase::new(
971                "null_with_alias",
972                col("user_id").alias("uid").is_null(),
973                vec![PartitionExpr::new(
974                    Operand::Column("user_id".to_string()),
975                    RestrictedOp::Eq,
976                    Operand::Value(Value::Null),
977                )],
978                vec!["user_id"],
979            ),
980        ];
981        check_partition_expressions(cases);
982    }
983
984    #[test]
985    fn test_cast_expressions() {
986        let cases = vec![
987            FilterTestCase::new(
988                "safe_cast",
989                Expr::Cast(datafusion_expr::Cast {
990                    expr: Box::new(col("user_id")),
991                    data_type: DataType::Int64,
992                })
993                .eq(lit(100i64)),
994                vec![PartitionExpr::new(
995                    Operand::Column("user_id".to_string()),
996                    RestrictedOp::Eq,
997                    Operand::Value(Value::Int64(100)),
998                )],
999                vec!["user_id"],
1000            ),
1001            FilterTestCase::new(
1002                "cast_with_alias",
1003                Expr::Cast(datafusion_expr::Cast {
1004                    expr: Box::new(col("user_id").alias("uid")),
1005                    data_type: DataType::Int64,
1006                })
1007                .eq(lit(100i64)),
1008                vec![PartitionExpr::new(
1009                    Operand::Column("user_id".to_string()),
1010                    RestrictedOp::Eq,
1011                    Operand::Value(Value::Int64(100)),
1012                )],
1013                vec!["user_id"],
1014            ),
1015            FilterTestCase::new(
1016                "unsafe_cast",
1017                Expr::Cast(datafusion_expr::Cast {
1018                    expr: Box::new(col("user_id")),
1019                    data_type: DataType::List(std::sync::Arc::new(
1020                        datafusion::arrow::datatypes::Field::new("item", DataType::Int32, true),
1021                    )),
1022                })
1023                .eq(lit(100i64)),
1024                vec![],
1025                vec!["user_id"],
1026            ),
1027        ];
1028        check_partition_expressions(cases);
1029    }
1030
1031    #[test]
1032    fn test_not_expressions() {
1033        let cases = vec![
1034            FilterTestCase::new(
1035                "not_equality",
1036                Expr::Not(Box::new(col("user_id").eq(lit(100i64)))),
1037                vec![PartitionExpr::new(
1038                    Operand::Column("user_id".to_string()),
1039                    RestrictedOp::NotEq,
1040                    Operand::Value(Value::Int64(100)),
1041                )],
1042                vec!["user_id"],
1043            ),
1044            FilterTestCase::new(
1045                "not_comparison",
1046                Expr::Not(Box::new(col("user_id").lt(lit(100i64)))),
1047                vec![PartitionExpr::new(
1048                    Operand::Column("user_id".to_string()),
1049                    RestrictedOp::GtEq,
1050                    Operand::Value(Value::Int64(100)),
1051                )],
1052                vec!["user_id"],
1053            ),
1054            FilterTestCase::new(
1055                "not_is_null",
1056                Expr::Not(Box::new(col("user_id").is_null())),
1057                vec![PartitionExpr::new(
1058                    Operand::Column("user_id".to_string()),
1059                    RestrictedOp::NotEq,
1060                    Operand::Value(Value::Null),
1061                )],
1062                vec!["user_id"],
1063            ),
1064            FilterTestCase::new(
1065                "not_with_alias",
1066                Expr::Not(Box::new(col("user_id").alias("uid").eq(lit(100i64)))),
1067                vec![PartitionExpr::new(
1068                    Operand::Column("user_id".to_string()),
1069                    RestrictedOp::NotEq,
1070                    Operand::Value(Value::Int64(100)),
1071                )],
1072                vec!["user_id"],
1073            ),
1074        ];
1075        check_partition_expressions(cases);
1076    }
1077
1078    #[test]
1079    fn test_edge_cases() {
1080        let cases = vec![
1081            FilterTestCase::new(
1082                "qualified_column_name",
1083                {
1084                    let qualified_col = Expr::Column(Column::new(Some("test"), "user_id"));
1085                    qualified_col.eq(lit(100i64))
1086                },
1087                vec![PartitionExpr::new(
1088                    Operand::Column("user_id".to_string()),
1089                    RestrictedOp::Eq,
1090                    Operand::Value(Value::Int64(100)),
1091                )],
1092                vec!["user_id"],
1093            ),
1094            FilterTestCase::new(
1095                "comprehensive_combinations",
1096                {
1097                    let in_expr = col("user_id")
1098                        .alias("uid")
1099                        .in_list(vec![lit(100i64), lit(200i64)], false);
1100                    let cast_expr = Expr::Cast(datafusion_expr::Cast {
1101                        expr: Box::new(col("user_id")),
1102                        data_type: DataType::Int64,
1103                    });
1104                    let between_expr = cast_expr.between(lit(300i64), lit(400i64));
1105                    in_expr.or(between_expr)
1106                },
1107                vec![PartitionExpr::new(
1108                    Operand::Expr(PartitionExpr::new(
1109                        Operand::Expr(PartitionExpr::new(
1110                            Operand::Column("user_id".to_string()),
1111                            RestrictedOp::Eq,
1112                            Operand::Value(Value::Int64(100)),
1113                        )),
1114                        RestrictedOp::Or,
1115                        Operand::Expr(PartitionExpr::new(
1116                            Operand::Column("user_id".to_string()),
1117                            RestrictedOp::Eq,
1118                            Operand::Value(Value::Int64(200)),
1119                        )),
1120                    )),
1121                    RestrictedOp::Or,
1122                    Operand::Expr(PartitionExpr::new(
1123                        Operand::Expr(PartitionExpr::new(
1124                            Operand::Column("user_id".to_string()),
1125                            RestrictedOp::GtEq,
1126                            Operand::Value(Value::Int64(300)),
1127                        )),
1128                        RestrictedOp::And,
1129                        Operand::Expr(PartitionExpr::new(
1130                            Operand::Column("user_id".to_string()),
1131                            RestrictedOp::LtEq,
1132                            Operand::Value(Value::Int64(400)),
1133                        )),
1134                    )),
1135                )],
1136                vec!["user_id"],
1137            ),
1138        ];
1139        check_partition_expressions(cases);
1140    }
1141
1142    #[test]
1143    fn test_mixed_partition_non_partition_expressions() {
1144        let cases = vec![
1145            // Mixed AND expression - should extract only partition part
1146            FilterTestCase::new(
1147                "mixed_and_expression",
1148                col("user_id")
1149                    .eq(lit(100i64))
1150                    .and(col("value").gt(lit(50i64))),
1151                vec![PartitionExpr::new(
1152                    Operand::Column("user_id".to_string()),
1153                    RestrictedOp::Eq,
1154                    Operand::Value(Value::Int64(100)),
1155                )],
1156                vec!["user_id"],
1157            ),
1158            // Mixed OR expression - should be dropped entirely
1159            FilterTestCase::new(
1160                "mixed_or_expression",
1161                col("user_id")
1162                    .between(lit(1i64), lit(10i64))
1163                    .or(col("value").gt(lit(50i64))),
1164                vec![], // Empty result - expression should be dropped
1165                vec!["user_id"],
1166            ),
1167            // Complex mixed AND expression with multiple parts
1168            FilterTestCase::new(
1169                "complex_mixed_and",
1170                col("user_id")
1171                    .gt_eq(lit(100i64))
1172                    .and(col("value").eq(lit(200i64)))
1173                    .and(col("timestamp").lt(lit(1000i64))),
1174                vec![
1175                    PartitionExpr::new(
1176                        Operand::Column("user_id".to_string()),
1177                        RestrictedOp::GtEq,
1178                        Operand::Value(Value::Int64(100)),
1179                    ),
1180                    PartitionExpr::new(
1181                        Operand::Column("timestamp".to_string()),
1182                        RestrictedOp::Lt,
1183                        Operand::Value(Value::Int64(1000)),
1184                    ),
1185                ],
1186                vec!["user_id", "timestamp"], // Both partition columns
1187            ),
1188            // Pure partition expression - should be kept as-is
1189            FilterTestCase::new(
1190                "pure_partition_and",
1191                col("user_id")
1192                    .gt_eq(lit(100i64))
1193                    .and(col("timestamp").lt(lit(1000i64))),
1194                vec![PartitionExpr::new(
1195                    Operand::Expr(PartitionExpr::new(
1196                        Operand::Column("user_id".to_string()),
1197                        RestrictedOp::GtEq,
1198                        Operand::Value(Value::Int64(100)),
1199                    )),
1200                    RestrictedOp::And,
1201                    Operand::Expr(PartitionExpr::new(
1202                        Operand::Column("timestamp".to_string()),
1203                        RestrictedOp::Lt,
1204                        Operand::Value(Value::Int64(1000)),
1205                    )),
1206                )],
1207                vec!["user_id", "timestamp"],
1208            ),
1209            // Pure partition OR expression - should be kept as-is
1210            FilterTestCase::new(
1211                "pure_partition_or",
1212                col("user_id")
1213                    .eq(lit(100i64))
1214                    .or(col("user_id").eq(lit(200i64))),
1215                vec![PartitionExpr::new(
1216                    Operand::Expr(PartitionExpr::new(
1217                        Operand::Column("user_id".to_string()),
1218                        RestrictedOp::Eq,
1219                        Operand::Value(Value::Int64(100)),
1220                    )),
1221                    RestrictedOp::Or,
1222                    Operand::Expr(PartitionExpr::new(
1223                        Operand::Column("user_id".to_string()),
1224                        RestrictedOp::Eq,
1225                        Operand::Value(Value::Int64(200)),
1226                    )),
1227                )],
1228                vec!["user_id"],
1229            ),
1230            // Pure non-partition expression - should be dropped
1231            FilterTestCase::new(
1232                "pure_non_partition",
1233                col("value").gt_eq(lit(100i64)),
1234                vec![], // Empty result - no partition columns involved
1235                vec!["user_id"],
1236            ),
1237            // Complex nested mixed expression
1238            FilterTestCase::new(
1239                "nested_mixed_expression",
1240                (col("user_id")
1241                    .eq(lit(100i64))
1242                    .and(col("value").gt(lit(50i64))))
1243                .or(col("user_id").eq(lit(200i64))),
1244                vec![], // Empty result - OR with mixed sub-expression should be dropped
1245                vec!["user_id"],
1246            ),
1247            // AND with nested OR (mixed) - should extract partition parts only
1248            FilterTestCase::new(
1249                "and_with_nested_mixed_or",
1250                col("user_id")
1251                    .gt_eq(lit(100i64))
1252                    .and(col("value").eq(lit(1i64)).or(col("value").eq(lit(2i64)))),
1253                vec![PartitionExpr::new(
1254                    Operand::Column("user_id".to_string()),
1255                    RestrictedOp::GtEq,
1256                    Operand::Value(Value::Int64(100)),
1257                )],
1258                vec!["user_id"],
1259            ),
1260        ];
1261        check_partition_expressions(cases);
1262    }
1263}