1use std::collections::HashSet;
20
21use arrow::datatypes::DataType;
22use common_telemetry::debug;
23use datafusion_common::Result as DfResult;
24use datafusion_expr::{Expr, LogicalPlan, Operator};
25use datatypes::value::Value;
26use partition::expr::{Operand, PartitionExpr, RestrictedOp};
27
28pub struct PredicateExtractor;
30
31impl PredicateExtractor {
32 pub fn extract_partition_expressions(
35 plan: &LogicalPlan,
36 partition_columns: &[String],
37 ) -> DfResult<Vec<PartitionExpr>> {
38 let mut filter_exprs = Vec::new();
40 Self::collect_filter_expressions(plan, &mut filter_exprs)?;
41
42 if filter_exprs.is_empty() {
43 return Ok(Vec::new());
44 }
45
46 let mut partition_exprs = Vec::with_capacity(filter_exprs.len());
48 let partition_set: HashSet<String> = partition_columns.iter().cloned().collect();
49
50 for filter_expr in filter_exprs {
51 match DataFusionExprConverter::convert(&filter_expr) {
52 Ok(partition_expr) => {
53 match ExpressionChecker::check_expression_for_pruning(
55 &partition_expr,
56 &partition_set,
57 ) {
58 ExpressionCheckResult::UseAsIs(expr) => {
59 partition_exprs.push(expr);
60 }
61 ExpressionCheckResult::UsePartial(exprs) => {
62 partition_exprs.extend(exprs);
63 }
64 ExpressionCheckResult::Drop => {
65 debug!(
66 "Dropping mixed expression for correctness: {}",
67 partition_expr
68 );
69 }
70 }
71 }
72 Err(err) => {
73 debug!(
74 "Failed to convert filter expression to PartitionExpr: {}, skipping",
75 err
76 );
77 continue;
78 }
79 }
80 }
81
82 debug!(
83 "Extracted {} partition expressions from logical plan for partition columns: {:?}",
84 partition_exprs.len(),
85 partition_columns
86 );
87
88 Ok(partition_exprs)
89 }
90
91 fn collect_filter_expressions(plan: &LogicalPlan, expressions: &mut Vec<Expr>) -> DfResult<()> {
101 if let LogicalPlan::Filter(filter) = plan {
102 expressions.push(filter.predicate.clone());
103 }
104
105 if let LogicalPlan::TableScan(table_scan) = plan
111 && let Some(expr) = Self::conjunction(table_scan.filters.iter().cloned())
112 {
113 expressions.push(expr);
114 }
115
116 for child in plan.inputs() {
118 Self::collect_filter_expressions(child, expressions)?;
119 }
120
121 if plan.inputs().len() > 1 {
123 expressions.clear();
124 }
125
126 Ok(())
127 }
128
129 fn conjunction(mut expressions: impl Iterator<Item = Expr>) -> Option<Expr> {
130 let first = expressions.next()?;
131 Some(expressions.fold(first, |acc, expr| acc.and(expr)))
132 }
133}
134
135#[derive(Debug, Clone)]
137enum ExpressionCheckResult {
138 UseAsIs(PartitionExpr),
140 UsePartial(Vec<PartitionExpr>),
142 Drop,
144}
145
146struct ExpressionChecker;
148
149impl ExpressionChecker {
150 fn check_expression_for_pruning(
152 expr: &PartitionExpr,
153 partition_columns: &HashSet<String>,
154 ) -> ExpressionCheckResult {
155 match expr.op() {
156 RestrictedOp::And => {
157 let mut partition_constraints = Vec::new();
160 Self::extract_and_constraints(expr, partition_columns, &mut partition_constraints);
161
162 if partition_constraints.is_empty() {
163 ExpressionCheckResult::Drop
164 } else if Self::expr_only_involves_partition_columns(expr, partition_columns) {
165 ExpressionCheckResult::UseAsIs(expr.clone())
167 } else {
168 ExpressionCheckResult::UsePartial(partition_constraints)
170 }
171 }
172 RestrictedOp::Or => {
173 if Self::expr_only_involves_partition_columns(expr, partition_columns) {
176 ExpressionCheckResult::UseAsIs(expr.clone())
177 } else {
178 ExpressionCheckResult::Drop
180 }
181 }
182 _ => {
183 if Self::expr_only_involves_partition_columns(expr, partition_columns) {
185 ExpressionCheckResult::UseAsIs(expr.clone())
186 } else {
187 ExpressionCheckResult::Drop
188 }
189 }
190 }
191 }
192
193 fn extract_and_constraints(
195 expr: &PartitionExpr,
196 partition_columns: &HashSet<String>,
197 result: &mut Vec<PartitionExpr>,
198 ) {
199 if let RestrictedOp::And = expr.op() {
200 Self::extract_constraints_from_operand(expr.lhs(), partition_columns, result);
202 Self::extract_constraints_from_operand(expr.rhs(), partition_columns, result);
203 } else {
204 if Self::expr_only_involves_partition_columns(expr, partition_columns) {
206 result.push(expr.clone());
207 }
208 }
209 }
210
211 fn extract_constraints_from_operand(
213 operand: &Operand,
214 partition_columns: &HashSet<String>,
215 result: &mut Vec<PartitionExpr>,
216 ) {
217 match operand {
218 Operand::Column(_) | Operand::Value(_) => {
219 }
221 Operand::Expr(expr) => {
222 Self::extract_and_constraints(expr, partition_columns, result);
223 }
224 }
225 }
226
227 fn expr_only_involves_partition_columns(
229 expr: &PartitionExpr,
230 partition_columns: &HashSet<String>,
231 ) -> bool {
232 Self::operand_only_involves_partition_columns(expr.lhs(), partition_columns)
233 && Self::operand_only_involves_partition_columns(expr.rhs(), partition_columns)
234 }
235
236 fn operand_only_involves_partition_columns(
238 operand: &Operand,
239 partition_columns: &HashSet<String>,
240 ) -> bool {
241 match operand {
242 Operand::Column(col) => partition_columns.contains(col),
243 Operand::Value(_) => true, Operand::Expr(expr) => {
245 Self::expr_only_involves_partition_columns(expr, partition_columns)
246 }
247 }
248 }
249}
250
251struct DataFusionExprConverter;
253
254impl DataFusionExprConverter {
255 pub fn convert(expr: &Expr) -> DfResult<PartitionExpr> {
257 match expr {
258 Expr::BinaryExpr(binary_expr) => {
259 let lhs = Self::convert_to_operand(&binary_expr.left)?;
260 let rhs = Self::convert_to_operand(&binary_expr.right)?;
261 let op = Self::convert_operator(&binary_expr.op)?;
262
263 Ok(PartitionExpr::new(lhs, op, rhs))
264 }
265 Expr::InList(inlist_expr) => {
266 let column_operand = Self::convert_to_operand(&inlist_expr.expr)?;
269
270 if inlist_expr.list.is_empty() {
271 return Err(datafusion_common::DataFusionError::Plan(
272 "InList with empty list is not supported".to_string(),
273 ));
274 }
275
276 let op = if inlist_expr.negated {
277 RestrictedOp::NotEq
278 } else {
279 RestrictedOp::Eq
280 };
281
282 let connector_op = if inlist_expr.negated {
283 RestrictedOp::And } else {
285 RestrictedOp::Or };
287
288 let mut expressions = Vec::new();
290 for value_expr in &inlist_expr.list {
291 let value_operand = Self::convert_to_operand(value_expr)?;
292 expressions.push(PartitionExpr::new(
293 column_operand.clone(),
294 op.clone(),
295 value_operand,
296 ));
297 }
298
299 let mut expr_iter = expressions.into_iter();
301 let mut result = expr_iter.next().unwrap();
302 for expr in expr_iter {
303 result = PartitionExpr::new(
304 Operand::Expr(result),
305 connector_op.clone(),
306 Operand::Expr(expr),
307 );
308 }
309
310 Ok(result)
311 }
312 Expr::Between(between_expr) => {
313 let column_operand = Self::convert_to_operand(&between_expr.expr)?;
316 let low_operand = Self::convert_to_operand(&between_expr.low)?;
317 let high_operand = Self::convert_to_operand(&between_expr.high)?;
318
319 if between_expr.negated {
320 let left_expr =
322 PartitionExpr::new(column_operand.clone(), RestrictedOp::Lt, low_operand);
323 let right_expr =
324 PartitionExpr::new(column_operand, RestrictedOp::Gt, high_operand);
325 Ok(PartitionExpr::new(
326 Operand::Expr(left_expr),
327 RestrictedOp::Or,
328 Operand::Expr(right_expr),
329 ))
330 } else {
331 let left_expr =
333 PartitionExpr::new(column_operand.clone(), RestrictedOp::GtEq, low_operand);
334 let right_expr =
335 PartitionExpr::new(column_operand, RestrictedOp::LtEq, high_operand);
336 Ok(PartitionExpr::new(
337 Operand::Expr(left_expr),
338 RestrictedOp::And,
339 Operand::Expr(right_expr),
340 ))
341 }
342 }
343 Expr::IsNull(expr) => {
344 let column_operand = Self::convert_to_operand(expr)?;
346 Ok(PartitionExpr::new(
347 column_operand,
348 RestrictedOp::Eq,
349 Operand::Value(Value::Null),
350 ))
351 }
352 Expr::IsNotNull(expr) => {
353 let column_operand = Self::convert_to_operand(expr)?;
355 Ok(PartitionExpr::new(
356 column_operand,
357 RestrictedOp::NotEq,
358 Operand::Value(Value::Null),
359 ))
360 }
361 Expr::Not(expr) => {
362 match expr.as_ref() {
364 Expr::BinaryExpr(binary_expr) => {
365 let lhs = Self::convert_to_operand(&binary_expr.left)?;
366 let rhs = Self::convert_to_operand(&binary_expr.right)?;
367 let inverted_op = Self::invert_operator(&binary_expr.op)?;
368
369 Ok(PartitionExpr::new(lhs, inverted_op, rhs))
370 }
371 Expr::IsNull(inner_expr) => {
372 let column_operand = Self::convert_to_operand(inner_expr)?;
374 Ok(PartitionExpr::new(
375 column_operand,
376 RestrictedOp::NotEq,
377 Operand::Value(Value::Null),
378 ))
379 }
380 Expr::IsNotNull(inner_expr) => {
381 let column_operand = Self::convert_to_operand(inner_expr)?;
383 Ok(PartitionExpr::new(
384 column_operand,
385 RestrictedOp::Eq,
386 Operand::Value(Value::Null),
387 ))
388 }
389 _ => {
390 debug!(
391 "Unsupported NOT expression for partition pruning: {:?}",
392 expr
393 );
394 Err(datafusion_common::DataFusionError::Plan(format!(
395 "NOT expression with inner type {:?} not supported for partition pruning",
396 expr
397 )))
398 }
399 }
400 }
401 _ => Err(datafusion_common::DataFusionError::Plan(format!(
402 "Unsupported expression type for conversion: {:?}",
403 expr
404 ))),
405 }
406 }
407
408 fn convert_to_operand(expr: &Expr) -> DfResult<Operand> {
410 match expr {
411 Expr::Column(col) => {
412 let column_name = if let Some(relation) = &col.relation {
415 debug!(
416 "Using qualified column reference: {}.{}",
417 relation, col.name
418 );
419 col.name.clone()
420 } else {
421 col.name.clone()
422 };
423 Ok(Operand::Column(column_name))
424 }
425 Expr::Literal(scalar_value, _) => {
426 let value = Value::try_from(scalar_value.clone()).unwrap();
427 Ok(Operand::Value(value))
428 }
429 Expr::Alias(alias_expr) => {
430 Self::convert_to_operand(&alias_expr.expr)
432 }
433 Expr::Cast(cast_expr) => {
434 if Self::is_safe_cast_for_partition_pruning(&cast_expr.data_type) {
437 Self::convert_to_operand(&cast_expr.expr)
438 } else {
439 debug!(
440 "Skipping unsafe cast for partition pruning: {:?}",
441 cast_expr.data_type
442 );
443 Err(datafusion_common::DataFusionError::Plan(format!(
444 "Cast to {:?} not supported for partition pruning",
445 cast_expr.data_type
446 )))
447 }
448 }
449 other => {
450 let partition_expr = Self::convert(other)?;
451 Ok(Operand::Expr(partition_expr))
452 }
453 }
454 }
455
456 fn convert_operator(op: &Operator) -> DfResult<RestrictedOp> {
458 match op {
459 Operator::Eq => Ok(RestrictedOp::Eq),
460 Operator::NotEq => Ok(RestrictedOp::NotEq),
461 Operator::Lt => Ok(RestrictedOp::Lt),
462 Operator::LtEq => Ok(RestrictedOp::LtEq),
463 Operator::Gt => Ok(RestrictedOp::Gt),
464 Operator::GtEq => Ok(RestrictedOp::GtEq),
465 Operator::And => Ok(RestrictedOp::And),
466 Operator::Or => Ok(RestrictedOp::Or),
467 _ => Err(datafusion_common::DataFusionError::Plan(format!(
468 "Unsupported operator: {:?}",
469 op
470 ))),
471 }
472 }
473
474 fn invert_operator(op: &Operator) -> DfResult<RestrictedOp> {
476 let Some(negated) = op.negate() else {
477 return Err(datafusion_common::DataFusionError::Plan(format!(
478 "Cannot invert operator: {:?}",
479 op
480 )));
481 };
482 Self::convert_operator(&negated)
483 }
484
485 fn is_safe_cast_for_partition_pruning(data_type: &DataType) -> bool {
488 match data_type {
489 DataType::Int8 => true,
491 DataType::Int16 => true,
492 DataType::Int32 => true,
493 DataType::Int64 => true,
494 DataType::UInt8 => true,
495 DataType::UInt16 => true,
496 DataType::UInt32 => true,
497 DataType::UInt64 => true,
498
499 DataType::Float32 => true,
501 DataType::Float64 => true,
502
503 DataType::Utf8 => true,
505 DataType::LargeUtf8 => true,
506
507 DataType::Date32 => true,
509 DataType::Date64 => true,
510 DataType::Timestamp(_, _) => true,
511
512 DataType::Boolean => true,
514
515 _ => false,
517 }
518 }
519}
520
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use datafusion::arrow::datatypes::{DataType, Field, Schema};
    use datafusion::common::Column;
    use datafusion::datasource::DefaultTableSource;
    use datafusion_expr::{LogicalPlanBuilder, col, lit};
    use datatypes::value::Value;
    use partition::expr::{Operand, PartitionExpr, RestrictedOp};

    use super::*;

    /// Builds a scan over an empty table named `test` with the columns
    /// `timestamp`, `user_id` and `value`.
    fn create_test_table_scan() -> LogicalPlan {
        let fields = vec![
            Field::new(
                "timestamp",
                DataType::Timestamp(datafusion::arrow::datatypes::TimeUnit::Millisecond, None),
                false,
            ),
            Field::new("user_id", DataType::Int64, false),
            Field::new("value", DataType::Int64, false),
        ];
        let schema = Arc::new(Schema::new(fields));

        let empty_table = datafusion::datasource::empty::EmptyTable::new(schema);
        let table_source = Arc::new(DefaultTableSource::new(Arc::new(empty_table)));

        LogicalPlanBuilder::scan("test", table_source, None)
            .unwrap()
            .build()
            .unwrap()
    }

    /// Returns the test table scan with `filters` attached directly to the
    /// scan node.
    fn table_scan_with_filters(filters: Vec<Expr>) -> LogicalPlan {
        let LogicalPlan::TableScan(scan) = create_test_table_scan() else {
            panic!("expected test table scan");
        };
        LogicalPlan::TableScan(datafusion_expr::logical_plan::TableScan { filters, ..scan })
    }

    /// Expected expression `column <op> literal` with an `Int64` literal.
    fn compare(column: &str, op: RestrictedOp, literal: i64) -> PartitionExpr {
        PartitionExpr::new(
            Operand::Column(column.to_string()),
            op,
            Operand::Value(Value::Int64(literal)),
        )
    }

    /// Expected expression `user_id <op> literal`.
    fn user_id(op: RestrictedOp, literal: i64) -> PartitionExpr {
        compare("user_id", op, literal)
    }

    /// Expected expression `user_id <op> NULL`.
    fn user_id_null(op: RestrictedOp) -> PartitionExpr {
        PartitionExpr::new(
            Operand::Column("user_id".to_string()),
            op,
            Operand::Value(Value::Null),
        )
    }

    /// Expected expression `left AND right`.
    fn all_of(left: PartitionExpr, right: PartitionExpr) -> PartitionExpr {
        PartitionExpr::new(
            Operand::Expr(left),
            RestrictedOp::And,
            Operand::Expr(right),
        )
    }

    /// Expected expression `left OR right`.
    fn any_of(left: PartitionExpr, right: PartitionExpr) -> PartitionExpr {
        PartitionExpr::new(Operand::Expr(left), RestrictedOp::Or, Operand::Expr(right))
    }

    /// Wraps `expr` in a cast to `data_type`.
    fn cast_to(expr: Expr, data_type: DataType) -> Expr {
        Expr::Cast(datafusion_expr::Cast {
            expr: Box::new(expr),
            data_type,
        })
    }

    /// One table-driven scenario: a filter placed on top of the test scan,
    /// the partition columns, and the expressions expected to be extracted.
    struct FilterTestCase {
        name: &'static str,
        filter_expr: Expr,
        expected_partition_exprs: Vec<PartitionExpr>,
        partition_columns: Vec<&'static str>,
    }

    impl FilterTestCase {
        fn new(
            name: &'static str,
            filter_expr: Expr,
            expected_partition_exprs: Vec<PartitionExpr>,
            partition_columns: Vec<&'static str>,
        ) -> Self {
            Self {
                name,
                filter_expr,
                expected_partition_exprs,
                partition_columns,
            }
        }
    }

    /// Runs every case: builds `Filter(TableScan)`, extracts the partition
    /// expressions and compares them with the expectation.
    fn check_partition_expressions(cases: Vec<FilterTestCase>) {
        for case in cases {
            let FilterTestCase {
                name,
                filter_expr,
                expected_partition_exprs: expected,
                partition_columns,
            } = case;

            let plan = LogicalPlanBuilder::from(create_test_table_scan())
                .filter(filter_expr)
                .unwrap()
                .build()
                .unwrap();

            let partition_columns: Vec<String> =
                partition_columns.into_iter().map(String::from).collect();
            let partition_exprs =
                PredicateExtractor::extract_partition_expressions(&plan, &partition_columns)
                    .unwrap();

            assert_eq!(
                partition_exprs, expected,
                "Test case '{}': expected partition expressions {:?}, got {:?}",
                name, expected, partition_exprs
            );
        }
    }

    #[test]
    fn test_extracts_table_scan_filters() {
        let plan = table_scan_with_filters(vec![col("user_id").gt_eq(lit(100i64))]);

        let partition_exprs =
            PredicateExtractor::extract_partition_expressions(&plan, &["user_id".to_string()])
                .unwrap();

        assert_eq!(partition_exprs, vec![user_id(RestrictedOp::GtEq, 100)]);
    }

    #[test]
    fn test_combines_table_scan_filters_as_conjunction() {
        let plan = table_scan_with_filters(vec![
            col("user_id").eq(lit(10i64)),
            col("value").eq(lit(20i64)),
        ]);

        let partition_exprs = PredicateExtractor::extract_partition_expressions(
            &plan,
            &["user_id".to_string(), "value".to_string()],
        )
        .unwrap();

        assert_eq!(
            partition_exprs,
            vec![all_of(
                compare("user_id", RestrictedOp::Eq, 10),
                compare("value", RestrictedOp::Eq, 20),
            )]
        );
    }

    #[test]
    fn test_basic_constraints_extraction() {
        let cases = vec![
            FilterTestCase::new(
                "non_partition_column_ignored",
                col("value").gt_eq(lit(100i64)),
                vec![],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "simple_constraint",
                col("user_id").gt_eq(lit(100i64)),
                vec![user_id(RestrictedOp::GtEq, 100)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "or_expression",
                col("user_id")
                    .eq(lit(100i64))
                    .or(col("user_id").eq(lit(200i64))),
                vec![any_of(
                    user_id(RestrictedOp::Eq, 100),
                    user_id(RestrictedOp::Eq, 200),
                )],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "complex_and_or",
                col("user_id")
                    .gt_eq(lit(100i64))
                    .and(col("user_id").lt(lit(200i64)))
                    .or(col("user_id")
                        .gt_eq(lit(300i64))
                        .and(col("user_id").lt(lit(400i64)))),
                vec![any_of(
                    all_of(
                        user_id(RestrictedOp::GtEq, 100),
                        user_id(RestrictedOp::Lt, 200),
                    ),
                    all_of(
                        user_id(RestrictedOp::GtEq, 300),
                        user_id(RestrictedOp::Lt, 400),
                    ),
                )],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_alias_expressions() {
        let cases = vec![
            FilterTestCase::new(
                "simple_alias",
                col("user_id").alias("uid").eq(lit(100i64)),
                vec![user_id(RestrictedOp::Eq, 100)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "nested_alias",
                col("user_id").alias("uid").alias("u").gt_eq(lit(50i64)),
                vec![user_id(RestrictedOp::GtEq, 50)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "complex_alias_with_and_or",
                col("user_id")
                    .alias("uid")
                    .gt_eq(lit(100i64))
                    .and(col("user_id").alias("u").lt(lit(200i64)))
                    .or(col("user_id").alias("id").eq(lit(300i64))),
                vec![any_of(
                    all_of(
                        user_id(RestrictedOp::GtEq, 100),
                        user_id(RestrictedOp::Lt, 200),
                    ),
                    user_id(RestrictedOp::Eq, 300),
                )],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_inlist_expressions() {
        let cases = vec![
            FilterTestCase::new(
                "simple_inlist",
                col("user_id").in_list(vec![lit(100i64), lit(200i64), lit(300i64)], false),
                vec![any_of(
                    any_of(
                        user_id(RestrictedOp::Eq, 100),
                        user_id(RestrictedOp::Eq, 200),
                    ),
                    user_id(RestrictedOp::Eq, 300),
                )],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "negated_inlist",
                col("user_id").in_list(vec![lit(100i64), lit(200i64)], true),
                vec![all_of(
                    user_id(RestrictedOp::NotEq, 100),
                    user_id(RestrictedOp::NotEq, 200),
                )],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "inlist_with_alias",
                col("user_id")
                    .alias("uid")
                    .in_list(vec![lit(100i64), lit(200i64)], false),
                vec![any_of(
                    user_id(RestrictedOp::Eq, 100),
                    user_id(RestrictedOp::Eq, 200),
                )],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_between_expressions() {
        let cases = vec![
            FilterTestCase::new(
                "simple_between",
                col("user_id").between(lit(100i64), lit(200i64)),
                vec![all_of(
                    user_id(RestrictedOp::GtEq, 100),
                    user_id(RestrictedOp::LtEq, 200),
                )],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "negated_between",
                Expr::Between(datafusion_expr::Between {
                    expr: Box::new(col("user_id")),
                    negated: true,
                    low: Box::new(lit(100i64)),
                    high: Box::new(lit(200i64)),
                }),
                vec![any_of(
                    user_id(RestrictedOp::Lt, 100),
                    user_id(RestrictedOp::Gt, 200),
                )],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "between_with_alias",
                col("user_id")
                    .alias("uid")
                    .between(lit(100i64), lit(200i64)),
                vec![all_of(
                    user_id(RestrictedOp::GtEq, 100),
                    user_id(RestrictedOp::LtEq, 200),
                )],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_null_expressions() {
        let cases = vec![
            FilterTestCase::new(
                "is_null",
                col("user_id").is_null(),
                vec![user_id_null(RestrictedOp::Eq)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "is_not_null",
                col("user_id").is_not_null(),
                vec![user_id_null(RestrictedOp::NotEq)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "null_with_alias",
                col("user_id").alias("uid").is_null(),
                vec![user_id_null(RestrictedOp::Eq)],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_cast_expressions() {
        let list_of_int32 = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));

        let cases = vec![
            FilterTestCase::new(
                "safe_cast",
                cast_to(col("user_id"), DataType::Int64).eq(lit(100i64)),
                vec![user_id(RestrictedOp::Eq, 100)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "cast_with_alias",
                cast_to(col("user_id").alias("uid"), DataType::Int64).eq(lit(100i64)),
                vec![user_id(RestrictedOp::Eq, 100)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "unsafe_cast",
                cast_to(col("user_id"), list_of_int32).eq(lit(100i64)),
                vec![],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_not_expressions() {
        let cases = vec![
            FilterTestCase::new(
                "not_equality",
                Expr::Not(Box::new(col("user_id").eq(lit(100i64)))),
                vec![user_id(RestrictedOp::NotEq, 100)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "not_comparison",
                Expr::Not(Box::new(col("user_id").lt(lit(100i64)))),
                vec![user_id(RestrictedOp::GtEq, 100)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "not_is_null",
                Expr::Not(Box::new(col("user_id").is_null())),
                vec![user_id_null(RestrictedOp::NotEq)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "not_with_alias",
                Expr::Not(Box::new(col("user_id").alias("uid").eq(lit(100i64)))),
                vec![user_id(RestrictedOp::NotEq, 100)],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_edge_cases() {
        let qualified_filter =
            Expr::Column(Column::new(Some("test"), "user_id")).eq(lit(100i64));

        let combined_filter = {
            let in_expr = col("user_id")
                .alias("uid")
                .in_list(vec![lit(100i64), lit(200i64)], false);
            let between_expr =
                cast_to(col("user_id"), DataType::Int64).between(lit(300i64), lit(400i64));
            in_expr.or(between_expr)
        };

        let cases = vec![
            FilterTestCase::new(
                "qualified_column_name",
                qualified_filter,
                vec![user_id(RestrictedOp::Eq, 100)],
                vec!["user_id"],
            ),
            FilterTestCase::new(
                "comprehensive_combinations",
                combined_filter,
                vec![any_of(
                    any_of(
                        user_id(RestrictedOp::Eq, 100),
                        user_id(RestrictedOp::Eq, 200),
                    ),
                    all_of(
                        user_id(RestrictedOp::GtEq, 300),
                        user_id(RestrictedOp::LtEq, 400),
                    ),
                )],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }

    #[test]
    fn test_mixed_partition_non_partition_expressions() {
        let cases = vec![
            // AND of a partition and a non-partition predicate: only the
            // partition conjunct survives.
            FilterTestCase::new(
                "mixed_and_expression",
                col("user_id")
                    .eq(lit(100i64))
                    .and(col("value").gt(lit(50i64))),
                vec![user_id(RestrictedOp::Eq, 100)],
                vec!["user_id"],
            ),
            // OR involving a non-partition column: nothing can be used.
            FilterTestCase::new(
                "mixed_or_expression",
                col("user_id")
                    .between(lit(1i64), lit(10i64))
                    .or(col("value").gt(lit(50i64))),
                vec![],
                vec!["user_id"],
            ),
            // Three-way AND: both partition conjuncts are extracted separately.
            FilterTestCase::new(
                "complex_mixed_and",
                col("user_id")
                    .gt_eq(lit(100i64))
                    .and(col("value").eq(lit(200i64)))
                    .and(col("timestamp").lt(lit(1000i64))),
                vec![
                    user_id(RestrictedOp::GtEq, 100),
                    compare("timestamp", RestrictedOp::Lt, 1000),
                ],
                vec!["user_id", "timestamp"],
            ),
            // AND over partition columns only: kept as a single expression.
            FilterTestCase::new(
                "pure_partition_and",
                col("user_id")
                    .gt_eq(lit(100i64))
                    .and(col("timestamp").lt(lit(1000i64))),
                vec![all_of(
                    user_id(RestrictedOp::GtEq, 100),
                    compare("timestamp", RestrictedOp::Lt, 1000),
                )],
                vec!["user_id", "timestamp"],
            ),
            // OR over partition columns only: kept as a single expression.
            FilterTestCase::new(
                "pure_partition_or",
                col("user_id")
                    .eq(lit(100i64))
                    .or(col("user_id").eq(lit(200i64))),
                vec![any_of(
                    user_id(RestrictedOp::Eq, 100),
                    user_id(RestrictedOp::Eq, 200),
                )],
                vec!["user_id"],
            ),
            // Predicate on a non-partition column only: nothing extracted.
            FilterTestCase::new(
                "pure_non_partition",
                col("value").gt_eq(lit(100i64)),
                vec![],
                vec!["user_id"],
            ),
            // OR whose left branch mixes in a non-partition column: dropped.
            FilterTestCase::new(
                "nested_mixed_expression",
                (col("user_id")
                    .eq(lit(100i64))
                    .and(col("value").gt(lit(50i64))))
                .or(col("user_id").eq(lit(200i64))),
                vec![],
                vec!["user_id"],
            ),
            // AND with a non-partition OR branch: only the partition conjunct
            // is kept.
            FilterTestCase::new(
                "and_with_nested_mixed_or",
                col("user_id")
                    .gt_eq(lit(100i64))
                    .and(col("value").eq(lit(1i64)).or(col("value").eq(lit(2i64)))),
                vec![user_id(RestrictedOp::GtEq, 100)],
                vec!["user_id"],
            ),
        ];
        check_partition_expressions(cases);
    }
}
1263}