From 1d8d7593b5cbe2b106dd94c9c19a389e7a019cf0 Mon Sep 17 00:00:00 2001 From: WenyXu Date: Thu, 26 Mar 2026 07:45:44 +0000 Subject: [PATCH] refactor(partition): restrict expr split to range-only shapes Signed-off-by: WenyXu --- src/partition/src/utils/split.rs | 110 +++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 28 deletions(-) diff --git a/src/partition/src/utils/split.rs b/src/partition/src/utils/split.rs index 31364a4018..8559ff3800 100644 --- a/src/partition/src/utils/split.rs +++ b/src/partition/src/utils/split.rs @@ -46,6 +46,11 @@ pub enum ExprSplitDegradeReason { /// - `left = R AND S` /// - `right = R AND NOT(S)` /// +/// Supported shape: +/// - `split_expr` must be a single atomic range predicate (`<`, `<=`, `>`, `>=`). +/// - `base_expr` must be a pure `AND` tree of atomic range predicates, possibly +/// across unrelated columns. +/// /// Returns [`ExprSplitDegradeReason`] when this cannot safely process the shape/type. pub fn split_partition_expr( base_expr: PartitionExpr, @@ -58,6 +63,10 @@ pub fn split_partition_expr( return Err(ExprSplitDegradeReason::UnsupportedType); } + if !validate_base_expr_shape(&base) || !validate_split_expr_shape(&split) { + return Err(ExprSplitDegradeReason::UnsupportedType); + } + let not_split = match negate_split_expr(&split) { Ok(expr) => expr, Err(_) => { @@ -343,6 +352,43 @@ fn validate_atomic(expr: &PartitionExpr) -> Result<()> { } } +/// Validates that `base_expr` stays within the range-only split contract. +/// +/// Scope and intent: +/// - The split utility only handles interval-style partition predicates. +/// - `base_expr` may mention multiple columns, but it must remain a pure `AND` +/// tree of atomic range predicates. +fn validate_base_expr_shape(expr: &PartitionExpr) -> bool { + let mut atoms = Vec::new(); + if !collect_and_atoms(expr, &mut atoms) { + return false; + } + + atoms + .into_iter() + .all(|atom| is_atomic_range_expr(&atom.canonicalize())) +} + +/// Validates that `split_expr` is a single atomic range predicate. +/// +/// This restriction keeps `NOT(split_expr)` in the same range-only subset so the +/// resulting left/right branches stay within the supported contract. +fn validate_split_expr_shape(expr: &PartitionExpr) -> bool { + is_atomic_range_expr(expr) +} + +/// Returns whether `expr` is an atomic `column op value` range predicate. +/// +/// Supported operators are limited to `<`, `<=`, `>`, and `>=`. +fn is_atomic_range_expr(expr: &PartitionExpr) -> bool { + atom_col_op_val(expr).is_some_and(|(_, op, _)| { + matches!( + op, + RestrictedOp::Lt | RestrictedOp::LtEq | RestrictedOp::Gt | RestrictedOp::GtEq + ) + }) +} + fn is_supported_value(v: &Value) -> bool { matches!( v, @@ -762,6 +808,23 @@ mod tests { assert_eq!(right.to_string(), "a >= 5 AND a < 10"); } + #[test] + fn test_split_base_expr_allows_unrelated_range_columns() { + // R: a > 20 AND b < 20 + let base = col("a") + .gt(Value::Int64(20)) + .and(col("b").lt(Value::Int64(20))); + // S: a < 30 + let split = col("a").lt(Value::Int64(30)); + + let (left, right) = split_partition_expr(base, split).unwrap(); + + // left keeps the unrelated `b < 20` bound while splitting column `a`. + assert_eq!(left.to_string(), "a > 20 AND a < 30 AND b < 20"); + // right also preserves the unrelated column bound. + assert_eq!(right.to_string(), "a >= 30 AND b < 20"); + } + #[test] fn test_split_degrade_on_unsupported_type() { // intentionally excludes boolean from split-able value types. @@ -810,15 +873,14 @@ mod tests { } #[test] - fn test_split_degrade_on_eq_neq_conflict() { - // R: a = 5 + fn test_split_rejects_eq_in_base_expr() { + // R: a = 5 falls outside the range-only base_expr contract. let base = col("a").eq(Value::Int64(5)); - // S: a = 5 - let split = col("a").eq(Value::Int64(5)); + // S: a < 6 remains a valid range split. + let split = col("a").lt(Value::Int64(6)); - // right = (a = 5) AND (a <> 5) is unsatisfiable, should degrade. let result = split_partition_expr(base, split); - assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::EmptyBranch); + assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::UnsupportedType); } #[test] @@ -854,31 +916,29 @@ mod tests { } #[test] - fn test_split_degrade_on_singleton_bounds_excluded_by_negated_eq() { + fn test_split_rejects_not_eq_in_split_expr() { // R: a >= 5 AND a <= 5 let base = col("a") .gt_eq(Value::Int64(5)) .and(col("a").lt_eq(Value::Int64(5))); - // S: a <> 5 + // S: a <> 5 falls outside the range-only split_expr contract. let split = col("a").not_eq(Value::Int64(5)); - // left = (a >= 5 AND a <= 5) AND (a <> 5) is unsatisfiable, should degrade. let result = split_partition_expr(base, split); - assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::EmptyBranch); + assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::UnsupportedType); } #[test] - fn test_split_degrade_on_singleton_bounds_excluded_by_eq() { + fn test_split_rejects_eq_in_split_expr() { // R: a >= 5 AND a <= 5 let base = col("a") .gt_eq(Value::Int64(5)) .and(col("a").lt_eq(Value::Int64(5))); - // S: a = 5 + // S: a = 5 falls outside the range-only split_expr contract. let split = col("a").eq(Value::Int64(5)); - // right = (a >= 5 AND a <= 5) AND (a <> 5) is unsatisfiable, should degrade. let result = split_partition_expr(base, split); - assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::EmptyBranch); + assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::UnsupportedType); } #[test] @@ -1154,8 +1214,8 @@ mod tests { } #[test] - fn test_split_nested_base_or_and() { - // base: (a < 10) OR (a >= 20 AND a < 30) + fn test_split_rejects_or_in_base_expr() { + // R: (a < 10) OR (a >= 20 AND a < 30) falls outside the AND-only base_expr contract. let base = PartitionExpr::new( Operand::Expr(col("a").lt(Value::Int64(10))), RestrictedOp::Or, @@ -1165,21 +1225,18 @@ mod tests { .and(col("a").lt(Value::Int64(30))), ), ); - // split: a < 5 + // S: a < 5 let split = col("a").lt(Value::Int64(5)); let result = split_partition_expr(base, split); - assert_eq!( - result.unwrap_err(), - ExprSplitDegradeReason::ColliderRejected - ); + assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::UnsupportedType); } #[test] - fn test_split_nested_split_expr_or_and() { - // base: a < 10 + fn test_split_rejects_or_in_split_expr() { + // R: a < 10 let base = col("a").lt(Value::Int64(10)); - // split: (a < 5) OR (a >= 8 AND a < 9) + // S: (a < 5) OR (a >= 8 AND a < 9) falls outside the atomic split_expr contract. let split = PartitionExpr::new( Operand::Expr(col("a").lt(Value::Int64(5))), RestrictedOp::Or, @@ -1191,9 +1248,6 @@ mod tests { ); let result = split_partition_expr(base, split); - assert_eq!( - result.unwrap_err(), - ExprSplitDegradeReason::ColliderRejected - ); + assert_eq!(result.unwrap_err(), ExprSplitDegradeReason::UnsupportedType); } }