From e45363cb8619306bbad46d6389e9a0236bb69609 Mon Sep 17 00:00:00 2001 From: WenyXu Date: Thu, 26 Mar 2026 04:09:58 +0000 Subject: [PATCH] refactor(partition): reuse conjunction bound collection in expr split Signed-off-by: WenyXu --- src/partition/src/utils/split.rs | 331 ++++++++++++++++--------------- 1 file changed, 169 insertions(+), 162 deletions(-) diff --git a/src/partition/src/utils/split.rs b/src/partition/src/utils/split.rs index ba5a538978..0c66d4badc 100644 --- a/src/partition/src/utils/split.rs +++ b/src/partition/src/utils/split.rs @@ -107,111 +107,22 @@ pub fn split_partition_expr( /// - This is still a conservative fast path focused on conjunction emptiness /// detection for split degradation. fn is_empty_and_conjunction(expr: &PartitionExpr) -> bool { - let mut atoms = Vec::new(); - // Only handle conjunction trees. If expression contains OR, skip here. - if !collect_and_atoms(expr, &mut atoms) { + let Some(collected) = collect_conjunction_bounds(expr) else { return false; + }; + + if collected.has_conflict { + return true; } - let mut lowers: BTreeMap = BTreeMap::new(); - let mut uppers: BTreeMap = BTreeMap::new(); - let mut equals: BTreeMap = BTreeMap::new(); - let mut not_equals: BTreeMap> = BTreeMap::new(); - - for atom in atoms { - let atom = atom.canonicalize(); - let Some((col, op, val)) = atom_col_op_val(&atom) else { - continue; - }; - - match op { - // Keep the tightest upper bound for this column. - RestrictedOp::Lt | RestrictedOp::LtEq => { - let candidate = UpperBound { - value: val, - inclusive: matches!(op, RestrictedOp::LtEq), - }; - match uppers.get_mut(&col) { - Some(current) => { - if prefer_upper(&candidate, current) { - *current = candidate; - } - } - None => { - uppers.insert(col, candidate); - } - } - } - // Keep the tightest lower bound for this column. - RestrictedOp::Gt | RestrictedOp::GtEq => { - let candidate = LowerBound { - value: val, - inclusive: matches!(op, RestrictedOp::GtEq), - }; - match lowers.get_mut(&col) { - Some(current) => { - if prefer_lower(&candidate, current) { - *current = candidate; - } - } - None => { - lowers.insert(col, candidate); - } - } - } - // Equality means both lower and upper are fixed at the same point. - RestrictedOp::Eq => { - if let Some(existing) = equals.get(&col) - && existing != &val - { - return true; - } - if not_equals - .get(&col) - .is_some_and(|excluded| excluded.contains(&val)) - { - return true; - } - equals.insert(col.clone(), val.clone()); - - let lower = LowerBound { - value: val.clone(), - inclusive: true, - }; - let upper = UpperBound { - value: val, - inclusive: true, - }; - match lowers.get_mut(&col) { - Some(current) => { - if prefer_lower(&lower, current) { - *current = lower; - } - } - None => { - lowers.insert(col.clone(), lower); - } - } - match uppers.get_mut(&col) { - Some(current) => { - if prefer_upper(&upper, current) { - *current = upper; - } - } - None => { - uppers.insert(col, upper); - } - } - } - RestrictedOp::NotEq => { - if equals.get(&col).is_some_and(|eq| eq == &val) { - return true; - } - not_equals.entry(col).or_default().insert(val); - } - RestrictedOp::And | RestrictedOp::Or => {} - } - } + let CollectedConjunction { + lowers, + uppers, + equals: _, + not_equals, + passthrough: _, + has_conflict: _, + } = collected; if uppers .iter() @@ -510,6 +421,15 @@ struct UpperBound { inclusive: bool, } +struct CollectedConjunction { + lowers: BTreeMap, + uppers: BTreeMap, + equals: BTreeMap, + not_equals: BTreeMap>, + passthrough: Vec, + has_conflict: bool, +} + /// Simplifies conjunction-only range predicates by keeping the tightest bounds per column. /// /// This pass is intentionally conservative and only runs when the whole expression @@ -532,68 +452,18 @@ struct UpperBound { /// - `a >= 10 AND a > 10` => `a > 10` /// - `a < 10 AND a < 5` => `a < 5` fn simplify_and_bounds(expr: PartitionExpr) -> PartitionExpr { - let mut atoms = Vec::new(); - if !collect_and_atoms(&expr, &mut atoms) { + let Some(collected) = collect_conjunction_bounds(&expr) else { return expr; - } + }; - let mut passthrough = Vec::new(); - let mut lowers: BTreeMap = BTreeMap::new(); - let mut uppers: BTreeMap = BTreeMap::new(); - let mut seen = HashSet::new(); - - for atom in atoms { - let atom = atom.canonicalize(); - let Some((col, op, val)) = atom_col_op_val(&atom) else { - let key = atom.to_string(); - if seen.insert(key) { - passthrough.push(atom); - } - continue; - }; - - // Keep only the tightest lower/upper range for each column. - match op { - RestrictedOp::Lt | RestrictedOp::LtEq => { - let candidate = UpperBound { - value: val, - inclusive: matches!(op, RestrictedOp::LtEq), - }; - match uppers.get_mut(&col) { - Some(current) => { - if prefer_upper(&candidate, current) { - *current = candidate; - } - } - None => { - uppers.insert(col, candidate); - } - } - } - RestrictedOp::Gt | RestrictedOp::GtEq => { - let candidate = LowerBound { - value: val, - inclusive: matches!(op, RestrictedOp::GtEq), - }; - match lowers.get_mut(&col) { - Some(current) => { - if prefer_lower(&candidate, current) { - *current = candidate; - } - } - None => { - lowers.insert(col, candidate); - } - } - } - _ => { - let key = atom.to_string(); - if seen.insert(key) { - passthrough.push(atom); - } - } - } - } + let CollectedConjunction { + lowers, + uppers, + equals: _, + not_equals: _, + passthrough, + has_conflict: _, + } = collected; let mut out = passthrough; out.extend(lowers.into_iter().map(|(col, lower)| { @@ -659,6 +529,143 @@ fn atom_col_op_val(expr: &PartitionExpr) -> Option<(String, RestrictedOp, Value) } } +fn collect_conjunction_bounds(expr: &PartitionExpr) -> Option { + let mut atoms = Vec::new(); + if !collect_and_atoms(expr, &mut atoms) { + return None; + } + + let mut lowers = BTreeMap::new(); + let mut uppers = BTreeMap::new(); + let mut equals = BTreeMap::new(); + let mut not_equals: BTreeMap> = BTreeMap::new(); + let mut passthrough = Vec::new(); + let mut seen = HashSet::new(); + let mut has_conflict = false; + + for atom in atoms { + let atom = atom.canonicalize(); + let Some((col, op, val)) = atom_col_op_val(&atom) else { + push_unique_expr(&mut passthrough, &mut seen, atom); + continue; + }; + + match op { + RestrictedOp::Lt | RestrictedOp::LtEq => update_upper_bound( + &mut uppers, + col, + UpperBound { + value: val, + inclusive: matches!(op, RestrictedOp::LtEq), + }, + ), + RestrictedOp::Gt | RestrictedOp::GtEq => update_lower_bound( + &mut lowers, + col, + LowerBound { + value: val, + inclusive: matches!(op, RestrictedOp::GtEq), + }, + ), + RestrictedOp::Eq => { + if let Some(existing) = equals.get(&col) + && existing != &val + { + has_conflict = true; + } + if not_equals + .get(&col) + .is_some_and(|excluded| excluded.contains(&val)) + { + has_conflict = true; + } + equals.insert(col.clone(), val.clone()); + update_lower_bound( + &mut lowers, + col.clone(), + LowerBound { + value: val.clone(), + inclusive: true, + }, + ); + update_upper_bound( + &mut uppers, + col, + UpperBound { + value: val, + inclusive: true, + }, + ); + push_unique_expr(&mut passthrough, &mut seen, atom); + } + RestrictedOp::NotEq => { + if equals.get(&col).is_some_and(|eq| eq == &val) { + has_conflict = true; + } + not_equals.entry(col).or_default().insert(val); + push_unique_expr(&mut passthrough, &mut seen, atom); + } + RestrictedOp::And | RestrictedOp::Or => { + push_unique_expr(&mut passthrough, &mut seen, atom); + } + } + } + + Some(CollectedConjunction { + lowers, + uppers, + equals, + not_equals, + passthrough, + has_conflict, + }) +} + +fn push_unique_expr( + out: &mut Vec, + seen: &mut HashSet, + expr: PartitionExpr, +) { + let key = expr.to_string(); + if seen.insert(key) { + out.push(expr); + } +} + +fn update_upper_bound( + uppers: &mut BTreeMap, + col: String, + candidate: UpperBound, +) { + match uppers.get_mut(&col) { + Some(current) => { + if prefer_upper(&candidate, current) { + *current = candidate; + } + } + None => { + uppers.insert(col, candidate); + } + } +} + +fn update_lower_bound( + lowers: &mut BTreeMap, + col: String, + candidate: LowerBound, +) { + match lowers.get_mut(&col) { + Some(current) => { + if prefer_lower(&candidate, current) { + *current = candidate; + } + } + None => { + lowers.insert(col, candidate); + } + } +} + fn prefer_upper(candidate: &UpperBound, current: &UpperBound) -> bool { // "Smaller" upper bound is tighter. For equal value, exclusive is tighter. match candidate.value.partial_cmp(¤t.value) {