mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-17 21:40:37 +00:00
fix: handle hash distribution properly (#6943)
* fix: handle hash distribution properly

  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* Update src/query/src/optimizer/pass_distribution.rs

  Co-authored-by: dennis zhuang <killme2008@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: dennis zhuang <killme2008@gmail.com>
This commit is contained in:
@@ -420,17 +420,22 @@ impl MergeScanExec {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut hash_cols = HashSet::default();
|
||||
let partition_cols = self
|
||||
.partition_cols
|
||||
.iter()
|
||||
.map(|x| x.as_str())
|
||||
.collect::<HashSet<_>>();
|
||||
let mut overlaps = vec![];
|
||||
for expr in &hash_exprs {
|
||||
if let Some(col_expr) = expr.as_any().downcast_ref::<Column>() {
|
||||
hash_cols.insert(col_expr.name());
|
||||
// TODO(ruihang): tracking aliases
|
||||
if let Some(col_expr) = expr.as_any().downcast_ref::<Column>()
|
||||
&& partition_cols.contains(col_expr.name())
|
||||
{
|
||||
overlaps.push(expr.clone());
|
||||
}
|
||||
}
|
||||
for col in &self.partition_cols {
|
||||
if !hash_cols.contains(col.as_str()) {
|
||||
// The partitioning columns are not the same
|
||||
return None;
|
||||
}
|
||||
if overlaps.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Self {
|
||||
@@ -443,7 +448,7 @@ impl MergeScanExec {
|
||||
metric: self.metric.clone(),
|
||||
properties: PlanProperties::new(
|
||||
self.properties.eq_properties.clone(),
|
||||
Partitioning::Hash(hash_exprs, self.target_partition),
|
||||
Partitioning::Hash(overlaps, self.target_partition),
|
||||
self.properties.emission_type,
|
||||
self.properties.boundedness,
|
||||
),
|
||||
|
||||
@@ -17,7 +17,6 @@ use std::sync::Arc;
|
||||
use datafusion::config::ConfigOptions;
|
||||
use datafusion::physical_optimizer::PhysicalOptimizerRule;
|
||||
use datafusion::physical_plan::ExecutionPlan;
|
||||
use datafusion_common::tree_node::{Transformed, TreeNode};
|
||||
use datafusion_common::Result as DfResult;
|
||||
use datafusion_physical_expr::Distribution;
|
||||
|
||||
@@ -56,26 +55,52 @@ impl PassDistribution {
|
||||
plan: Arc<dyn ExecutionPlan>,
|
||||
_config: &ConfigOptions,
|
||||
) -> DfResult<Arc<dyn ExecutionPlan>> {
|
||||
let mut distribution_requirement = None;
|
||||
let result = plan.transform_down(|plan| {
|
||||
if let Some(distribution) = plan.required_input_distribution().first()
|
||||
&& !matches!(distribution, Distribution::UnspecifiedDistribution)
|
||||
// incorrect workaround, doesn't fix the actual issue
|
||||
&& plan.name() != "HashJoinExec"
|
||||
{
|
||||
distribution_requirement = Some(distribution.clone());
|
||||
}
|
||||
// Start from root with no requirement
|
||||
Self::rewrite_with_distribution(plan, None)
|
||||
}
|
||||
|
||||
if let Some(merge_scan) = plan.as_any().downcast_ref::<MergeScanExec>()
|
||||
&& let Some(distribution) = distribution_requirement.as_ref()
|
||||
&& let Some(new_plan) = merge_scan.try_with_new_distribution(distribution.clone())
|
||||
{
|
||||
Ok(Transformed::yes(Arc::new(new_plan) as _))
|
||||
} else {
|
||||
Ok(Transformed::no(plan))
|
||||
}
|
||||
})?;
|
||||
/// Top-down rewrite that propagates distribution requirements to children.
|
||||
fn rewrite_with_distribution(
|
||||
plan: Arc<dyn ExecutionPlan>,
|
||||
current_req: Option<Distribution>,
|
||||
) -> DfResult<Arc<dyn ExecutionPlan>> {
|
||||
// If this is a MergeScanExec, try to apply the current requirement.
|
||||
if let Some(merge_scan) = plan.as_any().downcast_ref::<MergeScanExec>()
|
||||
&& let Some(distribution) = current_req.as_ref()
|
||||
&& let Some(new_plan) = merge_scan.try_with_new_distribution(distribution.clone())
|
||||
{
|
||||
// Leaf node; no children to process
|
||||
return Ok(Arc::new(new_plan) as _);
|
||||
}
|
||||
|
||||
Ok(result.data)
|
||||
// Compute per-child requirements from the current node.
|
||||
let children = plan.children();
|
||||
if children.is_empty() {
|
||||
return Ok(plan);
|
||||
}
|
||||
|
||||
let required = plan.required_input_distribution();
|
||||
let mut new_children = Vec::with_capacity(children.len());
|
||||
for (idx, child) in children.into_iter().enumerate() {
|
||||
let child_req = match required.get(idx) {
|
||||
Some(Distribution::UnspecifiedDistribution) => None,
|
||||
None => current_req.clone(),
|
||||
Some(req) => Some(req.clone()),
|
||||
};
|
||||
let new_child = Self::rewrite_with_distribution(child.clone(), child_req)?;
|
||||
new_children.push(new_child);
|
||||
}
|
||||
|
||||
// Rebuild the node only if any child changed (pointer inequality)
|
||||
let unchanged = plan
|
||||
.children()
|
||||
.into_iter()
|
||||
.zip(new_children.iter())
|
||||
.all(|(old, new)| Arc::ptr_eq(old, new));
|
||||
if unchanged {
|
||||
Ok(plan)
|
||||
} else {
|
||||
plan.with_new_children(new_children)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user