feat: dist plan optimize part 2 (#2543)

* allow udf and scalar fn

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* put CountWildcardRule before dist planner

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* bump datafusion to fix first_value/last_value

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* update sqlness result

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* use retain instead

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2023-10-09 10:18:36 +08:00
committed by GitHub
parent 17b385a985
commit d5e4fcaaff
5 changed files with 30 additions and 39 deletions

18
Cargo.lock generated
View File

@@ -2476,7 +2476,7 @@ dependencies = [
[[package]]
name = "datafusion"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2524,7 +2524,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"arrow-array",
@@ -2538,7 +2538,7 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"dashmap",
"datafusion-common",
@@ -2555,7 +2555,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2569,7 +2569,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"async-trait",
@@ -2586,7 +2586,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"ahash 0.8.3",
"arrow",
@@ -2621,7 +2621,7 @@ dependencies = [
[[package]]
name = "datafusion-row"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"datafusion-common",
@@ -2632,7 +2632,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"arrow",
"arrow-schema",
@@ -2645,7 +2645,7 @@ dependencies = [
[[package]]
name = "datafusion-substrait"
version = "27.0.0"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=c0b0fca548e99d020c76e1a1cd7132aab26000e1#c0b0fca548e99d020c76e1a1cd7132aab26000e1"
source = "git+https://github.com/waynexia/arrow-datafusion.git?rev=b6f3b28b6fe91924cc8dd3d83726b766f2a706ec#b6f3b28b6fe91924cc8dd3d83726b766f2a706ec"
dependencies = [
"async-recursion",
"chrono",

View File

@@ -68,13 +68,13 @@ arrow-schema = { version = "43.0", features = ["serde"] }
async-stream = "0.3"
async-trait = "0.1"
chrono = { version = "0.4", features = ["serde"] }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "c0b0fca548e99d020c76e1a1cd7132aab26000e1" }
datafusion = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-common = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-optimizer = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-physical-expr = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-sql = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
datafusion-substrait = { git = "https://github.com/waynexia/arrow-datafusion.git", rev = "b6f3b28b6fe91924cc8dd3d83726b766f2a706ec" }
derive_builder = "0.12"
futures = "0.3"
futures-util = "0.3"

View File

@@ -140,7 +140,9 @@ impl Categorizer {
| Expr::Negative(_)
| Expr::Between(_)
| Expr::Sort(_)
| Expr::Exists(_) => Commutativity::Commutative,
| Expr::Exists(_)
| Expr::ScalarFunction(_)
| Expr::ScalarUDF(_) => Commutativity::Commutative,
Expr::Like(_)
| Expr::SimilarTo(_)
@@ -150,8 +152,6 @@ impl Categorizer {
| Expr::Case(_)
| Expr::Cast(_)
| Expr::TryCast(_)
| Expr::ScalarFunction(_)
| Expr::ScalarUDF(_)
| Expr::AggregateFunction(_)
| Expr::WindowFunction(_)
| Expr::AggregateUDF(_)

View File

@@ -34,7 +34,8 @@ use datafusion::physical_optimizer::PhysicalOptimizerRule;
use datafusion::physical_plan::ExecutionPlan;
use datafusion::physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner};
use datafusion_expr::LogicalPlan as DfLogicalPlan;
use datafusion_optimizer::analyzer::Analyzer;
use datafusion_optimizer::analyzer::count_wildcard_rule::CountWildcardRule;
use datafusion_optimizer::analyzer::{Analyzer, AnalyzerRule};
use datafusion_optimizer::optimizer::Optimizer;
use promql::extension_plan::PromExtensionPlanner;
use substrait::extension_serializer::ExtensionSerializer;
@@ -88,6 +89,8 @@ impl QueryEngineState {
}
analyzer.rules.insert(0, Arc::new(TypeConversionRule));
analyzer.rules.insert(0, Arc::new(StringNormalizationRule));
Self::remove_analyzer_rule(&mut analyzer.rules, CountWildcardRule {}.name());
analyzer.rules.insert(0, Arc::new(CountWildcardRule {}));
let mut optimizer = Optimizer::new();
optimizer.rules.push(Arc::new(OrderHintRule));
@@ -132,20 +135,15 @@ impl QueryEngineState {
}
}
fn remove_analyzer_rule(rules: &mut Vec<Arc<dyn AnalyzerRule + Send + Sync>>, name: &str) {
rules.retain(|rule| rule.name() != name);
}
fn remove_physical_optimize_rule(
rules: &mut Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
name: &str,
) {
let mut index_to_move = None;
for (i, rule) in rules.iter().enumerate() {
if rule.name() == name {
index_to_move = Some(i);
break;
}
}
if let Some(index) = index_to_move {
let _ = rules.remove(index);
}
rules.retain(|rule| rule.name() != name);
}
/// Register a udf function

View File

@@ -12,15 +12,8 @@ EXPLAIN SELECT COUNT(*) FROM single_partition;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]]_|
|_|_Projection:_|
|_|_MergeScan [is_placeholder=false]_|
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[COUNT(UInt8(1))]_|
|_|_CoalescePartitionsExec_|
|_|_AggregateExec: mode=Partial, gby=[], aggr=[COUNT(UInt8(1))]_|
|_|_RepartitionExec: partitioning=REDACTED
|_|_ProjectionExec: expr=[]_|
|_|_MergeScanExec: REDACTED
| logical_plan_| MergeScan [is_placeholder=false]_|
| physical_plan | MergeScanExec: REDACTED
|_|_|
+-+-+