mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-20 15:00:40 +00:00
fix: group by expr not as column in step aggr (#7008)
* fix: group by expr not as column Signed-off-by: discord9 <discord9@163.com> * test: dist analyzer date_bin Signed-off-by: discord9 <discord9@163.com> * ???fix wip Signed-off-by: discord9 <discord9@163.com> * fix: deduce using correct input fields Signed-off-by: discord9 <discord9@163.com> * refactor: clearer wrapper Signed-off-by: discord9 <discord9@163.com> * chore: update sqlness Signed-off-by: discord9 <discord9@163.com> * chore: per review Signed-off-by: discord9 <discord9@163.com> * chore: per review Signed-off-by: discord9 <discord9@163.com> * chore: rm todo Signed-off-by: discord9 <discord9@163.com> --------- Signed-off-by: discord9 <discord9@163.com>
This commit is contained in:
@@ -137,6 +137,15 @@ impl StateMergeHelper {
|
||||
let mut lower_aggr_exprs = vec![];
|
||||
let mut upper_aggr_exprs = vec![];
|
||||
|
||||
// group exprs for the upper plan should refer to the lower plan's output group exprs as columns
|
||||
// to avoid re-computing the group exprs.
|
||||
let upper_group_exprs = aggr
|
||||
.group_expr
|
||||
.iter()
|
||||
.map(|c| c.qualified_name())
|
||||
.map(|(r, c)| Expr::Column(Column::new(r, c)))
|
||||
.collect();
|
||||
|
||||
for aggr_expr in aggr.aggr_expr.iter() {
|
||||
let Some(aggr_func) = get_aggr_func(aggr_expr) else {
|
||||
return Err(datafusion_common::DataFusionError::NotImplemented(format!(
|
||||
@@ -198,10 +207,13 @@ impl StateMergeHelper {
|
||||
// update aggregate's output schema
|
||||
let lower_plan = lower_plan.recompute_schema()?;
|
||||
|
||||
let mut upper = aggr.clone();
|
||||
let upper = Aggregate::try_new(
|
||||
Arc::new(lower_plan.clone()),
|
||||
upper_group_exprs,
|
||||
upper_aggr_exprs.clone(),
|
||||
)?;
|
||||
let aggr_plan = LogicalPlan::Aggregate(aggr);
|
||||
upper.aggr_expr = upper_aggr_exprs;
|
||||
upper.input = Arc::new(lower_plan.clone());
|
||||
|
||||
// the upper plan's output schema should be the same as the original aggregate plan's output schema
|
||||
let upper_check = upper;
|
||||
let upper_plan = LogicalPlan::Aggregate(upper_check).recompute_schema()?;
|
||||
@@ -245,7 +257,19 @@ impl StateWrapper {
|
||||
&self,
|
||||
acc_args: &datafusion_expr::function::AccumulatorArgs,
|
||||
) -> datafusion_common::Result<FieldRef> {
|
||||
self.inner.return_field(acc_args.schema.fields())
|
||||
let input_fields = acc_args
|
||||
.exprs
|
||||
.iter()
|
||||
.map(|e| e.return_field(acc_args.schema))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
self.inner.return_field(&input_fields).inspect_err(|e| {
|
||||
common_telemetry::error!(
|
||||
"StateWrapper: {:#?}\nacc_args:{:?}\nerror:{:?}",
|
||||
&self,
|
||||
&acc_args,
|
||||
e
|
||||
);
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -402,7 +426,7 @@ pub struct MergeWrapper {
|
||||
merge_signature: Signature,
|
||||
/// The original physical expression of the aggregate function; the original aggregate function can't be stored directly, as PhysicalExpr didn't implement Any
|
||||
original_phy_expr: Arc<AggregateFunctionExpr>,
|
||||
original_input_types: Vec<DataType>,
|
||||
return_type: DataType,
|
||||
}
|
||||
impl MergeWrapper {
|
||||
pub fn new(
|
||||
@@ -413,13 +437,14 @@ impl MergeWrapper {
|
||||
let name = aggr_merge_func_name(inner.name());
|
||||
// the input type is actually a struct type whose fields are the state fields of the original aggregate function.
|
||||
let merge_signature = Signature::user_defined(datafusion_expr::Volatility::Immutable);
|
||||
let return_type = inner.return_type(&original_input_types)?;
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
name,
|
||||
merge_signature,
|
||||
original_phy_expr,
|
||||
original_input_types,
|
||||
return_type,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -471,8 +496,7 @@ impl AggregateUDFImpl for MergeWrapper {
|
||||
/// so return fixed return type instead of using `arg_types` to determine the return type.
|
||||
fn return_type(&self, _arg_types: &[DataType]) -> datafusion_common::Result<DataType> {
|
||||
// The return type is the same as the original aggregate function's return type.
|
||||
let ret_type = self.inner.return_type(&self.original_input_types)?;
|
||||
Ok(ret_type)
|
||||
Ok(self.return_type.clone())
|
||||
}
|
||||
fn signature(&self) -> &Signature {
|
||||
&self.merge_signature
|
||||
|
||||
@@ -23,6 +23,7 @@ use datafusion::catalog::{Session, TableProvider};
|
||||
use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext};
|
||||
use datafusion::functions_aggregate::average::avg_udaf;
|
||||
use datafusion::functions_aggregate::count::count_udaf;
|
||||
use datafusion::functions_aggregate::sum::sum_udaf;
|
||||
use datafusion::optimizer::AnalyzerRule;
|
||||
use datafusion::optimizer::analyzer::type_coercion::TypeCoercion;
|
||||
@@ -555,6 +556,7 @@ async fn test_udaf_correct_eval_result() {
|
||||
input_schema: SchemaRef,
|
||||
input: Vec<ArrayRef>,
|
||||
expected_output: Option<ScalarValue>,
|
||||
// extra check function on the final array result
|
||||
expected_fn: Option<ExpectedFn>,
|
||||
distinct: bool,
|
||||
filter: Option<Box<Expr>>,
|
||||
@@ -585,6 +587,27 @@ async fn test_udaf_correct_eval_result() {
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: count_udaf(),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
"str_val",
|
||||
DataType::Utf8,
|
||||
true,
|
||||
)])),
|
||||
args: vec![Expr::Column(Column::new_unqualified("str_val"))],
|
||||
input: vec![Arc::new(StringArray::from(vec![
|
||||
Some("hello"),
|
||||
Some("world"),
|
||||
None,
|
||||
Some("what"),
|
||||
]))],
|
||||
expected_output: Some(ScalarValue::Int64(Some(3))),
|
||||
expected_fn: None,
|
||||
distinct: false,
|
||||
filter: None,
|
||||
order_by: vec![],
|
||||
null_treatment: None,
|
||||
},
|
||||
TestCase {
|
||||
func: avg_udaf(),
|
||||
input_schema: Arc::new(arrow_schema::Schema::new(vec![Field::new(
|
||||
|
||||
@@ -325,7 +325,7 @@ impl PlanRewriter {
|
||||
}
|
||||
|
||||
/// Returns true if we should stop and expand. The input plan is the parent node of the current node.
|
||||
fn should_expand(&mut self, plan: &LogicalPlan) -> bool {
|
||||
fn should_expand(&mut self, plan: &LogicalPlan) -> DfResult<bool> {
|
||||
debug!(
|
||||
"Check should_expand at level: {} with Stack:\n{}, ",
|
||||
self.level,
|
||||
@@ -335,20 +335,21 @@ impl PlanRewriter {
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n"),
|
||||
);
|
||||
if DFLogicalSubstraitConvertor
|
||||
.encode(plan, DefaultSerializer)
|
||||
.is_err()
|
||||
{
|
||||
return true;
|
||||
if let Err(e) = DFLogicalSubstraitConvertor.encode(plan, DefaultSerializer) {
|
||||
debug!(
|
||||
"PlanRewriter: plan cannot be converted to substrait with error={e:?}, expanding now: {plan}"
|
||||
);
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
if self.expand_on_next_call {
|
||||
self.expand_on_next_call = false;
|
||||
return true;
|
||||
debug!("PlanRewriter: expand_on_next_call is true, expanding now");
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
if self.expand_on_next_part_cond_trans_commutative {
|
||||
let comm = Categorizer::check_plan(plan, self.partition_cols.clone());
|
||||
let comm = Categorizer::check_plan(plan, self.partition_cols.clone())?;
|
||||
match comm {
|
||||
Commutativity::PartialCommutative => {
|
||||
// a small difference is that for partial commutative, we still need to
|
||||
@@ -364,13 +365,16 @@ impl PlanRewriter {
|
||||
// again a new node that can be push down, we should just
|
||||
// do push down now and avoid further expansion
|
||||
self.expand_on_next_part_cond_trans_commutative = false;
|
||||
return true;
|
||||
debug!(
|
||||
"PlanRewriter: meet a new conditional/transformed commutative plan, expanding now: {plan}"
|
||||
);
|
||||
return Ok(true);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
match Categorizer::check_plan(plan, self.partition_cols.clone()) {
|
||||
match Categorizer::check_plan(plan, self.partition_cols.clone())? {
|
||||
Commutativity::Commutative => {}
|
||||
Commutativity::PartialCommutative => {
|
||||
if let Some(plan) = partial_commutative_transformer(plan) {
|
||||
@@ -391,9 +395,8 @@ impl PlanRewriter {
|
||||
}
|
||||
}
|
||||
Commutativity::TransformedCommutative { transformer } => {
|
||||
if let Some(transformer) = transformer
|
||||
&& let Some(transformer_actions) = transformer(plan)
|
||||
{
|
||||
if let Some(transformer) = transformer {
|
||||
let transformer_actions = transformer(plan)?;
|
||||
debug!(
|
||||
"PlanRewriter: transformed plan: {}\n from {plan}",
|
||||
transformer_actions
|
||||
@@ -424,11 +427,12 @@ impl PlanRewriter {
|
||||
Commutativity::NonCommutative
|
||||
| Commutativity::Unimplemented
|
||||
| Commutativity::Unsupported => {
|
||||
return true;
|
||||
debug!("PlanRewriter: meet a non-commutative plan, expanding now: {plan}");
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
/// Update the column requirements for the current plan, plan_level is the level of the plan
|
||||
@@ -838,8 +842,7 @@ impl TreeNodeRewriter for PlanRewriter {
|
||||
|
||||
let parent = parent.clone();
|
||||
|
||||
// TODO(ruihang): avoid this clone
|
||||
if self.should_expand(&parent) {
|
||||
if self.should_expand(&parent)? {
|
||||
// TODO(ruihang): does this work for nodes with multiple children?;
|
||||
debug!(
|
||||
"PlanRewriter: should expand child:\n {node}\n Of Parent: {}",
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow::datatypes::IntervalDayTime;
|
||||
use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_recordbatch::adapter::RecordBatchMetrics;
|
||||
@@ -25,7 +26,9 @@ use datafusion::datasource::DefaultTableSource;
|
||||
use datafusion::functions_aggregate::expr_fn::avg;
|
||||
use datafusion::functions_aggregate::min_max::{max, min};
|
||||
use datafusion_common::JoinType;
|
||||
use datafusion_expr::expr::ScalarFunction;
|
||||
use datafusion_expr::{Expr, LogicalPlanBuilder, col, lit};
|
||||
use datafusion_functions::datetime::date_bin;
|
||||
use datafusion_sql::TableReference;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
|
||||
@@ -1453,3 +1456,41 @@ fn transform_sort_subquery_alias() {
|
||||
.join("\n");
|
||||
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
/// Checks the plan text produced by `DistPlannerAnalyzer` for an aggregate whose
/// group-by key is a `date_bin(...)` call over `ts` instead of a bare column.
#[test]
|
||||
fn date_bin_ts_group_by() {
|
||||
init_default_ut_logging();
|
||||
let test_table = TestTable::table_with_name(0, "t".to_string());
|
||||
let table_source = Arc::new(DefaultTableSource::new(Arc::new(
|
||||
DfTableProviderAdapter::new(test_table),
|
||||
)));
|
||||
// Group-by key: `date_bin` applied to a day-time interval literal and the `ts` column.
let date_bin_call = Expr::ScalarFunction(ScalarFunction::new_udf(
|
||||
date_bin(),
|
||||
vec![
|
||||
lit(datafusion_common::ScalarValue::IntervalDayTime(Some(
|
||||
IntervalDayTime::new(0, 60 * 1000), // 1 minute in millis
|
||||
))),
|
||||
col("ts"),
|
||||
],
|
||||
));
|
||||
// Scan table `t`, then aggregate `min(number)` grouped by the `date_bin` call.
let plan = LogicalPlanBuilder::scan_with_filters("t", table_source, None, vec![])
|
||||
.unwrap()
|
||||
.aggregate(vec![date_bin_call], vec![min(col("number"))])
|
||||
.unwrap()
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let config = ConfigOptions::default();
|
||||
let result = DistPlannerAnalyzer {}.analyze(plan, &config).unwrap();
|
||||
|
||||
// Expected plan text: an upper Aggregate computing `__min_merge(__min_state(t.number))`
// sits on top of a MergeScan whose remote input is an Aggregate computing
// `__min_state(t.number)`; both aggregates list the same `date_bin` group-by key.
let expected = [
|
||||
r#"Projection: date_bin(IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 60000 }"),t.ts), min(t.number)"#,
|
||||
r#" Aggregate: groupBy=[[date_bin(IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 60000 }"),t.ts)]], aggr=[[__min_merge(__min_state(t.number)) AS min(t.number)]]"#,
|
||||
" MergeScan [is_placeholder=false, remote_input=[",
|
||||
r#"Aggregate: groupBy=[[date_bin(IntervalDayTime("IntervalDayTime { days: 0, milliseconds: 60000 }"), t.ts)]], aggr=[[__min_state(t.number)]]"#,
|
||||
" TableScan: t",
|
||||
"]]",
|
||||
]
|
||||
.join("\n");
|
||||
// Compare against the analyzer output's string form.
assert_eq!(expected, result.to_string());
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ use std::sync::Arc;
|
||||
use common_function::aggrs::aggr_wrapper::{StateMergeHelper, aggr_state_func_name};
|
||||
use common_function::function_registry::FUNCTION_REGISTRY;
|
||||
use common_telemetry::debug;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNode};
|
||||
use promql::extension_plan::{
|
||||
EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
|
||||
@@ -121,15 +122,18 @@ pub enum Commutativity {
|
||||
pub struct Categorizer {}
|
||||
|
||||
impl Categorizer {
|
||||
pub fn check_plan(plan: &LogicalPlan, partition_cols: Option<AliasMapping>) -> Commutativity {
|
||||
pub fn check_plan(
|
||||
plan: &LogicalPlan,
|
||||
partition_cols: Option<AliasMapping>,
|
||||
) -> DfResult<Commutativity> {
|
||||
let partition_cols = partition_cols.unwrap_or_default();
|
||||
|
||||
match plan {
|
||||
let comm = match plan {
|
||||
LogicalPlan::Projection(proj) => {
|
||||
for expr in &proj.expr {
|
||||
let commutativity = Self::check_expr(expr);
|
||||
if !matches!(commutativity, Commutativity::Commutative) {
|
||||
return commutativity;
|
||||
return Ok(commutativity);
|
||||
}
|
||||
}
|
||||
Commutativity::Commutative
|
||||
@@ -142,24 +146,27 @@ impl Categorizer {
|
||||
let matches_partition = Self::check_partition(&aggr.group_expr, &partition_cols);
|
||||
if !matches_partition && is_all_steppable {
|
||||
debug!("Plan is steppable: {plan}");
|
||||
return Commutativity::TransformedCommutative {
|
||||
return Ok(Commutativity::TransformedCommutative {
|
||||
transformer: Some(Arc::new(|plan: &LogicalPlan| {
|
||||
debug!("Before Step optimize: {plan}");
|
||||
let ret = step_aggr_to_upper_aggr(plan);
|
||||
ret.ok().map(|s| TransformerAction {
|
||||
ret.inspect_err(|err| {
|
||||
common_telemetry::error!("Failed to step aggregate plan: {err:?}");
|
||||
})
|
||||
.map(|s| TransformerAction {
|
||||
extra_parent_plans: s.extra_parent_plans,
|
||||
new_child_plan: s.new_child_plan,
|
||||
})
|
||||
})),
|
||||
};
|
||||
});
|
||||
}
|
||||
if !matches_partition {
|
||||
return Commutativity::NonCommutative;
|
||||
return Ok(Commutativity::NonCommutative);
|
||||
}
|
||||
for expr in &aggr.aggr_expr {
|
||||
let commutativity = Self::check_expr(expr);
|
||||
if !matches!(commutativity, Commutativity::Commutative) {
|
||||
return commutativity;
|
||||
return Ok(commutativity);
|
||||
}
|
||||
}
|
||||
// all group by expressions are partition columns can push down, unless
|
||||
@@ -170,7 +177,7 @@ impl Categorizer {
|
||||
}
|
||||
LogicalPlan::Sort(_) => {
|
||||
if partition_cols.is_empty() {
|
||||
return Commutativity::Commutative;
|
||||
return Ok(Commutativity::Commutative);
|
||||
}
|
||||
|
||||
// sort plan needs to consider column priority
|
||||
@@ -219,7 +226,9 @@ impl Categorizer {
|
||||
LogicalPlan::Ddl(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::Copy(_) => Commutativity::Unsupported,
|
||||
LogicalPlan::RecursiveQuery(_) => Commutativity::Unsupported,
|
||||
}
|
||||
};
|
||||
|
||||
Ok(comm)
|
||||
}
|
||||
|
||||
pub fn check_extension_plan(
|
||||
@@ -334,7 +343,7 @@ impl Categorizer {
|
||||
pub type Transformer = Arc<dyn Fn(&LogicalPlan) -> Option<LogicalPlan>>;
|
||||
|
||||
/// Returns the transformer action that needs to be applied
|
||||
pub type StageTransformer = Arc<dyn Fn(&LogicalPlan) -> Option<TransformerAction>>;
|
||||
pub type StageTransformer = Arc<dyn Fn(&LogicalPlan) -> DfResult<TransformerAction>>;
|
||||
|
||||
/// The Action that a transformer should take on the plan.
|
||||
pub struct TransformerAction {
|
||||
@@ -369,7 +378,7 @@ mod test {
|
||||
fetch: None,
|
||||
});
|
||||
assert!(matches!(
|
||||
Categorizer::check_plan(&plan, Some(Default::default())),
|
||||
Categorizer::check_plan(&plan, Some(Default::default())).unwrap(),
|
||||
Commutativity::Commutative
|
||||
));
|
||||
}
|
||||
|
||||
@@ -14,13 +14,17 @@ Affected Rows: 0
|
||||
INSERT INTO
|
||||
integers (host, i, ts)
|
||||
VALUES
|
||||
('220-A', 2, '2023-01-01 00:00:00'),
|
||||
('220-B', 3, '2023-01-01 00:00:00'),
|
||||
('550-A', 1, '2023-01-01 00:00:00'),
|
||||
('550-B', 5, '2023-01-01 00:00:00'),
|
||||
('550-A', 2, '2023-01-01 01:00:00'),
|
||||
('550-W', 3, '2023-01-01 02:00:00'),
|
||||
('550-W', 4, '2023-01-01 03:00:00');
|
||||
('550-Z', 4, '2023-01-01 02:00:00'),
|
||||
('550-W', 5, '2023-01-01 03:00:00'),
|
||||
('550-Z', 6, '2023-01-01 03:00:00');
|
||||
|
||||
Affected Rows: 5
|
||||
Affected Rows: 9
|
||||
|
||||
SELECT
|
||||
count(i),
|
||||
@@ -33,7 +37,7 @@ FROM
|
||||
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
|
||||
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| 5 | 15 | 2.9742334234767016 | 5 |
|
||||
| 9 | 31 | 2.9742334234767016 | 6 |
|
||||
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -122,11 +126,11 @@ SELECT
|
||||
FROM
|
||||
integers;
|
||||
|
||||
+-----------------+
|
||||
| avg(integers.i) |
|
||||
+-----------------+
|
||||
| 3.0 |
|
||||
+-----------------+
|
||||
+--------------------+
|
||||
| avg(integers.i) |
|
||||
+--------------------+
|
||||
| 3.4444444444444446 |
|
||||
+--------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
@@ -214,10 +218,10 @@ ORDER BY
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| ts | count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| 2023-01-01T00:00:00 | 2 | 6 | 5.002829575110705 | 2 |
|
||||
| 2023-01-01T00:00:00 | 4 | 11 | 2.9742334234767016 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 | 2 | 1.9936617014173446 | 1 |
|
||||
| 2023-01-01T02:00:00 | 1 | 3 | 2.9742334234767016 | 1 |
|
||||
| 2023-01-01T03:00:00 | 1 | 4 | 4.014835333028587 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 | 7 | 4.014835333028587 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 | 11 | 5.98951037117262 | 2 |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -321,6 +325,129 @@ ORDER BY
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| time_window | count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
| 2023-01-01T00:00:00 | 4 | 11 | 2.9742334234767016 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 | 2 | 1.9936617014173446 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 | 7 | 4.014835333028587 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 | 11 | 5.98951037117262 | 2 |
|
||||
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST_|
|
||||
|_|_Projection: date_bin(Utf8("2 seconds"),integers.ts) AS time_window, count(integers.i), sum(integers.i), uddsketch_calc(Float64(0.5), uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i))_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("2 seconds"),integers.ts)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i), __sum_merge(__sum_state(integers.i)) AS sum(integers.i), __uddsketch_state_merge(__uddsketch_state_state(Int64(128),Float64(0.01),integers.i)) AS uddsketch_state(Int64(128),Float64(0.01),integers.i), __hll_merge(__hll_state(integers.i)) AS hll(integers.i)]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("2 seconds") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128), Float64(0.01), CAST(integers.i AS Float64)), __hll_state(CAST(integers.i AS Utf8))]]_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[time_window@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_ProjectionExec: expr=[date_bin(Utf8("2 seconds"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i), sum(integers.i)@2 as sum(integers.i), uddsketch_calc(0.5, uddsketch_state(Int64(128),Float64(0.01),integers.i)@3) as uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i)@4) as hll_count(hll(integers.i))]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
EXPLAIN ANALYZE
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
+-+-+-+
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@0 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_ProjectionExec: expr=[date_bin(Utf8("2 seconds"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i), sum(integers.i)@2 as sum(integers.i), uddsketch_calc(0.5, uddsketch_state(Int64(128),Float64(0.01),integers.i)@3) as uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i)@4) as hll_count(hll(integers.i))] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 4_|
|
||||
+-+-+-+
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -12,11 +12,15 @@ CREATE TABLE integers(
|
||||
INSERT INTO
|
||||
integers (host, i, ts)
|
||||
VALUES
|
||||
('220-A', 2, '2023-01-01 00:00:00'),
|
||||
('220-B', 3, '2023-01-01 00:00:00'),
|
||||
('550-A', 1, '2023-01-01 00:00:00'),
|
||||
('550-B', 5, '2023-01-01 00:00:00'),
|
||||
('550-A', 2, '2023-01-01 01:00:00'),
|
||||
('550-W', 3, '2023-01-01 02:00:00'),
|
||||
('550-W', 4, '2023-01-01 03:00:00');
|
||||
('550-Z', 4, '2023-01-01 02:00:00'),
|
||||
('550-W', 5, '2023-01-01 03:00:00'),
|
||||
('550-Z', 6, '2023-01-01 03:00:00');
|
||||
|
||||
SELECT
|
||||
count(i),
|
||||
@@ -142,4 +146,60 @@ GROUP BY
|
||||
ORDER BY
|
||||
ts;
|
||||
|
||||
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
EXPLAIN
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
-- SQLNESS REPLACE (metrics.*) REDACTED
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
-- might write to different partitions
|
||||
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
EXPLAIN ANALYZE
|
||||
SELECT
|
||||
date_bin('2s'::INTERVAL, ts) as time_window,
|
||||
count(i),
|
||||
sum(i),
|
||||
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
|
||||
hll_count(hll(i))
|
||||
FROM
|
||||
integers
|
||||
GROUP BY
|
||||
time_window
|
||||
ORDER BY
|
||||
time_window;
|
||||
|
||||
DROP TABLE integers;
|
||||
|
||||
@@ -1037,3 +1037,277 @@ drop table aggr_optimize_not;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
--
|
||||
-- Additional test cases for step aggregation pushdown
|
||||
--
|
||||
CREATE TABLE step_aggr_extended (
|
||||
pk_col_1 STRING,
|
||||
pk_col_2 BIGINT,
|
||||
val_col_1 BIGINT,
|
||||
val_col_2 STRING,
|
||||
val_col_3 BIGINT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY(pk_col_1, pk_col_2)
|
||||
) PARTITION ON COLUMNS (pk_col_1) (
|
||||
pk_col_1 < 'f',
|
||||
pk_col_1 >= 'f'
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
INSERT INTO step_aggr_extended VALUES
|
||||
('a', 1, 100, 'v1', 10, 1672531200000),
|
||||
('a', 2, 200, 'v2', NULL, 1672531201000),
|
||||
('g', 1, 300, 'v1', 30, 1672531202000),
|
||||
('g', 2, 400, 'v2', 40, 1672531203000),
|
||||
('a', 3, 100, 'v3', 10, 1672531204000),
|
||||
('g', 3, 300, 'v3', 30, 1672531205000),
|
||||
('h', 4, 500, NULL, 50, 1672531206000);
|
||||
|
||||
Affected Rows: 7
|
||||
|
||||
-- Case 12: GROUP BY includes a mix of partition key and non-partition key.
|
||||
-- `pk_col_1` is a partition key, `pk_col_2` is not.
|
||||
-- This should pushdown entire aggregation to datanodes since it's partitioned by `pk_col_1`.
|
||||
-- Expected: Full pushdown of aggregation to datanodes.
|
||||
SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
+----------+----------+-----------------------------------+
|
||||
| pk_col_1 | pk_col_2 | sum(step_aggr_extended.val_col_1) |
|
||||
+----------+----------+-----------------------------------+
|
||||
| a | 1 | 100 |
|
||||
| a | 2 | 200 |
|
||||
| a | 3 | 100 |
|
||||
| g | 1 | 300 |
|
||||
| g | 2 | 400 |
|
||||
| g | 3 | 300 |
|
||||
| h | 4 | 500 |
|
||||
+----------+----------+-----------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: step_aggr_extended.pk_col_1 ASC NULLS LAST, step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: step_aggr_extended.pk_col_1 ASC NULLS LAST, step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|
||||
|_|_Projection: step_aggr_extended.pk_col_1, step_aggr_extended.pk_col_2, sum(step_aggr_extended.val_col_1)_|
|
||||
|_|_Aggregate: groupBy=[[step_aggr_extended.pk_col_1, step_aggr_extended.pk_col_2]], aggr=[[sum(step_aggr_extended.val_col_1)]] |
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [pk_col_1@0 ASC NULLS LAST, pk_col_2@1 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 13: COUNT(DISTINCT) aggregation.
|
||||
-- `DISTINCT` aggregation is more complex and requires a two-phase distinct calculation in a distributed environment. Currently not supported for pushdown.
|
||||
-- Expected: datanode only do table scan, actual aggregation happens on frontend.
|
||||
SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
+----------------------------------------------+
|
||||
| count(DISTINCT step_aggr_extended.val_col_1) |
|
||||
+----------------------------------------------+
|
||||
| 5 |
|
||||
+----------------------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Projection: count(alias1) AS count(DISTINCT step_aggr_extended.val_col_1)_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[count(alias1)]]_|
|
||||
|_|_Aggregate: groupBy=[[step_aggr_extended.val_col_1 AS alias1]], aggr=[[]]_|
|
||||
|_|_Projection: step_aggr_extended.val_col_1_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT step_aggr_extended.val_col_1)]_|
|
||||
|_|_AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[val_col_1@0 as alias1], aggr=[]_|
|
||||
|_|_ProjectionExec: expr=[val_col_1@2 as val_col_1]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 14: Aggregation with a HAVING clause.
|
||||
-- The `HAVING` clause filters results after aggregation.
|
||||
-- Expected: The `HAVING` filter should be applied on the frontend after the final aggregation is complete, not pushed down to datanodes.
|
||||
SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
+----------+-----------------------------------+
|
||||
| pk_col_2 | sum(step_aggr_extended.val_col_1) |
|
||||
+----------+-----------------------------------+
|
||||
| 1 | 400 |
|
||||
| 2 | 600 |
|
||||
| 3 | 400 |
|
||||
| 4 | 500 |
|
||||
+----------+-----------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Sort: step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|
||||
|_|_Filter: sum(step_aggr_extended.val_col_1) > Int64(300)_|
|
||||
|_|_Aggregate: groupBy=[[step_aggr_extended.pk_col_2]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_1)) AS sum(step_aggr_extended.val_col_1)]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[step_aggr_extended.pk_col_2]], aggr=[[__sum_state(step_aggr_extended.val_col_1)]]_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [pk_col_2@0 ASC NULLS LAST]_|
|
||||
|_|_SortExec: expr=[pk_col_2@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_FilterExec: sum(step_aggr_extended.val_col_1)@1 > 300_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[pk_col_2@0 as pk_col_2], aggr=[sum(step_aggr_extended.val_col_1)]_|
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[pk_col_2@0 as pk_col_2], aggr=[sum(step_aggr_extended.val_col_1)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 15: Aggregation on a column with NULL values.
|
||||
-- `SUM` should ignore NULLs. `COUNT(val_col_2)` should count non-nulls, `COUNT(*)` should count all rows.
|
||||
-- Expected: Correct aggregation results, proving NULLs are handled properly in a distributed context.
|
||||
SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
|
||||
| sum(step_aggr_extended.val_col_3) | count(step_aggr_extended.val_col_2) | count(step_aggr_extended.val_col_3) | count(*) |
|
||||
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
|
||||
| 170 | 6 | 6 | 7 |
|
||||
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Projection: sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1)) AS count(*)_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_3)) AS sum(step_aggr_extended.val_col_3), __count_merge(__count_state(step_aggr_extended.val_col_2)) AS count(step_aggr_extended.val_col_2), __count_merge(__count_state(step_aggr_extended.val_col_3)) AS count(step_aggr_extended.val_col_3), __count_merge(__count_state(step_aggr_extended.ts)) AS count(Int64(1))]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__sum_state(step_aggr_extended.val_col_3), __count_state(step_aggr_extended.val_col_2), __count_state(step_aggr_extended.val_col_3), __count_state(step_aggr_extended.ts)]]_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | ProjectionExec: expr=[sum(step_aggr_extended.val_col_3)@0 as sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2)@1 as count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3)@2 as count(step_aggr_extended.val_col_3), count(Int64(1))@3 as count(*)]_|
|
||||
|_|_AggregateExec: mode=Final, gby=[], aggr=[sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1))]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1))]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 16: Aggregation on STRING columns.
|
||||
-- `MIN` and `MAX` can operate on strings.
|
||||
-- Expected: Correct lexicographical min/max results.
|
||||
SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
+----------------------------------+-----------------------------------+
|
||||
| min(step_aggr_extended.pk_col_1) | max(step_aggr_extended.val_col_2) |
|
||||
+----------------------------------+-----------------------------------+
|
||||
| a | v3 |
|
||||
+----------------------------------+-----------------------------------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[__min_merge(__min_state(step_aggr_extended.pk_col_1)) AS min(step_aggr_extended.pk_col_1), __max_merge(__max_state(step_aggr_extended.val_col_2)) AS max(step_aggr_extended.val_col_2)]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__min_state(step_aggr_extended.pk_col_1), __max_state(step_aggr_extended.val_col_2)]]_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[min(step_aggr_extended.pk_col_1), max(step_aggr_extended.val_col_2)]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[min(step_aggr_extended.pk_col_1), max(step_aggr_extended.val_col_2)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
-- Case 17: Aggregation on an empty input set.
|
||||
-- `WHERE` clause filters out all rows.
|
||||
-- Expected: Aggregation should return correct default values (e.g., COUNT is 0, SUM is NULL).
|
||||
SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
+-----------------------------------+----------+
|
||||
| sum(step_aggr_extended.val_col_1) | count(*) |
|
||||
+-----------------------------------+----------+
|
||||
| | 0 |
|
||||
+-----------------------------------+----------+
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| Projection: sum(step_aggr_extended.val_col_1), count(Int64(1)) AS count(*)_|
|
||||
|_|_Aggregate: groupBy=[[]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_1)) AS sum(step_aggr_extended.val_col_1), __count_merge(__count_state(step_aggr_extended.ts)) AS count(Int64(1))]] |
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[]], aggr=[[__sum_state(step_aggr_extended.val_col_1), __count_state(step_aggr_extended.ts)]]_|
|
||||
|_|_Filter: step_aggr_extended.pk_col_1 = Utf8("non_existent")_|
|
||||
|_|_TableScan: step_aggr_extended_|
|
||||
|_| ]]_|
|
||||
| physical_plan | ProjectionExec: expr=[sum(step_aggr_extended.val_col_1)@0 as sum(step_aggr_extended.val_col_1), count(Int64(1))@1 as count(*)]_|
|
||||
|_|_AggregateExec: mode=Final, gby=[], aggr=[sum(step_aggr_extended.val_col_1), count(Int64(1))]_|
|
||||
|_|_CoalescePartitionsExec_|
|
||||
|_|_AggregateExec: mode=Partial, gby=[], aggr=[sum(step_aggr_extended.val_col_1), count(Int64(1))]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
+-+-+
|
||||
|
||||
DROP TABLE step_aggr_extended;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
|
||||
@@ -305,3 +305,110 @@ GROUP BY
|
||||
drop table aggr_optimize_not_count;
|
||||
|
||||
drop table aggr_optimize_not;
|
||||
|
||||
--
|
||||
-- Additional test cases for step aggregation pushdown
|
||||
--
|
||||
CREATE TABLE step_aggr_extended (
|
||||
pk_col_1 STRING,
|
||||
pk_col_2 BIGINT,
|
||||
val_col_1 BIGINT,
|
||||
val_col_2 STRING,
|
||||
val_col_3 BIGINT,
|
||||
ts TIMESTAMP TIME INDEX,
|
||||
PRIMARY KEY(pk_col_1, pk_col_2)
|
||||
) PARTITION ON COLUMNS (pk_col_1) (
|
||||
pk_col_1 < 'f',
|
||||
pk_col_1 >= 'f'
|
||||
);
|
||||
|
||||
INSERT INTO step_aggr_extended VALUES
|
||||
('a', 1, 100, 'v1', 10, 1672531200000),
|
||||
('a', 2, 200, 'v2', NULL, 1672531201000),
|
||||
('g', 1, 300, 'v1', 30, 1672531202000),
|
||||
('g', 2, 400, 'v2', 40, 1672531203000),
|
||||
('a', 3, 100, 'v3', 10, 1672531204000),
|
||||
('g', 3, 300, 'v3', 30, 1672531205000),
|
||||
('h', 4, 500, NULL, 50, 1672531206000);
|
||||
|
||||
|
||||
-- Case 12: GROUP BY includes a mix of partition key and non-partition key.
|
||||
-- `pk_col_1` is a partition key, `pk_col_2` is not.
|
||||
-- This should pushdown entire aggregation to datanodes since it's partitioned by `pk_col_1`.
|
||||
-- Expected: Full pushdown of aggregation to datanodes.
|
||||
SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
|
||||
|
||||
-- Case 13: COUNT(DISTINCT) aggregation.
|
||||
-- `DISTINCT` aggregation is more complex and requires a two-phase distinct calculation in a distributed environment. Currently not supported for pushdown.
|
||||
-- Expected: datanode only do table scan, actual aggregation happens on frontend.
|
||||
SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
|
||||
|
||||
-- Case 14: Aggregation with a HAVING clause.
|
||||
-- The `HAVING` clause filters results after aggregation.
|
||||
-- Expected: The `HAVING` filter should be applied on the frontend after the final aggregation is complete, not pushed down to datanodes.
|
||||
SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
|
||||
|
||||
-- Case 15: Aggregation on a column with NULL values.
|
||||
-- `SUM` should ignore NULLs. `COUNT(val_col_2)` should count non-nulls, `COUNT(*)` should count all rows.
|
||||
-- Expected: Correct aggregation results, proving NULLs are handled properly in a distributed context.
|
||||
SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
|
||||
|
||||
-- Case 16: Aggregation on STRING columns.
|
||||
-- `MIN` and `MAX` can operate on strings.
|
||||
-- Expected: Correct lexicographical min/max results.
|
||||
SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
|
||||
|
||||
-- Case 17: Aggregation on an empty input set.
|
||||
-- `WHERE` clause filters out all rows.
|
||||
-- Expected: Aggregation should return correct default values (e.g., COUNT is 0, SUM is NULL).
|
||||
SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
|
||||
-- SQLNESS REPLACE (peers.*) REDACTED
|
||||
-- SQLNESS REPLACE (Hash.*) REDACTED
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
-- SQLNESS REPLACE (\s\s+) _
|
||||
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
|
||||
EXPLAIN SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
|
||||
|
||||
DROP TABLE step_aggr_extended;
|
||||
|
||||
@@ -14,13 +14,17 @@ Affected Rows: 0
|
||||
INSERT INTO
|
||||
integers (host, i, ts)
|
||||
VALUES
|
||||
('220-A', 2, '2023-01-01 00:00:00'),
|
||||
('220-B', 3, '2023-01-01 00:00:00'),
|
||||
('550-A', 1, '2023-01-01 00:00:00'),
|
||||
('550-B', 5, '2023-01-01 00:00:00'),
|
||||
('550-A', 2, '2023-01-01 01:00:00'),
|
||||
('550-W', 3, '2023-01-01 02:00:00'),
|
||||
('550-W', 4, '2023-01-01 03:00:00');
|
||||
('550-Z', 4, '2023-01-01 02:00:00'),
|
||||
('550-W', 5, '2023-01-01 03:00:00'),
|
||||
('550-Z', 6, '2023-01-01 03:00:00');
|
||||
|
||||
Affected Rows: 5
|
||||
Affected Rows: 9
|
||||
|
||||
-- count
|
||||
SELECT
|
||||
@@ -31,7 +35,7 @@ FROM
|
||||
+-------------------+
|
||||
| count(integers.i) |
|
||||
+-------------------+
|
||||
| 5 |
|
||||
| 9 |
|
||||
+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -120,10 +124,10 @@ ORDER BY
|
||||
+---------------------+-------------------+
|
||||
| ts | count(integers.i) |
|
||||
+---------------------+-------------------+
|
||||
| 2023-01-01T00:00:00 | 2 |
|
||||
| 2023-01-01T00:00:00 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 1 |
|
||||
| 2023-01-01T03:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 |
|
||||
+---------------------+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -234,10 +238,10 @@ ORDER BY
|
||||
+---------------------+-------------------+
|
||||
| time_window | count(integers.i) |
|
||||
+---------------------+-------------------+
|
||||
| 2023-01-01T00:00:00 | 2 |
|
||||
| 2023-01-01T00:00:00 | 4 |
|
||||
| 2023-01-01T01:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 1 |
|
||||
| 2023-01-01T03:00:00 | 1 |
|
||||
| 2023-01-01T02:00:00 | 2 |
|
||||
| 2023-01-01T03:00:00 | 2 |
|
||||
+---------------------+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -260,15 +264,20 @@ ORDER BY
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|
||||
| logical_plan_| Sort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|
||||
|_|_Projection: date_bin(Utf8("1 hour"),integers.ts) AS time_window, count(integers.i)_|
|
||||
|_|_Aggregate: groupBy=[[date_bin(CAST(Utf8("1 hour") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[count(integers.i)]] |
|
||||
|_|_Aggregate: groupBy=[[date_bin(Utf8("1 hour"),integers.ts)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i)]]_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("1 hour") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[__count_state(integers.i)]]_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_SortExec: expr=[time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] |
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
@@ -299,37 +308,33 @@ ORDER BY
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SortExec: expr=[time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|
||||
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
@@ -354,10 +359,13 @@ ORDER BY
|
||||
| integers.ts + Int64(1) | integers.i / Int64(2) | count(integers.i) |
|
||||
+------------------------+-----------------------+-------------------+
|
||||
| 1672531200001 | 0 | 1 |
|
||||
| 1672531200001 | 1 | 2 |
|
||||
| 1672531200001 | 2 | 1 |
|
||||
| 1672534800001 | 1 | 1 |
|
||||
| 1672538400001 | 1 | 1 |
|
||||
| 1672538400001 | 2 | 1 |
|
||||
| 1672542000001 | 2 | 1 |
|
||||
| 1672542000001 | 3 | 1 |
|
||||
+------------------------+-----------------------+-------------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
@@ -383,15 +391,18 @@ ORDER BY
|
||||
+-+-+
|
||||
| plan_type_| plan_|
|
||||
+-+-+
|
||||
| logical_plan_| MergeSort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|
||||
| logical_plan_| Sort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|
||||
|_|_Aggregate: groupBy=[[integers.ts + Int64(1), integers.i / Int64(2)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i)]]_|
|
||||
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|
||||
|_| Sort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|
||||
|_|_Projection: integers.ts + Int64(1), integers.i / Int64(2), count(integers.i)_|
|
||||
|_|_Aggregate: groupBy=[[CAST(integers.ts AS Int64) + Int64(1), integers.i / Int64(2)]], aggr=[[count(integers.i)]] |
|
||||
|_| Aggregate: groupBy=[[CAST(integers.ts AS Int64) + Int64(1), integers.i / Int64(2)]], aggr=[[__count_state(integers.i)]]_|
|
||||
|_|_TableScan: integers_|
|
||||
|_| ]]_|
|
||||
| physical_plan | SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true]_|
|
||||
|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] |
|
||||
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|
||||
|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_AggregateExec: mode=Partial, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)]_|
|
||||
|_|_CooperativeExec_|
|
||||
|_|_MergeScanExec: REDACTED
|
||||
|_|_|
|
||||
@@ -425,38 +436,36 @@ ORDER BY
|
||||
| stage | node | plan_|
|
||||
+-+-+-+
|
||||
| 0_| 0_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_MergeScanExec: REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 0_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 1_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
| 1_| 2_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|
||||
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|
||||
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|
||||
|_|_|_RepartitionExec: partitioning=REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|
||||
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|
||||
|_|_|_CooperativeExec REDACTED
|
||||
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|
||||
|_|_|_|
|
||||
|_|_| Total rows: 5_|
|
||||
|_|_| Total rows: 8_|
|
||||
+-+-+-+
|
||||
|
||||
-- test udd/hll_merege pushdown
|
||||
@@ -487,7 +496,7 @@ GROUP BY
|
||||
time_window,
|
||||
host;
|
||||
|
||||
Affected Rows: 5
|
||||
Affected Rows: 9
|
||||
|
||||
SELECT
|
||||
uddsketch_calc(0.5, uddsketch_merge(128, 0.01, udd_state)) as udd_result,
|
||||
@@ -498,7 +507,7 @@ FROM
|
||||
+--------------------+------------+
|
||||
| udd_result | hll_result |
|
||||
+--------------------+------------+
|
||||
| 2.9742334234767016 | 5 |
|
||||
| 2.9742334234767016 | 6 |
|
||||
+--------------------+------------+
|
||||
|
||||
-- SQLNESS REPLACE (-+) -
|
||||
|
||||
@@ -12,11 +12,15 @@ CREATE TABLE integers(
|
||||
INSERT INTO
|
||||
integers (host, i, ts)
|
||||
VALUES
|
||||
('220-A', 2, '2023-01-01 00:00:00'),
|
||||
('220-B', 3, '2023-01-01 00:00:00'),
|
||||
('550-A', 1, '2023-01-01 00:00:00'),
|
||||
('550-B', 5, '2023-01-01 00:00:00'),
|
||||
('550-A', 2, '2023-01-01 01:00:00'),
|
||||
('550-W', 3, '2023-01-01 02:00:00'),
|
||||
('550-W', 4, '2023-01-01 03:00:00');
|
||||
('550-Z', 4, '2023-01-01 02:00:00'),
|
||||
('550-W', 5, '2023-01-01 03:00:00'),
|
||||
('550-Z', 6, '2023-01-01 03:00:00');
|
||||
|
||||
-- count
|
||||
SELECT
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user