feat: steppable aggr fn

poc: step aggr query

feat: mvp poc stuff

test: sqlness

chore: import missing

feat: support first/last_value

fix: check also include first/last value
This commit is contained in:
discord9
2025-06-08 19:19:11 +08:00
parent 1786190235
commit 540fc92a7c
6 changed files with 856 additions and 8 deletions

View File

@@ -15,6 +15,7 @@
use std::collections::HashSet;
use std::sync::Arc;
use common_telemetry::debug;
use datafusion::datasource::DefaultTableSource;
use datafusion::error::Result as DfResult;
use datafusion_common::config::ConfigOptions;
@@ -154,6 +155,7 @@ struct PlanRewriter {
/// Partition columns of the table in current pass
partition_cols: Option<Vec<String>>,
column_requirements: HashSet<Column>,
expand_on_next_call: bool,
}
impl PlanRewriter {
@@ -174,6 +176,10 @@ impl PlanRewriter {
{
return true;
}
if self.expand_on_next_call {
self.expand_on_next_call = false;
return true;
}
match Categorizer::check_plan(plan, self.partition_cols.clone()) {
Commutativity::Commutative => {}
Commutativity::PartialCommutative => {
@@ -190,12 +196,17 @@ impl PlanRewriter {
self.stage.push(plan)
}
}
Commutativity::TransformedCommutative(transformer) => {
Commutativity::TransformedCommutative {
transformer,
expand_on_parent,
} => {
if let Some(transformer) = transformer
&& let Some(plan) = transformer(plan)
&& let Some(changed_plan) = transformer(plan)
{
self.update_column_requirements(&plan);
self.stage.push(plan)
debug!("PlanRewriter: transformed plan: {changed_plan} from {plan}");
self.update_column_requirements(&changed_plan);
self.stage.push(changed_plan);
self.expand_on_next_call = expand_on_parent;
}
}
Commutativity::NonCommutative
@@ -391,10 +402,21 @@ impl TreeNodeRewriter for PlanRewriter {
return Ok(Transformed::yes(node));
};
let parent = parent.clone();
// TODO(ruihang): avoid this clone
if self.should_expand(&parent.clone()) {
if self.should_expand(&parent) {
// TODO(ruihang): does this work for nodes with multiple children?;
let node = self.expand(node)?;
debug!("PlanRewriter: should expand child:\n {node}\n Of Parent: {parent}");
let node = self.expand(node);
debug!(
"PlanRewriter: expanded plan: {}",
match &node {
Ok(n) => n.to_string(),
Err(e) => format!("Error expanding plan: {e}"),
}
);
let node = node?;
self.pop_stack();
return Ok(Transformed::yes(node));
}

View File

@@ -15,6 +15,9 @@
use std::collections::HashSet;
use std::sync::Arc;
use common_function::aggr::{HllState, UddSketchState, HLL_NAME, UDDSKETCH_STATE_NAME};
use common_telemetry::debug;
use datafusion::functions_aggregate::sum::sum_udaf;
use datafusion_expr::{Expr, LogicalPlan, UserDefinedLogicalNode};
use promql::extension_plan::{
EmptyMetric, InstantManipulate, RangeManipulate, SeriesDivide, SeriesNormalize,
@@ -23,12 +26,157 @@ use promql::extension_plan::{
use crate::dist_plan::merge_sort::{merge_sort_transformer, MergeSortLogicalPlan};
use crate::dist_plan::MergeScanLogicalPlan;
/// Generates the upper aggregation plan that will execute on the frontend.
///
/// `aggr_plan` must be a [`LogicalPlan::Aggregate`] whose aggregate expressions are all
/// steppable (see [`is_all_aggr_exprs_steppable`]). The returned plan is a clone of that
/// aggregate in which:
///
/// - every aggregate expression is replaced by its merging counterpart, which reads the
///   column produced by the original expression and is aliased back to the same name so
///   that parent plans can still refer to it;
/// - every group-by expression that is not already a plain column is replaced by a
///   reference to the column carrying its name, so it is not computed a second time.
///
/// # Errors
///
/// - `DataFusionError::Plan` if `aggr_plan` is not an aggregate plan, or if a
///   `uddsketch_state` call does not carry its third (value) argument.
/// - `DataFusionError::NotImplemented` if any aggregate expression is not steppable.
/// - `DataFusionError::Internal` if an aggregate function found through the shared
///   accessor cannot be found again through the mutable accessor.
/// - Any error returned by `Expr::name_for_alias`.
pub fn step_aggr_to_upper_aggr(aggr_plan: &LogicalPlan) -> datafusion_common::Result<LogicalPlan> {
    let LogicalPlan::Aggregate(aggr) = aggr_plan else {
        return Err(datafusion_common::DataFusionError::Plan(
            "step_aggr_to_upper_aggr only accepts Aggregate plan".to_string(),
        ));
    };
    if !is_all_aggr_exprs_steppable(&aggr.aggr_expr) {
        return Err(datafusion_common::DataFusionError::NotImplemented(
            "Some aggregate expressions are not steppable".to_string(),
        ));
    }

    let mut upper_aggr_expr = Vec::with_capacity(aggr.aggr_expr.len());
    for aggr_expr in &aggr.aggr_expr {
        let Some(aggr_func) = get_aggr_func(aggr_expr) else {
            return Err(datafusion_common::DataFusionError::NotImplemented(
                "Aggregate function not found".to_string(),
            ));
        };
        let col_name = aggr_expr.name_for_alias()?;
        // The upper function consumes the column produced by the original expression.
        let input_column =
            Expr::Column(datafusion_common::Column::new_unqualified(col_name.clone()));

        let mut upper_func = aggr_func.clone();
        // The original aggregate already applied the FILTER clause while producing its
        // result, and the predicate refers to columns of the original input, so it must
        // not be applied again to the partial results.
        upper_func.filter = None;
        // NOTE(review): `order_by` is still cloned as-is for `first_value`/`last_value`;
        // confirm the ordering columns are available in the upper plan's input.
        match aggr_func.func.name() {
            "sum" | "min" | "max" | "last_value" | "first_value" => {
                // same_func(input_column) as col_name
                upper_func.args = vec![input_column];
            }
            "count" => {
                // sum(input_column) as col_name
                upper_func.func = sum_udaf();
                upper_func.args = vec![input_column];
            }
            UDDSKETCH_STATE_NAME => {
                // udd_merge(bucket_size, error_rate, input_column) as col_name
                let Some(value_arg) = upper_func.args.get_mut(2) else {
                    return Err(datafusion_common::DataFusionError::Plan(format!(
                        "Aggregate function {} expects 3 arguments, got {}",
                        aggr_func.func.name(),
                        aggr_func.args.len()
                    )));
                };
                *value_arg = input_column;
                upper_func.func = Arc::new(UddSketchState::merge_udf_impl());
            }
            HLL_NAME => {
                // hll_merge(input_column) as col_name
                upper_func.func = Arc::new(HllState::merge_udf_impl());
                upper_func.args = vec![input_column];
            }
            other => {
                return Err(datafusion_common::DataFusionError::NotImplemented(format!(
                    "Aggregate function {} is not supported for Step aggregation",
                    other
                )));
            }
        }

        // The aggregate function may sit below one or more aliases: swap it in place so
        // the surrounding alias chain is preserved.
        let mut new_aggr_expr = aggr_expr.clone();
        let Some(func_slot) = get_aggr_func_mut(&mut new_aggr_expr) else {
            return Err(datafusion_common::DataFusionError::Internal(
                "Aggregate function disappeared while building upper aggregation".to_string(),
            ));
        };
        *func_slot = upper_func;
        // Keep the column name the same, so the parent can recognize it.
        upper_aggr_expr.push(new_aggr_expr.alias(col_name));
    }

    let mut new_aggr = aggr.clone();
    new_aggr.aggr_expr = upper_aggr_expr;
    // Group-by expressions also need an alias to avoid duplicated computing.
    for expr in &mut new_aggr.group_expr {
        if matches!(expr, Expr::Column(_)) {
            // already a column, no need to change
            continue;
        }
        let col_name = expr.name_for_alias()?;
        let input_column =
            Expr::Column(datafusion_common::Column::new_unqualified(col_name.clone()));
        *expr = input_column.alias(col_name);
    }

    Ok(LogicalPlan::Aggregate(new_aggr))
}
/// Check if the given aggregate expression is steppable.
/// As in if it can be split into multiple steps:
/// i.e. on datanode first call `state(input)` then
/// on frontend call `calc(merge(state))` to get the final result.
///
pub fn is_all_aggr_exprs_steppable(aggr_exprs: &[Expr]) -> bool {
let step_action = HashSet::from([
"sum",
"count",
"min",
"max",
"first_value",
"last_value",
UDDSKETCH_STATE_NAME,
HLL_NAME,
]);
aggr_exprs.iter().all(|expr| {
if let Some(aggr_func) = get_aggr_func(expr) {
if aggr_func.distinct {
// Distinct aggregate functions are not steppable(yet).
return false;
}
step_action.contains(aggr_func.func.name())
} else {
false
}
})
}
/// Returns the aggregate function that `expr` denotes, looking through any number of
/// enclosing aliases. Returns `None` when the innermost expression is not an
/// aggregate function.
pub fn get_aggr_func(expr: &Expr) -> Option<&datafusion_expr::expr::AggregateFunction> {
    let mut current = expr;
    loop {
        match current {
            // Peel one alias layer and keep looking.
            Expr::Alias(alias) => current = &*alias.expr,
            Expr::AggregateFunction(func) => return Some(func),
            _ => return None,
        }
    }
}
/// Mutable counterpart of [`get_aggr_func`]: returns the aggregate function that `expr`
/// denotes, looking through any number of enclosing aliases, or `None` when the
/// innermost expression is not an aggregate function.
pub fn get_aggr_func_mut(expr: &mut Expr) -> Option<&mut datafusion_expr::expr::AggregateFunction> {
    let mut cursor = expr;
    // Descend through the alias chain until a non-alias expression is reached.
    while let Expr::Alias(alias) = cursor {
        cursor = &mut alias.expr;
    }
    match cursor {
        Expr::AggregateFunction(func) => Some(func),
        _ => None,
    }
}
#[allow(dead_code)]
pub enum Commutativity {
Commutative,
PartialCommutative,
ConditionalCommutative(Option<Transformer>),
TransformedCommutative(Option<Transformer>),
TransformedCommutative {
transformer: Option<Transformer>,
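/// Whether the plan rewriter must expand on its next `should_expand` check
/// (i.e. at the parent of the transformed plan) once the transformer has produced a plan.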
expand_on_parent: bool,
},
NonCommutative,
Unimplemented,
/// For unrelated plans like DDL
@@ -55,7 +203,18 @@ impl Categorizer {
LogicalPlan::Filter(filter) => Self::check_expr(&filter.predicate),
LogicalPlan::Window(_) => Commutativity::Unimplemented,
LogicalPlan::Aggregate(aggr) => {
if !Self::check_partition(&aggr.group_expr, &partition_cols) {
let is_all_steppable = is_all_aggr_exprs_steppable(&aggr.aggr_expr);
let is_partition = Self::check_partition(&aggr.group_expr, &partition_cols);
if !is_partition && is_all_steppable {
debug!("Plan is steppable: {plan}");
return Commutativity::TransformedCommutative {
transformer: Some(Arc::new(|plan: &LogicalPlan| {
step_aggr_to_upper_aggr(plan).ok()
})),
expand_on_parent: true,
};
}
if !is_partition {
return Commutativity::NonCommutative;
}
for expr in &aggr.aggr_expr {

View File

@@ -0,0 +1,409 @@
CREATE TABLE integers(
host STRING,
i BIGINT,
ts TIMESTAMP TIME INDEX
) PARTITION ON COLUMNS (host) (
host < '550-A',
host >= '550-A'
AND host < '550-W',
host >= '550-W'
);
Affected Rows: 0
INSERT INTO integers (host, i, ts) VALUES
('550-A', 1, '2023-01-01 00:00:00'),
('550-A', 2, '2023-01-01 01:00:00'),
('550-W', 3, '2023-01-01 02:00:00'),
('550-W', 4, '2023-01-01 03:00:00');
Affected Rows: 4
-- count
EXPLAIN SELECT
count(i)
FROM
integers;
+---------------+-------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[]], aggr=[[sum(count(integers.i)) AS count(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[]], aggr=[[count(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[count(integers.i)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[count(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
ts,
count(i)
FROM
integers
GROUP BY
ts;
+---------------+---------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[integers.ts]], aggr=[[sum(count(integers.i)) AS count(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[count(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[count(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[count(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
count(i)
FROM
integers
GROUP BY
ts;
+---------------+-----------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: count(integers.i) |
| | Aggregate: groupBy=[[integers.ts]], aggr=[[sum(count(integers.i)) AS count(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[count(integers.i)]] |
| | TableScan: integers] |
| physical_plan | ProjectionExec: expr=[count(integers.i)@1 as count(integers.i)] |
| | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[count(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[count(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------+
-- sum
EXPLAIN SELECT
sum(i)
FROM
integers;
+---------------+-------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[]], aggr=[[sum(sum(integers.i)) AS sum(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[]], aggr=[[sum(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[sum(integers.i)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[sum(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
ts,
sum(i)
FROM
integers
GROUP BY
ts;
+---------------+---------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[integers.ts]], aggr=[[sum(sum(integers.i)) AS sum(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[sum(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[sum(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[sum(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
sum(i)
FROM
integers
GROUP BY
ts;
+---------------+-----------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: sum(integers.i) |
| | Aggregate: groupBy=[[integers.ts]], aggr=[[sum(sum(integers.i)) AS sum(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[sum(integers.i)]] |
| | TableScan: integers] |
| physical_plan | ProjectionExec: expr=[sum(integers.i)@1 as sum(integers.i)] |
| | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[sum(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[sum(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------+
-- min
EXPLAIN SELECT
min(i)
FROM
integers;
+---------------+-------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[]], aggr=[[min(min(integers.i)) AS min(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[]], aggr=[[min(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[min(integers.i)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[min(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
ts,
min(i)
FROM
integers
GROUP BY
ts;
+---------------+---------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[integers.ts]], aggr=[[min(min(integers.i)) AS min(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[min(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[min(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[min(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
min(i)
FROM
integers
GROUP BY
ts;
+---------------+-----------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: min(integers.i) |
| | Aggregate: groupBy=[[integers.ts]], aggr=[[min(min(integers.i)) AS min(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[min(integers.i)]] |
| | TableScan: integers] |
| physical_plan | ProjectionExec: expr=[min(integers.i)@1 as min(integers.i)] |
| | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[min(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[min(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------+
-- max
EXPLAIN SELECT
max(i)
FROM
integers;
+---------------+-------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[]], aggr=[[max(max(integers.i)) AS max(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[]], aggr=[[max(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[max(integers.i)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[max(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
ts,
max(i)
FROM
integers
GROUP BY
ts;
+---------------+---------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[integers.ts]], aggr=[[max(max(integers.i)) AS max(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[max(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[max(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[max(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
max(i)
FROM
integers
GROUP BY
ts;
+---------------+-----------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: max(integers.i) |
| | Aggregate: groupBy=[[integers.ts]], aggr=[[max(max(integers.i)) AS max(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[max(integers.i)]] |
| | TableScan: integers] |
| physical_plan | ProjectionExec: expr=[max(integers.i)@1 as max(integers.i)] |
| | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[max(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[max(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------+
-- uddsketch_state
EXPLAIN SELECT
uddsketch_state(128, 0.01, i)
FROM
integers;
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[]], aggr=[[uddsketch_merge(Int64(128), Float64(0.01), uddsketch_state(Int64(128),Float64(0.01),integers.i)) AS uddsketch_state(Int64(128),Float64(0.01),integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[]], aggr=[[uddsketch_state(Int64(128), Float64(0.01), CAST(integers.i AS Float64))]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[uddsketch_state(Int64(128),Float64(0.01),integers.i)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[uddsketch_state(Int64(128),Float64(0.01),integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
ts,
uddsketch_state(128, 0.01, i)
FROM
integers
GROUP BY
ts;
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[integers.ts]], aggr=[[uddsketch_merge(Int64(128), Float64(0.01), uddsketch_state(Int64(128),Float64(0.01),integers.i)) AS uddsketch_state(Int64(128),Float64(0.01),integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[uddsketch_state(Int64(128), Float64(0.01), CAST(integers.i AS Float64))]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[uddsketch_state(Int64(128),Float64(0.01),integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[uddsketch_state(Int64(128),Float64(0.01),integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
uddsketch_state(128, 0.01, i)
FROM
integers
GROUP BY
ts;
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: uddsketch_state(Int64(128),Float64(0.01),integers.i) |
| | Aggregate: groupBy=[[integers.ts]], aggr=[[uddsketch_merge(Int64(128), Float64(0.01), uddsketch_state(Int64(128),Float64(0.01),integers.i)) AS uddsketch_state(Int64(128),Float64(0.01),integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[uddsketch_state(Int64(128), Float64(0.01), CAST(integers.i AS Float64))]] |
| | TableScan: integers] |
| physical_plan | ProjectionExec: expr=[uddsketch_state(Int64(128),Float64(0.01),integers.i)@1 as uddsketch_state(Int64(128),Float64(0.01),integers.i)] |
| | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[uddsketch_state(Int64(128),Float64(0.01),integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[uddsketch_state(Int64(128),Float64(0.01),integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-- hll
EXPLAIN SELECT
hll(i)
FROM
integers;
+---------------+----------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+----------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[]], aggr=[[hll_merge(hll(integers.i)) AS hll(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[]], aggr=[[hll(CAST(integers.i AS Utf8))]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[hll(integers.i)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[hll(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+----------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
ts,
hll(i)
FROM
integers
GROUP BY
ts;
+---------------+---------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[integers.ts]], aggr=[[hll_merge(hll(integers.i)) AS hll(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[hll(CAST(integers.i AS Utf8))]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[hll(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[hll(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
hll(i)
FROM
integers
GROUP BY
ts;
+---------------+-----------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-----------------------------------------------------------------------------------------------------------------------+
| logical_plan | Projection: hll(integers.i) |
| | Aggregate: groupBy=[[integers.ts]], aggr=[[hll_merge(hll(integers.i)) AS hll(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[hll(CAST(integers.i AS Utf8))]] |
| | TableScan: integers] |
| physical_plan | ProjectionExec: expr=[hll(integers.i)@1 as hll(integers.i)] |
| | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[hll(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[hll(integers.i)] |
| | MergeScanExec: peers=[4402341478400(1025, 0), 4402341478401(1025, 1), 4402341478402(1025, 2), ] |
| | |
+---------------+-----------------------------------------------------------------------------------------------------------------------+
DROP TABLE integers;
Affected Rows: 0

View File

@@ -0,0 +1,144 @@
CREATE TABLE integers(
host STRING,
i BIGINT,
ts TIMESTAMP TIME INDEX
) PARTITION ON COLUMNS (host) (
host < '550-A',
host >= '550-A'
AND host < '550-W',
host >= '550-W'
);
INSERT INTO integers (host, i, ts) VALUES
('550-A', 1, '2023-01-01 00:00:00'),
('550-A', 2, '2023-01-01 01:00:00'),
('550-W', 3, '2023-01-01 02:00:00'),
('550-W', 4, '2023-01-01 03:00:00');
-- count
EXPLAIN SELECT
count(i)
FROM
integers;
EXPLAIN SELECT
ts,
count(i)
FROM
integers
GROUP BY
ts;
EXPLAIN SELECT
count(i)
FROM
integers
GROUP BY
ts;
-- sum
EXPLAIN SELECT
sum(i)
FROM
integers;
EXPLAIN SELECT
ts,
sum(i)
FROM
integers
GROUP BY
ts;
EXPLAIN SELECT
sum(i)
FROM
integers
GROUP BY
ts;
-- min
EXPLAIN SELECT
min(i)
FROM
integers;
EXPLAIN SELECT
ts,
min(i)
FROM
integers
GROUP BY
ts;
EXPLAIN SELECT
min(i)
FROM
integers
GROUP BY
ts;
-- max
EXPLAIN SELECT
max(i)
FROM
integers;
EXPLAIN SELECT
ts,
max(i)
FROM
integers
GROUP BY
ts;
EXPLAIN SELECT
max(i)
FROM
integers
GROUP BY
ts;
-- uddsketch_state
EXPLAIN SELECT
uddsketch_state(128, 0.01, i)
FROM
integers;
EXPLAIN SELECT
ts,
uddsketch_state(128, 0.01, i)
FROM
integers
GROUP BY
ts;
EXPLAIN SELECT
uddsketch_state(128, 0.01, i)
FROM
integers
GROUP BY
ts;
-- hll
EXPLAIN SELECT
hll(i)
FROM
integers;
EXPLAIN SELECT
ts,
hll(i)
FROM
integers
GROUP BY
ts;
EXPLAIN SELECT
hll(i)
FROM
integers
GROUP BY
ts;
DROP TABLE integers;

View File

@@ -0,0 +1,80 @@
CREATE TABLE integers(
host STRING,
i BIGINT,
ts TIMESTAMP TIME INDEX
) PARTITION ON COLUMNS (host) (
host < '550-A',
host >= '550-A'
AND host < '550-W',
host >= '550-W'
);
Affected Rows: 0
-- count
EXPLAIN SELECT
count(i)
FROM
integers;
+---------------+-------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[]], aggr=[[sum(count(integers.i)) AS count(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[]], aggr=[[count(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[count(integers.i)] |
| | CoalescePartitionsExec |
| | AggregateExec: mode=Partial, gby=[], aggr=[count(integers.i)] |
| | MergeScanExec: peers=[4398046511104(1024, 0), 4398046511105(1024, 1), 4398046511106(1024, 2), ] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
ts,
count(i)
FROM
integers
GROUP BY
ts;
+---------------+---------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+---------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[integers.ts]], aggr=[[sum(count(integers.i)) AS count(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[integers.ts]], aggr=[[count(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[ts@0 as ts], aggr=[count(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([ts@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[ts@0 as ts], aggr=[count(integers.i)] |
| | MergeScanExec: peers=[4398046511104(1024, 0), 4398046511105(1024, 1), 4398046511106(1024, 2), ] |
| | |
+---------------+---------------------------------------------------------------------------------------------------------+
EXPLAIN SELECT
date_bin('1 hour'::INTERVAL, ts),
count(i)
FROM
integers
GROUP BY
date_bin('1 hour'::INTERVAL, ts);
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| plan_type | plan |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| logical_plan | Aggregate: groupBy=[[date_bin(Utf8("1 hour"),integers.ts) AS date_bin(Utf8("1 hour"),integers.ts)]], aggr=[[sum(count(integers.i)) AS count(integers.i)]] |
| | MergeScan [is_placeholder=false, input=Aggregate: groupBy=[[date_bin(CAST(Utf8("1 hour") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[count(integers.i)]] |
| | TableScan: integers] |
| physical_plan | AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] |
| | CoalesceBatchesExec: target_batch_size=8192 |
| | RepartitionExec: partitioning=Hash([date_bin(Utf8("1 hour"),integers.ts)@0], 20), input_partitions=20 |
| | AggregateExec: mode=Partial, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] |
| | MergeScanExec: peers=[4398046511104(1024, 0), 4398046511105(1024, 1), 4398046511106(1024, 2), ] |
| | |
+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+
DROP TABLE integers;
Affected Rows: 0

View File

@@ -0,0 +1,34 @@
CREATE TABLE integers(
host STRING,
i BIGINT,
ts TIMESTAMP TIME INDEX
) PARTITION ON COLUMNS (host) (
host < '550-A',
host >= '550-A'
AND host < '550-W',
host >= '550-W'
);
-- count
EXPLAIN SELECT
count(i)
FROM
integers;
EXPLAIN SELECT
ts,
count(i)
FROM
integers
GROUP BY
ts;
EXPLAIN SELECT
date_bin('1 hour'::INTERVAL, ts),
count(i)
FROM
integers
GROUP BY
date_bin('1 hour'::INTERVAL, ts);
DROP TABLE integers;