fix: group by expr not as column in step aggr (#7008)

* fix: group by expr not as column

Signed-off-by: discord9 <discord9@163.com>

* test: dist analyzer date_bin

Signed-off-by: discord9 <discord9@163.com>

* ???fix wip

Signed-off-by: discord9 <discord9@163.com>

* fix: deduce using correct input fields

Signed-off-by: discord9 <discord9@163.com>

* refactor: clearer wrapper

Signed-off-by: discord9 <discord9@163.com>

* chore: update sqlness

Signed-off-by: discord9 <discord9@163.com>

* chore: per review

Signed-off-by: discord9 <discord9@163.com>

* chore: per review

Signed-off-by: discord9 <discord9@163.com>

* chore: rm todo

Signed-off-by: discord9 <discord9@163.com>

---------

Signed-off-by: discord9 <discord9@163.com>
This commit is contained in:
discord9
2025-09-24 14:57:01 +08:00
committed by GitHub
parent 0c038f755f
commit 238ed003df
12 changed files with 830 additions and 154 deletions

View File

@@ -14,13 +14,17 @@ Affected Rows: 0
INSERT INTO
integers (host, i, ts)
VALUES
('220-A', 2, '2023-01-01 00:00:00'),
('220-B', 3, '2023-01-01 00:00:00'),
('550-A', 1, '2023-01-01 00:00:00'),
('550-B', 5, '2023-01-01 00:00:00'),
('550-A', 2, '2023-01-01 01:00:00'),
('550-W', 3, '2023-01-01 02:00:00'),
('550-W', 4, '2023-01-01 03:00:00');
('550-Z', 4, '2023-01-01 02:00:00'),
('550-W', 5, '2023-01-01 03:00:00'),
('550-Z', 6, '2023-01-01 03:00:00');
Affected Rows: 5
Affected Rows: 9
SELECT
count(i),
@@ -33,7 +37,7 @@ FROM
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
| count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
| 5 | 15 | 2.9742334234767016 | 5 |
| 9 | 31 | 2.9742334234767016 | 6 |
+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
-- SQLNESS REPLACE (-+) -
@@ -122,11 +126,11 @@ SELECT
FROM
integers;
+-----------------+
| avg(integers.i) |
+-----------------+
| 3.0 |
+-----------------+
+--------------------+
| avg(integers.i) |
+--------------------+
| 3.4444444444444446 |
+--------------------+
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
@@ -214,10 +218,10 @@ ORDER BY
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
| ts | count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
| 2023-01-01T00:00:00 | 2 | 6 | 5.002829575110705 | 2 |
| 2023-01-01T00:00:00 | 4 | 11 | 2.9742334234767016 | 4 |
| 2023-01-01T01:00:00 | 1 | 2 | 1.9936617014173446 | 1 |
| 2023-01-01T02:00:00 | 1 | 3 | 2.9742334234767016 | 1 |
| 2023-01-01T03:00:00 | 1 | 4 | 4.014835333028587 | 1 |
| 2023-01-01T02:00:00 | 2 | 7 | 4.014835333028587 | 2 |
| 2023-01-01T03:00:00 | 2 | 11 | 5.98951037117262 | 2 |
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
-- SQLNESS REPLACE (-+) -
@@ -321,6 +325,129 @@ ORDER BY
|_|_| Total rows: 4_|
+-+-+-+
SELECT
date_bin('2s'::INTERVAL, ts) as time_window,
count(i),
sum(i),
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
hll_count(hll(i))
FROM
integers
GROUP BY
time_window
ORDER BY
time_window;
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
| time_window | count(integers.i) | sum(integers.i) | uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)) | hll_count(hll(integers.i)) |
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
| 2023-01-01T00:00:00 | 4 | 11 | 2.9742334234767016 | 4 |
| 2023-01-01T01:00:00 | 1 | 2 | 1.9936617014173446 | 1 |
| 2023-01-01T02:00:00 | 2 | 7 | 4.014835333028587 | 2 |
| 2023-01-01T03:00:00 | 2 | 11 | 5.98951037117262 | 2 |
+---------------------+-------------------+-----------------+-----------------------------------------------------------------------------------+----------------------------+
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
EXPLAIN
SELECT
date_bin('2s'::INTERVAL, ts) as time_window,
count(i),
sum(i),
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
hll_count(hll(i))
FROM
integers
GROUP BY
time_window
ORDER BY
time_window;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| Sort: time_window ASC NULLS LAST_|
|_|_Projection: date_bin(Utf8("2 seconds"),integers.ts) AS time_window, count(integers.i), sum(integers.i), uddsketch_calc(Float64(0.5), uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i))_|
|_|_Aggregate: groupBy=[[date_bin(Utf8("2 seconds"),integers.ts)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i), __sum_merge(__sum_state(integers.i)) AS sum(integers.i), __uddsketch_state_merge(__uddsketch_state_state(Int64(128),Float64(0.01),integers.i)) AS uddsketch_state(Int64(128),Float64(0.01),integers.i), __hll_merge(__hll_state(integers.i)) AS hll(integers.i)]] |
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("2 seconds") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128), Float64(0.01), CAST(integers.i AS Float64)), __hll_state(CAST(integers.i AS Utf8))]]_|
|_|_TableScan: integers_|
|_| ]]_|
| physical_plan | SortPreservingMergeExec: [time_window@0 ASC NULLS LAST]_|
|_|_SortExec: expr=[time_window@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|_|_ProjectionExec: expr=[date_bin(Utf8("2 seconds"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i), sum(integers.i)@2 as sum(integers.i), uddsketch_calc(0.5, uddsketch_state(Int64(128),Float64(0.01),integers.i)@3) as uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i)@4) as hll_count(hll(integers.i))]_|
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)]_|
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|_|_RepartitionExec: partitioning=REDACTED
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
-- might write to different partitions
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
EXPLAIN ANALYZE
SELECT
date_bin('2s'::INTERVAL, ts) as time_window,
count(i),
sum(i),
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
hll_count(hll(i))
FROM
integers
GROUP BY
time_window
ORDER BY
time_window;
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_SortPreservingMergeExec: [time_window@0 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[time_window@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_ProjectionExec: expr=[date_bin(Utf8("2 seconds"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i), sum(integers.i)@2 as sum(integers.i), uddsketch_calc(0.5, uddsketch_state(Int64(128),Float64(0.01),integers.i)@3) as uddsketch_calc(Float64(0.5),uddsketch_state(Int64(128),Float64(0.01),integers.i)), hll_count(hll(integers.i)@4) as hll_count(hll(integers.i))] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[count(integers.i), sum(integers.i), uddsketch_state(Int64(128),Float64(0.01),integers.i), hll(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("2 seconds"),integers.ts)@0 as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 2000000000 }, ts@1) as date_bin(Utf8("2 seconds"),integers.ts)], aggr=[__count_state(integers.i), __sum_state(integers.i), __uddsketch_state_state(Int64(128),Float64(0.01),integers.i), __hll_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
|_|_| Total rows: 4_|
+-+-+-+
DROP TABLE integers;
Affected Rows: 0

View File

@@ -12,11 +12,15 @@ CREATE TABLE integers(
INSERT INTO
integers (host, i, ts)
VALUES
('220-A', 2, '2023-01-01 00:00:00'),
('220-B', 3, '2023-01-01 00:00:00'),
('550-A', 1, '2023-01-01 00:00:00'),
('550-B', 5, '2023-01-01 00:00:00'),
('550-A', 2, '2023-01-01 01:00:00'),
('550-W', 3, '2023-01-01 02:00:00'),
('550-W', 4, '2023-01-01 03:00:00');
('550-Z', 4, '2023-01-01 02:00:00'),
('550-W', 5, '2023-01-01 03:00:00'),
('550-Z', 6, '2023-01-01 03:00:00');
SELECT
count(i),
@@ -142,4 +146,60 @@ GROUP BY
ORDER BY
ts;
SELECT
date_bin('2s'::INTERVAL, ts) as time_window,
count(i),
sum(i),
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
hll_count(hll(i))
FROM
integers
GROUP BY
time_window
ORDER BY
time_window;
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
EXPLAIN
SELECT
date_bin('2s'::INTERVAL, ts) as time_window,
count(i),
sum(i),
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
hll_count(hll(i))
FROM
integers
GROUP BY
time_window
ORDER BY
time_window;
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
-- might write to different partitions
-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
EXPLAIN ANALYZE
SELECT
date_bin('2s'::INTERVAL, ts) as time_window,
count(i),
sum(i),
uddsketch_calc(0.5, uddsketch_state(128, 0.01, i)),
hll_count(hll(i))
FROM
integers
GROUP BY
time_window
ORDER BY
time_window;
DROP TABLE integers;

View File

@@ -1037,3 +1037,277 @@ drop table aggr_optimize_not;
Affected Rows: 0
--
-- Additional test cases for step aggregation pushdown
--
CREATE TABLE step_aggr_extended (
pk_col_1 STRING,
pk_col_2 BIGINT,
val_col_1 BIGINT,
val_col_2 STRING,
val_col_3 BIGINT,
ts TIMESTAMP TIME INDEX,
PRIMARY KEY(pk_col_1, pk_col_2)
) PARTITION ON COLUMNS (pk_col_1) (
pk_col_1 < 'f',
pk_col_1 >= 'f'
);
Affected Rows: 0
INSERT INTO step_aggr_extended VALUES
('a', 1, 100, 'v1', 10, 1672531200000),
('a', 2, 200, 'v2', NULL, 1672531201000),
('g', 1, 300, 'v1', 30, 1672531202000),
('g', 2, 400, 'v2', 40, 1672531203000),
('a', 3, 100, 'v3', 10, 1672531204000),
('g', 3, 300, 'v3', 30, 1672531205000),
('h', 4, 500, NULL, 50, 1672531206000);
Affected Rows: 7
-- Case 12: GROUP BY includes a mix of partition key and non-partition key.
-- `pk_col_1` is a partition key, `pk_col_2` is not.
-- This should pushdown entire aggregation to datanodes since it's partitioned by `pk_col_1`.
-- Expected: Full pushdown of aggregation to datanodes.
SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
+----------+----------+-----------------------------------+
| pk_col_1 | pk_col_2 | sum(step_aggr_extended.val_col_1) |
+----------+----------+-----------------------------------+
| a | 1 | 100 |
| a | 2 | 200 |
| a | 3 | 100 |
| g | 1 | 300 |
| g | 2 | 400 |
| g | 3 | 300 |
| h | 4 | 500 |
+----------+----------+-----------------------------------+
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| MergeSort: step_aggr_extended.pk_col_1 ASC NULLS LAST, step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Sort: step_aggr_extended.pk_col_1 ASC NULLS LAST, step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|_|_Projection: step_aggr_extended.pk_col_1, step_aggr_extended.pk_col_2, sum(step_aggr_extended.val_col_1)_|
|_|_Aggregate: groupBy=[[step_aggr_extended.pk_col_1, step_aggr_extended.pk_col_2]], aggr=[[sum(step_aggr_extended.val_col_1)]] |
|_|_TableScan: step_aggr_extended_|
|_| ]]_|
| physical_plan | SortPreservingMergeExec: [pk_col_1@0 ASC NULLS LAST, pk_col_2@1 ASC NULLS LAST]_|
|_|_CooperativeExec_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
-- Case 13: COUNT(DISTINCT) aggregation.
-- `DISTINCT` aggregation is more complex and requires a two-phase distinct calculation in a distributed environment. Currently not supported for pushdown.
-- Expected: datanode only do table scan, actual aggregation happens on frontend.
SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
+----------------------------------------------+
| count(DISTINCT step_aggr_extended.val_col_1) |
+----------------------------------------------+
| 5 |
+----------------------------------------------+
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| Projection: count(alias1) AS count(DISTINCT step_aggr_extended.val_col_1)_|
|_|_Aggregate: groupBy=[[]], aggr=[[count(alias1)]]_|
|_|_Aggregate: groupBy=[[step_aggr_extended.val_col_1 AS alias1]], aggr=[[]]_|
|_|_Projection: step_aggr_extended.val_col_1_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| TableScan: step_aggr_extended_|
|_| ]]_|
| physical_plan | ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT step_aggr_extended.val_col_1)]_|
|_|_AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]_|
|_|_CoalescePartitionsExec_|
|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(alias1)]_|
|_|_AggregateExec: mode=FinalPartitioned, gby=[alias1@0 as alias1], aggr=[]_|
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|_|_RepartitionExec: partitioning=REDACTED
|_|_AggregateExec: mode=Partial, gby=[val_col_1@0 as alias1], aggr=[]_|
|_|_ProjectionExec: expr=[val_col_1@2 as val_col_1]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
-- Case 14: Aggregation with a HAVING clause.
-- The `HAVING` clause filters results after aggregation.
-- Expected: The `HAVING` filter should be applied on the frontend after the final aggregation is complete, not pushed down to datanodes.
SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
+----------+-----------------------------------+
| pk_col_2 | sum(step_aggr_extended.val_col_1) |
+----------+-----------------------------------+
| 1 | 400 |
| 2 | 600 |
| 3 | 400 |
| 4 | 500 |
+----------+-----------------------------------+
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| Sort: step_aggr_extended.pk_col_2 ASC NULLS LAST_|
|_|_Filter: sum(step_aggr_extended.val_col_1) > Int64(300)_|
|_|_Aggregate: groupBy=[[step_aggr_extended.pk_col_2]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_1)) AS sum(step_aggr_extended.val_col_1)]] |
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Aggregate: groupBy=[[step_aggr_extended.pk_col_2]], aggr=[[__sum_state(step_aggr_extended.val_col_1)]]_|
|_|_TableScan: step_aggr_extended_|
|_| ]]_|
| physical_plan | SortPreservingMergeExec: [pk_col_2@0 ASC NULLS LAST]_|
|_|_SortExec: expr=[pk_col_2@0 ASC NULLS LAST], preserve_partitioning=[true]_|
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|_|_FilterExec: sum(step_aggr_extended.val_col_1)@1 > 300_|
|_|_AggregateExec: mode=FinalPartitioned, gby=[pk_col_2@0 as pk_col_2], aggr=[sum(step_aggr_extended.val_col_1)]_|
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|_|_RepartitionExec: partitioning=REDACTED
|_|_AggregateExec: mode=Partial, gby=[pk_col_2@0 as pk_col_2], aggr=[sum(step_aggr_extended.val_col_1)]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
-- Case 15: Aggregation on a column with NULL values.
-- `SUM` should ignore NULLs. `COUNT(val_col_2)` should count non-nulls, `COUNT(*)` should count all rows.
-- Expected: Correct aggregation results, proving NULLs are handled properly in a distributed context.
SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
| sum(step_aggr_extended.val_col_3) | count(step_aggr_extended.val_col_2) | count(step_aggr_extended.val_col_3) | count(*) |
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
| 170 | 6 | 6 | 7 |
+-----------------------------------+-------------------------------------+-------------------------------------+----------+
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| Projection: sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1)) AS count(*)_|
|_|_Aggregate: groupBy=[[]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_3)) AS sum(step_aggr_extended.val_col_3), __count_merge(__count_state(step_aggr_extended.val_col_2)) AS count(step_aggr_extended.val_col_2), __count_merge(__count_state(step_aggr_extended.val_col_3)) AS count(step_aggr_extended.val_col_3), __count_merge(__count_state(step_aggr_extended.ts)) AS count(Int64(1))]] |
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Aggregate: groupBy=[[]], aggr=[[__sum_state(step_aggr_extended.val_col_3), __count_state(step_aggr_extended.val_col_2), __count_state(step_aggr_extended.val_col_3), __count_state(step_aggr_extended.ts)]]_|
|_|_TableScan: step_aggr_extended_|
|_| ]]_|
| physical_plan | ProjectionExec: expr=[sum(step_aggr_extended.val_col_3)@0 as sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2)@1 as count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3)@2 as count(step_aggr_extended.val_col_3), count(Int64(1))@3 as count(*)]_|
|_|_AggregateExec: mode=Final, gby=[], aggr=[sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1))]_|
|_|_CoalescePartitionsExec_|
|_|_AggregateExec: mode=Partial, gby=[], aggr=[sum(step_aggr_extended.val_col_3), count(step_aggr_extended.val_col_2), count(step_aggr_extended.val_col_3), count(Int64(1))]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
-- Case 16: Aggregation on STRING columns.
-- `MIN` and `MAX` can operate on strings.
-- Expected: Correct lexicographical min/max results.
SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
+----------------------------------+-----------------------------------+
| min(step_aggr_extended.pk_col_1) | max(step_aggr_extended.val_col_2) |
+----------------------------------+-----------------------------------+
| a | v3 |
+----------------------------------+-----------------------------------+
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| Aggregate: groupBy=[[]], aggr=[[__min_merge(__min_state(step_aggr_extended.pk_col_1)) AS min(step_aggr_extended.pk_col_1), __max_merge(__max_state(step_aggr_extended.val_col_2)) AS max(step_aggr_extended.val_col_2)]] |
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Aggregate: groupBy=[[]], aggr=[[__min_state(step_aggr_extended.pk_col_1), __max_state(step_aggr_extended.val_col_2)]]_|
|_|_TableScan: step_aggr_extended_|
|_| ]]_|
| physical_plan | AggregateExec: mode=Final, gby=[], aggr=[min(step_aggr_extended.pk_col_1), max(step_aggr_extended.val_col_2)]_|
|_|_CoalescePartitionsExec_|
|_|_AggregateExec: mode=Partial, gby=[], aggr=[min(step_aggr_extended.pk_col_1), max(step_aggr_extended.val_col_2)]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
-- Case 17: Aggregation on an empty input set.
-- `WHERE` clause filters out all rows.
-- Expected: Aggregation should return correct default values (e.g., COUNT is 0, SUM is NULL).
SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
+-----------------------------------+----------+
| sum(step_aggr_extended.val_col_1) | count(*) |
+-----------------------------------+----------+
| | 0 |
+-----------------------------------+----------+
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| Projection: sum(step_aggr_extended.val_col_1), count(Int64(1)) AS count(*)_|
|_|_Aggregate: groupBy=[[]], aggr=[[__sum_merge(__sum_state(step_aggr_extended.val_col_1)) AS sum(step_aggr_extended.val_col_1), __count_merge(__count_state(step_aggr_extended.ts)) AS count(Int64(1))]] |
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Aggregate: groupBy=[[]], aggr=[[__sum_state(step_aggr_extended.val_col_1), __count_state(step_aggr_extended.ts)]]_|
|_|_Filter: step_aggr_extended.pk_col_1 = Utf8("non_existent")_|
|_|_TableScan: step_aggr_extended_|
|_| ]]_|
| physical_plan | ProjectionExec: expr=[sum(step_aggr_extended.val_col_1)@0 as sum(step_aggr_extended.val_col_1), count(Int64(1))@1 as count(*)]_|
|_|_AggregateExec: mode=Final, gby=[], aggr=[sum(step_aggr_extended.val_col_1), count(Int64(1))]_|
|_|_CoalescePartitionsExec_|
|_|_AggregateExec: mode=Partial, gby=[], aggr=[sum(step_aggr_extended.val_col_1), count(Int64(1))]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
+-+-+
DROP TABLE step_aggr_extended;
Affected Rows: 0

View File

@@ -305,3 +305,110 @@ GROUP BY
drop table aggr_optimize_not_count;
drop table aggr_optimize_not;
--
-- Additional test cases for step aggregation pushdown
--
CREATE TABLE step_aggr_extended (
pk_col_1 STRING,
pk_col_2 BIGINT,
val_col_1 BIGINT,
val_col_2 STRING,
val_col_3 BIGINT,
ts TIMESTAMP TIME INDEX,
PRIMARY KEY(pk_col_1, pk_col_2)
) PARTITION ON COLUMNS (pk_col_1) (
pk_col_1 < 'f',
pk_col_1 >= 'f'
);
INSERT INTO step_aggr_extended VALUES
('a', 1, 100, 'v1', 10, 1672531200000),
('a', 2, 200, 'v2', NULL, 1672531201000),
('g', 1, 300, 'v1', 30, 1672531202000),
('g', 2, 400, 'v2', 40, 1672531203000),
('a', 3, 100, 'v3', 10, 1672531204000),
('g', 3, 300, 'v3', 30, 1672531205000),
('h', 4, 500, NULL, 50, 1672531206000);
-- Case 12: GROUP BY includes a mix of partition key and non-partition key.
-- `pk_col_1` is a partition key, `pk_col_2` is not.
-- This should pushdown entire aggregation to datanodes since it's partitioned by `pk_col_1`.
-- Expected: Full pushdown of aggregation to datanodes.
SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT pk_col_1, pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_1, pk_col_2 ORDER BY pk_col_1, pk_col_2;
-- Case 13: COUNT(DISTINCT) aggregation.
-- `DISTINCT` aggregation is more complex and requires a two-phase distinct calculation in a distributed environment. Currently not supported for pushdown.
-- Expected: datanode only do table scan, actual aggregation happens on frontend.
SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT COUNT(DISTINCT val_col_1) FROM step_aggr_extended;
-- Case 14: Aggregation with a HAVING clause.
-- The `HAVING` clause filters results after aggregation.
-- Expected: The `HAVING` filter should be applied on the frontend after the final aggregation is complete, not pushed down to datanodes.
SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT pk_col_2, sum(val_col_1) FROM step_aggr_extended GROUP BY pk_col_2 HAVING sum(val_col_1) > 300 ORDER BY pk_col_2;
-- Case 15: Aggregation on a column with NULL values.
-- `SUM` should ignore NULLs. `COUNT(val_col_2)` should count non-nulls, `COUNT(*)` should count all rows.
-- Expected: Correct aggregation results, proving NULLs are handled properly in a distributed context.
SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT SUM(val_col_3), COUNT(val_col_2), COUNT(val_col_3), COUNT(*) FROM step_aggr_extended;
-- Case 16: Aggregation on STRING columns.
-- `MIN` and `MAX` can operate on strings.
-- Expected: Correct lexicographical min/max results.
SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT MIN(pk_col_1), MAX(val_col_2) FROM step_aggr_extended;
-- Case 17: Aggregation on an empty input set.
-- `WHERE` clause filters out all rows.
-- Expected: Aggregation should return correct default values (e.g., COUNT is 0, SUM is NULL).
SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN SELECT SUM(val_col_1), COUNT(*) FROM step_aggr_extended WHERE pk_col_1 = 'non_existent';
DROP TABLE step_aggr_extended;

View File

@@ -14,13 +14,17 @@ Affected Rows: 0
INSERT INTO
integers (host, i, ts)
VALUES
('220-A', 2, '2023-01-01 00:00:00'),
('220-B', 3, '2023-01-01 00:00:00'),
('550-A', 1, '2023-01-01 00:00:00'),
('550-B', 5, '2023-01-01 00:00:00'),
('550-A', 2, '2023-01-01 01:00:00'),
('550-W', 3, '2023-01-01 02:00:00'),
('550-W', 4, '2023-01-01 03:00:00');
('550-Z', 4, '2023-01-01 02:00:00'),
('550-W', 5, '2023-01-01 03:00:00'),
('550-Z', 6, '2023-01-01 03:00:00');
Affected Rows: 5
Affected Rows: 9
-- count
SELECT
@@ -31,7 +35,7 @@ FROM
+-------------------+
| count(integers.i) |
+-------------------+
| 5 |
| 9 |
+-------------------+
-- SQLNESS REPLACE (-+) -
@@ -120,10 +124,10 @@ ORDER BY
+---------------------+-------------------+
| ts | count(integers.i) |
+---------------------+-------------------+
| 2023-01-01T00:00:00 | 2 |
| 2023-01-01T00:00:00 | 4 |
| 2023-01-01T01:00:00 | 1 |
| 2023-01-01T02:00:00 | 1 |
| 2023-01-01T03:00:00 | 1 |
| 2023-01-01T02:00:00 | 2 |
| 2023-01-01T03:00:00 | 2 |
+---------------------+-------------------+
-- SQLNESS REPLACE (-+) -
@@ -234,10 +238,10 @@ ORDER BY
+---------------------+-------------------+
| time_window | count(integers.i) |
+---------------------+-------------------+
| 2023-01-01T00:00:00 | 2 |
| 2023-01-01T00:00:00 | 4 |
| 2023-01-01T01:00:00 | 1 |
| 2023-01-01T02:00:00 | 1 |
| 2023-01-01T03:00:00 | 1 |
| 2023-01-01T02:00:00 | 2 |
| 2023-01-01T03:00:00 | 2 |
+---------------------+-------------------+
-- SQLNESS REPLACE (-+) -
@@ -260,15 +264,20 @@ ORDER BY
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| MergeSort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Sort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
| logical_plan_| Sort: time_window ASC NULLS LAST, count(integers.i) ASC NULLS LAST_|
|_|_Projection: date_bin(Utf8("1 hour"),integers.ts) AS time_window, count(integers.i)_|
|_|_Aggregate: groupBy=[[date_bin(CAST(Utf8("1 hour") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[count(integers.i)]] |
|_|_Aggregate: groupBy=[[date_bin(Utf8("1 hour"),integers.ts)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i)]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Aggregate: groupBy=[[date_bin(CAST(Utf8("1 hour") AS Interval(MonthDayNano)), integers.ts)]], aggr=[[__count_state(integers.i)]]_|
|_|_TableScan: integers_|
|_| ]]_|
| physical_plan | SortPreservingMergeExec: [time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST]_|
|_|_CooperativeExec_|
|_|_SortExec: expr=[time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true]_|
|_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)]_|
|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] |
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|_|_RepartitionExec: partitioning=REDACTED
|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
@@ -299,37 +308,33 @@ ORDER BY
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_SortPreservingMergeExec: [time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SortExec: expr=[time_window@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
| 1_| 1_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
| 1_| 2_|_ProjectionExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 as time_window, count(integers.i)@1 as count(integers.i)] REDACTED
|_|_|_SortPreservingMergeExec: [date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[date_bin(Utf8("1 hour"),integers.ts)@0 ASC NULLS LAST, count(integers.i)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[date_bin(Utf8("1 hour"),integers.ts)@0 as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[count(integers.i)] REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[date_bin(IntervalMonthDayNano { months: 0, days: 0, nanoseconds: 3600000000000 }, ts@1) as date_bin(Utf8("1 hour"),integers.ts)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
@@ -354,10 +359,13 @@ ORDER BY
| integers.ts + Int64(1) | integers.i / Int64(2) | count(integers.i) |
+------------------------+-----------------------+-------------------+
| 1672531200001 | 0 | 1 |
| 1672531200001 | 1 | 2 |
| 1672531200001 | 2 | 1 |
| 1672534800001 | 1 | 1 |
| 1672538400001 | 1 | 1 |
| 1672538400001 | 2 | 1 |
| 1672542000001 | 2 | 1 |
| 1672542000001 | 3 | 1 |
+------------------------+-----------------------+-------------------+
-- SQLNESS REPLACE (-+) -
@@ -383,15 +391,18 @@ ORDER BY
+-+-+
| plan_type_| plan_|
+-+-+
| logical_plan_| MergeSort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
| logical_plan_| Sort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|_|_Aggregate: groupBy=[[integers.ts + Int64(1), integers.i / Int64(2)]], aggr=[[__count_merge(__count_state(integers.i)) AS count(integers.i)]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Sort: integers.ts + Int64(1) ASC NULLS LAST, integers.i / Int64(2) ASC NULLS LAST_|
|_|_Projection: integers.ts + Int64(1), integers.i / Int64(2), count(integers.i)_|
|_|_Aggregate: groupBy=[[CAST(integers.ts AS Int64) + Int64(1), integers.i / Int64(2)]], aggr=[[count(integers.i)]] |
|_| Aggregate: groupBy=[[CAST(integers.ts AS Int64) + Int64(1), integers.i / Int64(2)]], aggr=[[__count_state(integers.i)]]_|
|_|_TableScan: integers_|
|_| ]]_|
| physical_plan | SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST]_|
|_|_CooperativeExec_|
|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true]_|
|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] |
|_|_CoalesceBatchesExec: target_batch_size=8192_|
|_|_RepartitionExec: partitioning=REDACTED
|_|_AggregateExec: mode=Partial, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)]_|
|_|_CooperativeExec_|
|_|_MergeScanExec: REDACTED
|_|_|
@@ -425,38 +436,36 @@ ORDER BY
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
| 1_| 1_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
| 1_| 2_|_SortPreservingMergeExec: [integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[integers.ts + Int64(1)@0 ASC NULLS LAST, integers.i / Int64(2)@1 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
| 1_| 2_|_AggregateExec: mode=FinalPartitioned, gby=[integers.ts + Int64(1)@0 as integers.ts + Int64(1), integers.i / Int64(2)@1 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[count(integers.i)] REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[CAST(ts@1 AS Int64) + 1 as integers.ts + Int64(1), i@0 / 2 as integers.i / Int64(2)], aggr=[__count_state(integers.i)] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":REDACTED REDACTED
|_|_|_|
|_|_| Total rows: 5_|
|_|_| Total rows: 8_|
+-+-+-+
-- test udd/hll_merge pushdown
@@ -487,7 +496,7 @@ GROUP BY
time_window,
host;
Affected Rows: 5
Affected Rows: 9
SELECT
uddsketch_calc(0.5, uddsketch_merge(128, 0.01, udd_state)) as udd_result,
@@ -498,7 +507,7 @@ FROM
+--------------------+------------+
| udd_result | hll_result |
+--------------------+------------+
| 2.9742334234767016 | 5 |
| 2.9742334234767016 | 6 |
+--------------------+------------+
-- SQLNESS REPLACE (-+) -

View File

@@ -12,11 +12,15 @@ CREATE TABLE integers(
INSERT INTO
integers (host, i, ts)
VALUES
('220-A', 2, '2023-01-01 00:00:00'),
('220-B', 3, '2023-01-01 00:00:00'),
('550-A', 1, '2023-01-01 00:00:00'),
('550-B', 5, '2023-01-01 00:00:00'),
('550-A', 2, '2023-01-01 01:00:00'),
('550-W', 3, '2023-01-01 02:00:00'),
('550-W', 4, '2023-01-01 03:00:00');
('550-Z', 4, '2023-01-01 02:00:00'),
('550-W', 5, '2023-01-01 03:00:00'),
('550-Z', 6, '2023-01-01 03:00:00');
-- count
SELECT

File diff suppressed because one or more lines are too long