perf: treat DISTINCT as comm/part-comm (#7348)

* perf: treat DISTINCT as comm/part-comm

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* sqlness tests

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2025-12-04 19:14:00 +08:00
committed by GitHub
parent 9d35b8cad4
commit f2288a86b0
4 changed files with 169 additions and 2 deletions

View File

@@ -187,7 +187,7 @@ impl Categorizer {
if partition_cols.is_empty() {
Commutativity::Commutative
} else {
Commutativity::Unimplemented
Commutativity::PartialCommutative
}
}
LogicalPlan::Unnest(_) => Commutativity::Commutative,

View File

@@ -373,7 +373,8 @@ EXPLAIN SELECT DISTINCT x FROM (SELECT a AS x FROM t) sq ORDER BY x;
| logical_plan_| Sort: sq.x ASC NULLS LAST_|
|_|_Aggregate: groupBy=[[sq.x]], aggr=[[]]_|
|_|_MergeScan [is_placeholder=false, remote_input=[_|
|_| Projection: sq.x_|
|_| Distinct:_|
|_|_Projection: sq.x_|
|_|_SubqueryAlias: sq_|
|_|_Projection: t.a AS x_|
|_|_TableScan: t_|

View File

@@ -105,6 +105,133 @@ EXPLAIN ANALYZE SELECT DISTINCT a FROM test ORDER BY a;
|_|_| Total rows: 2_|
+-+-+-+
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN ANALYZE SELECT DISTINCT a, b FROM test ORDER BY a;
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_SortPreservingMergeExec: [a@0 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED
|_|_|_|
|_|_| Total rows: 3_|
+-+-+-+
DROP TABLE test;
Affected Rows: 0
CREATE TABLE test (a INTEGER, b INTEGER, t TIMESTAMP TIME INDEX)
PARTITION ON COLUMNS(a) (
a <= 10,
a > 10,
);
Affected Rows: 0
INSERT INTO test VALUES (1, 22, 1), (1, 21, 2), (100, 21, 3), (100, 22, 4);
Affected Rows: 4
SELECT DISTINCT a, b FROM test ORDER BY a, b;
+-----+----+
| a | b |
+-----+----+
| 1 | 21 |
| 1 | 22 |
| 100 | 21 |
| 100 | 22 |
+-----+----+
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN ANALYZE SELECT DISTINCT a FROM test ORDER BY a;
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_SortPreservingMergeExec: [a@0 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=SinglePartitioned, gby=[a@0 as a], aggr=[] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED
|_|_|_|
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED
|_|_|_|
|_|_| Total rows: 2_|
+-+-+-+
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN ANALYZE SELECT DISTINCT a, b FROM test ORDER BY a;
+-+-+-+
| stage | node | plan_|
+-+-+-+
| 0_| 0_|_SortPreservingMergeExec: [a@0 ASC NULLS LAST] REDACTED
|_|_|_SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true] REDACTED
|_|_|_AggregateExec: mode=FinalPartitioned, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_MergeScanExec: REDACTED
|_|_|_|
| 1_| 0_|_AggregateExec: mode=FinalPartitioned, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED
|_|_|_|
| 1_| 1_|_AggregateExec: mode=FinalPartitioned, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED
|_|_|_RepartitionExec: partitioning=REDACTED
|_|_|_AggregateExec: mode=Partial, gby=[a@0 as a, b@1 as b], aggr=[] REDACTED
|_|_|_CooperativeExec REDACTED
|_|_|_SeqScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED
|_|_|_|
|_|_| Total rows: 4_|
+-+-+-+
DROP TABLE test;
Affected Rows: 0

View File

@@ -27,4 +27,43 @@ SELECT DISTINCT ON (a) * FROM test ORDER BY a, t DESC;
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN ANALYZE SELECT DISTINCT a FROM test ORDER BY a;
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN ANALYZE SELECT DISTINCT a, b FROM test ORDER BY a;
DROP TABLE test;
CREATE TABLE test (a INTEGER, b INTEGER, t TIMESTAMP TIME INDEX)
PARTITION ON COLUMNS(a) (
a <= 10,
a > 10,
);
INSERT INTO test VALUES (1, 22, 1), (1, 21, 2), (100, 21, 3), (100, 22, 4);
SELECT DISTINCT a, b FROM test ORDER BY a, b;
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN ANALYZE SELECT DISTINCT a FROM test ORDER BY a;
-- SQLNESS REPLACE (metrics.*) REDACTED
-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED
-- SQLNESS REPLACE (Hash.*) REDACTED
-- SQLNESS REPLACE (-+) -
-- SQLNESS REPLACE (\s\s+) _
-- SQLNESS REPLACE (peers.*) REDACTED
-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED
EXPLAIN ANALYZE SELECT DISTINCT a, b FROM test ORDER BY a;
DROP TABLE test;