From 68247fc9b1d9f8e5d249b6b169dbb913c4a79be2 Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Mon, 27 Oct 2025 10:14:45 +0800 Subject: [PATCH] fix: count_state use stat to eval&predicate w/out region (#7116) * fix: count_state use stat to eval Signed-off-by: discord9 * cleanup Signed-off-by: discord9 * fix: use predicate without region Signed-off-by: discord9 * test: diverge standalone/dist impl Signed-off-by: discord9 --------- Signed-off-by: discord9 --- src/common/function/src/aggrs/aggr_wrapper.rs | 47 ++ src/mito2/src/read/scan_region.rs | 5 +- src/mito2/src/read/seq_scan.rs | 8 +- src/mito2/src/read/series_scan.rs | 8 +- src/mito2/src/read/unordered_scan.rs | 10 +- src/store-api/src/region_engine.rs | 6 +- src/table/src/table/scan.rs | 39 +- .../cases/distributed/optimizer/count.result | 465 ++++++++++++++++++ tests/cases/distributed/optimizer/count.sql | 1 + .../standalone/common/aggregate/count.result | 103 ---- .../standalone/common/aggregate/count.sql | 43 -- tests/cases/standalone/optimizer/count.result | 462 +++++++++++++++++ tests/cases/standalone/optimizer/count.sql | 201 ++++++++ 13 files changed, 1221 insertions(+), 177 deletions(-) create mode 100644 tests/cases/distributed/optimizer/count.result create mode 120000 tests/cases/distributed/optimizer/count.sql delete mode 100644 tests/cases/standalone/common/aggregate/count.result delete mode 100644 tests/cases/standalone/common/aggregate/count.sql create mode 100644 tests/cases/standalone/optimizer/count.result create mode 100644 tests/cases/standalone/optimizer/count.sql diff --git a/src/common/function/src/aggrs/aggr_wrapper.rs b/src/common/function/src/aggrs/aggr_wrapper.rs index ed691296ee..54dc1ac78e 100644 --- a/src/common/function/src/aggrs/aggr_wrapper.rs +++ b/src/common/function/src/aggrs/aggr_wrapper.rs @@ -29,6 +29,8 @@ use arrow::array::StructArray; use arrow_schema::{FieldRef, Fields}; use common_telemetry::debug; use 
datafusion::functions_aggregate::all_default_aggregate_functions; +use datafusion::functions_aggregate::count::Count; +use datafusion::functions_aggregate::min_max::{Max, Min}; use datafusion::optimizer::AnalyzerRule; use datafusion::optimizer::analyzer::type_coercion::TypeCoercion; use datafusion::physical_planner::create_aggregate_expr_and_maybe_filter; @@ -413,6 +415,51 @@ impl AggregateUDFImpl for StateWrapper { fn coerce_types(&self, arg_types: &[DataType]) -> datafusion_common::Result<Vec<DataType>> { self.inner.coerce_types(arg_types) } + + fn value_from_stats( + &self, + statistics_args: &datafusion_expr::StatisticsArgs, + ) -> Option<ScalarValue> { + let inner = self.inner().inner().as_any(); + // only count/min/max need special handling here, for getting result from statistics + // the result of count/min/max is also the result of count_state so can return directly + let can_use_stat = inner.is::<Count>() || inner.is::<Min>() || inner.is::<Max>(); + if !can_use_stat { + return None; + } + + // fix return type by extract the first field's data type from the struct type + let state_type = if let DataType::Struct(fields) = &statistics_args.return_type { + if fields.is_empty() { + return None; + } + fields[0].data_type().clone() + } else { + return None; + }; + + let fixed_args = datafusion_expr::StatisticsArgs { + statistics: statistics_args.statistics, + return_type: &state_type, + is_distinct: statistics_args.is_distinct, + exprs: statistics_args.exprs, + }; + + let ret = self.inner().value_from_stats(&fixed_args)?; + + // wrap the result into struct scalar value + let fields = if let DataType::Struct(fields) = &statistics_args.return_type { + fields + } else { + return None; + }; + + let array = ret.to_array().ok()?; + + let struct_array = StructArray::new(fields.clone(), vec![array], None); + let ret = ScalarValue::Struct(Arc::new(struct_array)); + Some(ret) + } } /// The wrapper's input is the same as the original aggregate function's input, diff --git a/src/mito2/src/read/scan_region.rs 
b/src/mito2/src/read/scan_region.rs index 536c48e248..29eef2ef6d 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -1106,9 +1106,8 @@ impl ScanInput { rows } - /// Returns table predicate of all exprs. - pub(crate) fn predicate(&self) -> Option<&Predicate> { - self.predicate.predicate() + pub(crate) fn predicate_group(&self) -> &PredicateGroup { + &self.predicate } /// Returns number of memtables to scan. diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index 631c40b42a..c90ea89b90 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -632,8 +632,12 @@ impl RegionScanner for SeqScan { Ok(()) } - fn has_predicate(&self) -> bool { - let predicate = self.stream_ctx.input.predicate(); + fn has_predicate_without_region(&self) -> bool { + let predicate = self + .stream_ctx + .input + .predicate_group() + .predicate_without_region(); predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false) } diff --git a/src/mito2/src/read/series_scan.rs b/src/mito2/src/read/series_scan.rs index 3a006dcb67..a99e3c46bb 100644 --- a/src/mito2/src/read/series_scan.rs +++ b/src/mito2/src/read/series_scan.rs @@ -314,8 +314,12 @@ impl RegionScanner for SeriesScan { Ok(()) } - fn has_predicate(&self) -> bool { - let predicate = self.stream_ctx.input.predicate(); + fn has_predicate_without_region(&self) -> bool { + let predicate = self + .stream_ctx + .input + .predicate_group() + .predicate_without_region(); predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false) } diff --git a/src/mito2/src/read/unordered_scan.rs b/src/mito2/src/read/unordered_scan.rs index 4dc5d59b98..8dbfcf07ec 100644 --- a/src/mito2/src/read/unordered_scan.rs +++ b/src/mito2/src/read/unordered_scan.rs @@ -427,8 +427,14 @@ impl RegionScanner for UnorderedScan { .map_err(BoxedError::new) } - fn has_predicate(&self) -> bool { - let predicate = self.stream_ctx.input.predicate(); + /// Whether this scanner has any predicate other than 
region partition exprs + fn has_predicate_without_region(&self) -> bool { + let predicate = self + .stream_ctx + .input + .predicate_group() + .predicate_without_region(); + predicate.map(|p| !p.exprs().is_empty()).unwrap_or(false) } diff --git a/src/store-api/src/region_engine.rs b/src/store-api/src/region_engine.rs index 000b36cc17..fe8df673d0 100644 --- a/src/store-api/src/region_engine.rs +++ b/src/store-api/src/region_engine.rs @@ -444,8 +444,8 @@ pub trait RegionScanner: Debug + DisplayAs + Send { partition: usize, ) -> Result; - /// Check if there is any predicate that may be executed in this scanner. - fn has_predicate(&self) -> bool; + /// Check if there is any predicate, excluding region partition exprs, that may be executed in this scanner. + fn has_predicate_without_region(&self) -> bool; /// Sets whether the scanner is reading a logical region. fn set_logical_region(&mut self, logical_region: bool); @@ -857,7 +857,7 @@ impl RegionScanner for SinglePartitionScanner { Ok(result.unwrap()) } - fn has_predicate(&self) -> bool { + fn has_predicate_without_region(&self) -> bool { false } diff --git a/src/table/src/table/scan.rs b/src/table/src/table/scan.rs index 32940dfacc..a60215618b 100644 --- a/src/table/src/table/scan.rs +++ b/src/table/src/table/scan.rs @@ -335,25 +335,26 @@ impl ExecutionPlan for RegionScanExec { return Ok(Statistics::new_unknown(self.schema().as_ref())); } - let statistics = if self.append_mode && !self.scanner.lock().unwrap().has_predicate() { - let column_statistics = self - .arrow_schema - .fields - .iter() - .map(|_| ColumnStatistics { - distinct_count: Precision::Exact(self.total_rows), - null_count: Precision::Exact(0), // all null rows are counted for append-only table - ..Default::default() - }) - .collect(); - Statistics { - num_rows: Precision::Exact(self.total_rows), - total_byte_size: Default::default(), - column_statistics, - } - } else { - Statistics::new_unknown(&self.arrow_schema) - }; + let statistics = + if 
self.append_mode && !self.scanner.lock().unwrap().has_predicate_without_region() { + let column_statistics = self + .arrow_schema + .fields + .iter() + .map(|_| ColumnStatistics { + distinct_count: Precision::Exact(self.total_rows), + null_count: Precision::Exact(0), // all null rows are counted for append-only table + ..Default::default() + }) + .collect(); + Statistics { + num_rows: Precision::Exact(self.total_rows), + total_byte_size: Default::default(), + column_statistics, + } + } else { + Statistics::new_unknown(&self.arrow_schema) + }; Ok(statistics) } diff --git a/tests/cases/distributed/optimizer/count.result b/tests/cases/distributed/optimizer/count.result new file mode 100644 index 0000000000..79a6fad6f9 --- /dev/null +++ b/tests/cases/distributed/optimizer/count.result @@ -0,0 +1,465 @@ +create table "HelloWorld" (a string, b timestamp time index); + +Affected Rows: 0 + +insert into "HelloWorld" values ("a", 1) ,("b", 2); + +Affected Rows: 2 + +select count(*) from "HelloWorld"; + ++----------+ +| count(*) | ++----------+ +| 2 | ++----------+ + +create table test (a string, "BbB" timestamp time index); + +Affected Rows: 0 + +insert into test values ("c", 1) ; + +Affected Rows: 1 + +select count(*) from test; + ++----------+ +| count(*) | ++----------+ +| 1 | ++----------+ + +select count(*) from (select count(*) from test where a = 'a'); + ++----------+ +| count(*) | ++----------+ +| 1 | ++----------+ + +select count(*) from (select * from test cross join "HelloWorld"); + ++----------+ +| count(*) | ++----------+ +| 2 | ++----------+ + +drop table "HelloWorld"; + +Affected Rows: 0 + +drop table test; + +Affected Rows: 0 + +-- Append table +create table count_where_bug ( + `tag` String, + ts TimestampMillisecond time index, + num Int64, + primary key (`tag`), +) engine=mito with('append_mode'='true'); + +Affected Rows: 0 + +insert into count_where_bug (`tag`, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', 
'2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +Affected Rows: 5 + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[5 as count(Int64(1))] REDACTED +|_|_|_PlaceholderRowExec REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 5 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 
0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[count(count_where_bug.ts)@0 as count(Int64(1))] REDACTED +|_|_|_AggregateExec: mode=Final, gby=[], aggr=[count(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where `tag` = 'b'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 2 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[count(count_where_bug.ts)@0 as count(Int64(1))] REDACTED +|_|_|_AggregateExec: mode=Final, gby=[], aggr=[count(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: 
region=REDACTED, "partition_count":REDACTED REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 1 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where num != 3; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[count(count_where_bug.ts)@0 as count(Int64(1))] REDACTED +|_|_|_AggregateExec: mode=Final, gby=[], aggr=[count(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(count_where_bug.ts)] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: num@1 != 3, projection=[ts@0] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where num != 3; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 4 | ++-----------------+ + +drop table count_where_bug; + +Affected Rows: 0 + +-- partition-ed Append table +create table count_where_bug ( + `tag` String, + ts TimestampMillisecond time index, + num Int64, + primary key (`tag`), +) 
PARTITION ON COLUMNS (`tag`) ( + tag <= 'a', + tag > 'a' + ) +engine=mito with('append_mode'='true'); + +Affected Rows: 0 + +insert into count_where_bug (`tag`, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', '2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +Affected Rows: 5 + +-- This should use statistics +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[{count[count]:REDACTED} as __count_state(count_where_bug.ts)] REDACTED +|_|_|_PlaceholderRowExec REDACTED +|_|_|_| +| 1_| 1_|_ProjectionExec: expr=[{count[count]:REDACTED} as __count_state(count_where_bug.ts)] REDACTED +|_|_|_PlaceholderRowExec REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 5 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from 
count_where_bug where `tag` = 'b'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where `tag` = 'b'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 2 | 
++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":REDACTED REDACTED +|_|_|_| +| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":REDACTED REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 1 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE 
region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where num != 3; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: num@1 != 3, projection=[ts@0] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: num@1 != 3, projection=[ts@0] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where num != 3; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 4 | ++-----------------+ + +drop table count_where_bug; + +Affected Rows: 0 + diff --git a/tests/cases/distributed/optimizer/count.sql b/tests/cases/distributed/optimizer/count.sql new file mode 120000 index 0000000000..b539a04238 --- /dev/null +++ b/tests/cases/distributed/optimizer/count.sql @@ -0,0 +1 @@ +../../standalone/optimizer/count.sql \ No newline at end of file diff --git 
a/tests/cases/standalone/common/aggregate/count.result b/tests/cases/standalone/common/aggregate/count.result deleted file mode 100644 index 0a8e96b357..0000000000 --- a/tests/cases/standalone/common/aggregate/count.result +++ /dev/null @@ -1,103 +0,0 @@ -create table "HelloWorld" (a string, b timestamp time index); - -Affected Rows: 0 - -insert into "HelloWorld" values ("a", 1) ,("b", 2); - -Affected Rows: 2 - -select count(*) from "HelloWorld"; - -+----------+ -| count(*) | -+----------+ -| 2 | -+----------+ - -create table test (a string, "BbB" timestamp time index); - -Affected Rows: 0 - -insert into test values ("c", 1) ; - -Affected Rows: 1 - -select count(*) from test; - -+----------+ -| count(*) | -+----------+ -| 1 | -+----------+ - -select count(*) from (select count(*) from test where a = 'a'); - -+----------+ -| count(*) | -+----------+ -| 1 | -+----------+ - -select count(*) from (select * from test cross join "HelloWorld"); - -+----------+ -| count(*) | -+----------+ -| 2 | -+----------+ - -drop table "HelloWorld"; - -Affected Rows: 0 - -drop table test; - -Affected Rows: 0 - --- Append table -create table count_where_bug ( - `tag` String, - ts TimestampMillisecond time index, - num Int64, - primary key (`tag`), -) engine=mito with('append_mode'='true'); - -Affected Rows: 0 - -insert into count_where_bug (`tag`, ts, num) -values ('a', '2024-09-06T06:00:01Z', 1), - ('a', '2024-09-06T06:00:02Z', 2), - ('a', '2024-09-06T06:00:03Z', 3), - ('b', '2024-09-06T06:00:04Z', 4), - ('b', '2024-09-06T06:00:05Z', 5); - -Affected Rows: 5 - -select count(1) from count_where_bug where `tag` = 'b'; - -+-----------------+ -| count(Int64(1)) | -+-----------------+ -| 2 | -+-----------------+ - -select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; - -+-----------------+ -| count(Int64(1)) | -+-----------------+ -| 1 | -+-----------------+ - -select count(1) from count_where_bug where num != 3; - -+-----------------+ -| count(Int64(1)) | 
-+-----------------+ -| 4 | -+-----------------+ - -drop table count_where_bug; - -Affected Rows: 0 - diff --git a/tests/cases/standalone/common/aggregate/count.sql b/tests/cases/standalone/common/aggregate/count.sql deleted file mode 100644 index dfc86f9273..0000000000 --- a/tests/cases/standalone/common/aggregate/count.sql +++ /dev/null @@ -1,43 +0,0 @@ -create table "HelloWorld" (a string, b timestamp time index); - -insert into "HelloWorld" values ("a", 1) ,("b", 2); - -select count(*) from "HelloWorld"; - -create table test (a string, "BbB" timestamp time index); - -insert into test values ("c", 1) ; - -select count(*) from test; - -select count(*) from (select count(*) from test where a = 'a'); - -select count(*) from (select * from test cross join "HelloWorld"); - -drop table "HelloWorld"; - -drop table test; - --- Append table - -create table count_where_bug ( - `tag` String, - ts TimestampMillisecond time index, - num Int64, - primary key (`tag`), -) engine=mito with('append_mode'='true'); - -insert into count_where_bug (`tag`, ts, num) -values ('a', '2024-09-06T06:00:01Z', 1), - ('a', '2024-09-06T06:00:02Z', 2), - ('a', '2024-09-06T06:00:03Z', 3), - ('b', '2024-09-06T06:00:04Z', 4), - ('b', '2024-09-06T06:00:05Z', 5); - -select count(1) from count_where_bug where `tag` = 'b'; - -select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; - -select count(1) from count_where_bug where num != 3; - -drop table count_where_bug; diff --git a/tests/cases/standalone/optimizer/count.result b/tests/cases/standalone/optimizer/count.result new file mode 100644 index 0000000000..3819243393 --- /dev/null +++ b/tests/cases/standalone/optimizer/count.result @@ -0,0 +1,462 @@ +create table "HelloWorld" (a string, b timestamp time index); + +Affected Rows: 0 + +insert into "HelloWorld" values ("a", 1) ,("b", 2); + +Affected Rows: 2 + +select count(*) from "HelloWorld"; + ++----------+ +| count(*) | ++----------+ +| 2 | ++----------+ + +create table test (a 
string, "BbB" timestamp time index); + +Affected Rows: 0 + +insert into test values ("c", 1) ; + +Affected Rows: 1 + +select count(*) from test; + ++----------+ +| count(*) | ++----------+ +| 1 | ++----------+ + +select count(*) from (select count(*) from test where a = 'a'); + ++----------+ +| count(*) | ++----------+ +| 1 | ++----------+ + +select count(*) from (select * from test cross join "HelloWorld"); + ++----------+ +| count(*) | ++----------+ +| 2 | ++----------+ + +drop table "HelloWorld"; + +Affected Rows: 0 + +drop table test; + +Affected Rows: 0 + +-- Append table +create table count_where_bug ( + `tag` String, + ts TimestampMillisecond time index, + num Int64, + primary key (`tag`), +) engine=mito with('append_mode'='true'); + +Affected Rows: 0 + +insert into count_where_bug (`tag`, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', '2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +Affected Rows: 5 + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[5 as count(Int64(1))] REDACTED 
+|_|_|_PlaceholderRowExec REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 5 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where `tag` = 'b'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 2 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- 
SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":REDACTED REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 1 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where num != 3; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: 
num@1 != 3, projection=[ts@0] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where num != 3; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 4 | ++-----------------+ + +drop table count_where_bug; + +Affected Rows: 0 + +-- partition-ed Append table +create table count_where_bug ( + `tag` String, + ts TimestampMillisecond time index, + num Int64, + primary key (`tag`), +) PARTITION ON COLUMNS (`tag`) ( + tag <= 'a', + tag > 'a' + ) +engine=mito with('append_mode'='true'); + +Affected Rows: 0 + +insert into count_where_bug (`tag`, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', '2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +Affected Rows: 5 + +-- This should use statistics +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_ProjectionExec: expr=[{count[count]:REDACTED} as __count_state(count_where_bug.ts)] REDACTED +|_|_|_PlaceholderRowExec REDACTED +|_|_|_| +| 1_| 1_|_ProjectionExec: 
expr=[{count[count]:REDACTED} as __count_state(count_where_bug.ts)] REDACTED +|_|_|_PlaceholderRowExec REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 5 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED 
+|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where `tag` = 'b'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 2 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":REDACTED REDACTED +|_|_|_| +| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, 
gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":REDACTED REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 1 | ++-----------------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where num != 3; + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: num@1 != 3, projection=[ts@0] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, "mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +| 1_| 1_|_AggregateExec: mode=Final, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalescePartitionsExec REDACTED +|_|_|_AggregateExec: mode=Partial, gby=[], aggr=[__count_state(count_where_bug.ts)] REDACTED +|_|_|_CoalesceBatchesExec: target_batch_size=8192 REDACTED +|_|_|_FilterExec: num@1 != 3, projection=[ts@0] REDACTED +|_|_|_CooperativeExec REDACTED +|_|_|_UnorderedScan: region=REDACTED, "partition_count":{"count":1, 
"mem_ranges":1, "files":0, "file_ranges":0} REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + +select count(1) from count_where_bug where num != 3; + ++-----------------+ +| count(Int64(1)) | ++-----------------+ +| 4 | ++-----------------+ + +drop table count_where_bug; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/optimizer/count.sql b/tests/cases/standalone/optimizer/count.sql new file mode 100644 index 0000000000..caa961b1f0 --- /dev/null +++ b/tests/cases/standalone/optimizer/count.sql @@ -0,0 +1,201 @@ +create table "HelloWorld" (a string, b timestamp time index); + +insert into "HelloWorld" values ("a", 1) ,("b", 2); + +select count(*) from "HelloWorld"; + +create table test (a string, "BbB" timestamp time index); + +insert into test values ("c", 1) ; + +select count(*) from test; + +select count(*) from (select count(*) from test where a = 'a'); + +select count(*) from (select * from test cross join "HelloWorld"); + +drop table "HelloWorld"; + +drop table test; + +-- Append table + +create table count_where_bug ( + `tag` String, + ts TimestampMillisecond time index, + num Int64, + primary key (`tag`), +) engine=mito with('append_mode'='true'); + +insert into count_where_bug (`tag`, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', '2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- 
SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug; + +select count(1) from count_where_bug; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + +select count(1) from count_where_bug where `tag` = 'b'; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS 
REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where num != 3; + +select count(1) from count_where_bug where num != 3; + +drop table count_where_bug; + +-- partition-ed Append table +create table count_where_bug ( + `tag` String, + ts TimestampMillisecond time index, + num Int64, + primary key (`tag`), +) PARTITION ON COLUMNS (`tag`) ( + tag <= 'a', + tag > 'a' + ) +engine=mito with('append_mode'='true'); + +insert into count_where_bug (`tag`, ts, num) +values ('a', '2024-09-06T06:00:01Z', 1), + ('a', '2024-09-06T06:00:02Z', 2), + ('a', '2024-09-06T06:00:03Z', 3), + ('b', '2024-09-06T06:00:04Z', 4), + ('b', '2024-09-06T06:00:05Z', 5); + +-- This should use statistics +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count\]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug; + +select count(1) from count_where_bug; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + +-- SQLNESS REPLACE 
(metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where `tag` = 'b'; + +select count(1) from count_where_bug where `tag` = 'b'; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +-- SQLNESS REPLACE (\{count\[count]:)\d+(\}) {count[count]:REDACTED} +-- SQLNESS REPLACE "partition_count":\{(.*?)\} "partition_count":REDACTED +explain analyze +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + +select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z'; + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (Hash.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze +select count(1) from count_where_bug where num != 3; + +select count(1) from count_where_bug where num != 3; + +drop table count_where_bug; \ No newline at end of file