fix: unconditional statistics (#4694)

* fix: unconditional statistics

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add more sqlness cases

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
Ruihang Xia
2024-09-07 12:28:11 +08:00
committed by GitHub
parent 5d9f8a3be7
commit d2d62e0c6f
7 changed files with 93 additions and 1 deletions

View File

@@ -709,6 +709,10 @@ impl ScanInput {
rows_in_files + rows_in_memtables
}
/// Returns a clone of the predicate stored on this scan input.
///
/// Yields `None` when the input carries no predicate. The value is cloned on
/// every call, so callers receive an owned copy and never borrow from `self`.
pub(crate) fn predicate(&self) -> Option<Predicate> {
self.predicate.clone()
}
/// Retrieves [`PartitionRange`] from memtable and files
pub(crate) fn partition_ranges(&self) -> Vec<PartitionRange> {
let mut id = 0;

View File

@@ -515,6 +515,11 @@ impl RegionScanner for SeqScan {
self.properties.partitions = ranges;
Ok(())
}
/// Reports whether this scan carries at least one filter expression.
///
/// Returns `false` both when the scan input has no predicate at all and
/// when the predicate exists but its expression list is empty.
fn has_predicate(&self) -> bool {
    self.stream_ctx
        .input
        .predicate()
        .is_some_and(|pred| !pred.exprs().is_empty())
}
}
impl DisplayAs for SeqScan {

View File

@@ -228,6 +228,11 @@ impl RegionScanner for UnorderedScan {
Ok(stream)
}
/// Reports whether this scan carries at least one filter expression.
///
/// Returns `false` both when the scan input has no predicate at all and
/// when the predicate exists but its expression list is empty.
fn has_predicate(&self) -> bool {
    self.stream_ctx
        .input
        .predicate()
        .is_some_and(|pred| !pred.exprs().is_empty())
}
}
impl DisplayAs for UnorderedScan {

View File

@@ -233,6 +233,9 @@ pub trait RegionScanner: Debug + DisplayAs + Send {
/// # Panics
/// Panics if the `partition` is out of bound.
fn scan_partition(&self, partition: usize) -> Result<SendableRecordBatchStream, BoxedError>;
/// Check if there is any predicate that may be executed in this scanner.
///
/// Returns `true` when the scanner holds at least one filter expression, and
/// `false` when it holds none.
///
/// NOTE(review): `RegionScanExec::statistics` appears to consult this for
/// append-mode tables before building its statistics — confirm against the caller.
fn has_predicate(&self) -> bool;
}
pub type RegionScannerRef = Box<dyn RegionScanner>;
@@ -367,6 +370,10 @@ impl RegionScanner for SinglePartitionScanner {
))
})
}
/// Always returns `false`: this scanner reports that it has no predicate.
fn has_predicate(&self) -> bool {
false
}
}
impl DisplayAs for SinglePartitionScanner {

View File

@@ -180,7 +180,7 @@ impl ExecutionPlan for RegionScanExec {
}
fn statistics(&self) -> DfResult<Statistics> {
let statistics = if self.append_mode {
let statistics = if self.append_mode && !self.scanner.lock().unwrap().has_predicate() {
let column_statistics = self
.arrow_schema
.fields

View File

@@ -54,3 +54,50 @@ drop table test;
Affected Rows: 0
-- Append table
create table count_where_bug (
tag String,
ts TimestampMillisecond time index,
num Int64,
primary key (tag),
) engine=mito with('append_mode'='true');
Affected Rows: 0
insert into count_where_bug (tag, ts, num)
values ('a', '2024-09-06T06:00:01Z', 1),
('a', '2024-09-06T06:00:02Z', 2),
('a', '2024-09-06T06:00:03Z', 3),
('b', '2024-09-06T06:00:04Z', 4),
('b', '2024-09-06T06:00:05Z', 5);
Affected Rows: 5
select count(1) from count_where_bug where tag = 'b';
+-----------------+
| COUNT(Int64(1)) |
+-----------------+
| 2 |
+-----------------+
select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z';
+-----------------+
| COUNT(Int64(1)) |
+-----------------+
| 1 |
+-----------------+
select count(1) from count_where_bug where num != 3;
+-----------------+
| COUNT(Int64(1)) |
+-----------------+
| 4 |
+-----------------+
drop table count_where_bug;
Affected Rows: 0

View File

@@ -17,3 +17,27 @@ select count(*) from (select * from test cross join "HelloWorld");
drop table "HelloWorld";
drop table test;
-- Append table
create table count_where_bug (
tag String,
ts TimestampMillisecond time index,
num Int64,
primary key (tag),
) engine=mito with('append_mode'='true');
insert into count_where_bug (tag, ts, num)
values ('a', '2024-09-06T06:00:01Z', 1),
('a', '2024-09-06T06:00:02Z', 2),
('a', '2024-09-06T06:00:03Z', 3),
('b', '2024-09-06T06:00:04Z', 4),
('b', '2024-09-06T06:00:05Z', 5);
select count(1) from count_where_bug where tag = 'b';
select count(1) from count_where_bug where ts > '2024-09-06T06:00:04Z';
select count(1) from count_where_bug where num != 3;
drop table count_where_bug;