mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-06-02 13:20:40 +00:00
perf: count(*) for append-only tables (#4545)
* feat: support fast count(*) for append-only tables
* fix: total_rows stats in time series memtable
* fix: sqlness result changes for SinglePartitionScanner -> StreamScanAdapter
* fix: some cr comments
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
use std::any::Any;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use common_query::stream::StreamScanAdapter;
|
||||
use common_recordbatch::OrderOption;
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::datasource::{TableProvider, TableType as DfTableType};
|
||||
@@ -25,10 +26,8 @@ use datafusion_expr::expr::Expr;
|
||||
use datafusion_expr::TableProviderFilterPushDown as DfTableProviderFilterPushDown;
|
||||
use datafusion_physical_expr::expressions::Column;
|
||||
use datafusion_physical_expr::PhysicalSortExpr;
|
||||
use store_api::region_engine::SinglePartitionScanner;
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
use crate::table::scan::RegionScanExec;
|
||||
use crate::table::{TableRef, TableType};
|
||||
|
||||
/// Adapt greptime's [TableRef] to DataFusion's [TableProvider].
|
||||
@@ -114,12 +113,9 @@ impl TableProvider for DfTableProviderAdapter {
|
||||
.collect::<Vec<_>>()
|
||||
});
|
||||
|
||||
let scanner = Box::new(SinglePartitionScanner::new(stream));
|
||||
let mut plan = RegionScanExec::new(scanner);
|
||||
if let Some(sort_expr) = sort_expr {
|
||||
plan = plan.with_output_ordering(sort_expr);
|
||||
}
|
||||
Ok(Arc::new(plan))
|
||||
Ok(Arc::new(
|
||||
StreamScanAdapter::new(stream).with_output_ordering(sort_expr),
|
||||
))
|
||||
}
|
||||
|
||||
fn supports_filters_pushdown(
|
||||
|
||||
@@ -29,7 +29,8 @@ use datafusion::physical_plan::{
|
||||
DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties,
|
||||
RecordBatchStream as DfRecordBatchStream,
|
||||
};
|
||||
use datafusion_common::DataFusionError;
|
||||
use datafusion_common::stats::Precision;
|
||||
use datafusion_common::{ColumnStatistics, DataFusionError, Statistics};
|
||||
use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr};
|
||||
use datatypes::arrow::datatypes::SchemaRef as ArrowSchemaRef;
|
||||
use futures::{Stream, StreamExt};
|
||||
@@ -46,6 +47,8 @@ pub struct RegionScanExec {
|
||||
output_ordering: Option<Vec<PhysicalSortExpr>>,
|
||||
metric: ExecutionPlanMetricsSet,
|
||||
properties: PlanProperties,
|
||||
append_mode: bool,
|
||||
total_rows: usize,
|
||||
}
|
||||
|
||||
impl RegionScanExec {
|
||||
@@ -64,12 +67,16 @@ impl RegionScanExec {
|
||||
Partitioning::UnknownPartitioning(num_output_partition),
|
||||
ExecutionMode::Bounded,
|
||||
);
|
||||
let append_mode = scanner_props.append_mode();
|
||||
let total_rows = scanner_props.total_rows();
|
||||
Self {
|
||||
scanner: Mutex::new(scanner),
|
||||
arrow_schema,
|
||||
output_ordering: None,
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
properties,
|
||||
append_mode,
|
||||
total_rows,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,6 +159,28 @@ impl ExecutionPlan for RegionScanExec {
|
||||
/// Exposes the metrics collected for this plan node.
///
/// Always returns `Some`, wrapping whatever `clone_inner` yields for the
/// plan's `ExecutionPlanMetricsSet`.
fn metrics(&self) -> Option<MetricsSet> {
    let collected = self.metric.clone_inner();
    Some(collected)
}
|
||||
|
||||
fn statistics(&self) -> DfResult<Statistics> {
|
||||
let statistics = if self.append_mode {
|
||||
let column_statistics = self
|
||||
.arrow_schema
|
||||
.fields
|
||||
.iter()
|
||||
.map(|_| ColumnStatistics {
|
||||
distinct_count: Precision::Exact(self.total_rows),
|
||||
..Default::default()
|
||||
})
|
||||
.collect();
|
||||
Statistics {
|
||||
num_rows: Precision::Exact(self.total_rows),
|
||||
total_byte_size: Default::default(),
|
||||
column_statistics,
|
||||
}
|
||||
} else {
|
||||
Statistics::new_unknown(&self.arrow_schema)
|
||||
};
|
||||
Ok(statistics)
|
||||
}
|
||||
}
|
||||
|
||||
impl DisplayAs for RegionScanExec {
|
||||
@@ -257,7 +286,7 @@ mod test {
|
||||
RecordBatches::try_new(schema.clone(), vec![batch1.clone(), batch2.clone()]).unwrap();
|
||||
let stream = recordbatches.as_stream();
|
||||
|
||||
let scanner = Box::new(SinglePartitionScanner::new(stream));
|
||||
let scanner = Box::new(SinglePartitionScanner::new(stream, false));
|
||||
let plan = RegionScanExec::new(scanner);
|
||||
let actual: SchemaRef = Arc::new(
|
||||
plan.properties
|
||||
|
||||
Reference in New Issue
Block a user