feat: expose scanner metrics to df execution metrics (#5699)

* feat: add metrics list to scanner

* chore: add report metrics method

* feat: use df metrics in PartitionMetrics

* feat: pass execution metrics to scan partition

* refactor: remove PartitionMetricsList

* feat: better debug format for ScanMetricsSet

* feat: do not expose all metrics to execution metrics by default

* refactor: use struct destructuring

* feat: add metrics list to scanner

* chore: Add custom Debug for ScanMetricsSet and partition metrics display

* test: update sqlness result
This commit is contained in:
Yingwen
2025-03-28 07:40:39 +08:00
committed by GitHub
parent 76a58a07e1
commit dbc25dd8da
7 changed files with 428 additions and 121 deletions

View File

@@ -26,6 +26,7 @@ use common_error::ext::{BoxedError, PlainError};
use common_error::status_code::StatusCode;
use common_recordbatch::SendableRecordBatchStream;
use common_time::Timestamp;
use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;
use datafusion_physical_plan::{DisplayAs, DisplayFormatType};
use datatypes::schema::SchemaRef;
use futures::future::join_all;
@@ -342,7 +343,11 @@ pub trait RegionScanner: Debug + DisplayAs + Send {
///
/// # Panics
/// Panics if the `partition` is out of bounds.
fn scan_partition(&self, partition: usize) -> Result<SendableRecordBatchStream, BoxedError>;
fn scan_partition(
&self,
metrics_set: &ExecutionPlanMetricsSet,
partition: usize,
) -> Result<SendableRecordBatchStream, BoxedError>;
/// Check if there is any predicate that may be executed in this scanner.
fn has_predicate(&self) -> bool;
@@ -562,7 +567,11 @@ impl RegionScanner for SinglePartitionScanner {
Ok(())
}
fn scan_partition(&self, _partition: usize) -> Result<SendableRecordBatchStream, BoxedError> {
fn scan_partition(
&self,
_metrics_set: &ExecutionPlanMetricsSet,
_partition: usize,
) -> Result<SendableRecordBatchStream, BoxedError> {
let mut stream = self.stream.lock().unwrap();
stream.take().ok_or_else(|| {
BoxedError::new(PlainError::new(