mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-24 17:00:37 +00:00
feat: implement metrics for Scan plan (#1812)
* add metrics in some interfaces Signed-off-by: Ruihang Xia <waynestxia@gmail.com> * calc elapsed time and rows Signed-off-by: Ruihang Xia <waynestxia@gmail.com> --------- Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
This commit is contained in:
@@ -22,6 +22,7 @@ use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::error::Result as DfResult;
|
||||
pub use datafusion::execution::context::{SessionContext, TaskContext};
|
||||
use datafusion::physical_plan::expressions::PhysicalSortExpr;
|
||||
use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
|
||||
pub use datafusion::physical_plan::Partitioning;
|
||||
use datafusion::physical_plan::Statistics;
|
||||
use datatypes::schema::SchemaRef;
|
||||
@@ -69,6 +70,10 @@ pub trait PhysicalPlan: Debug + Send + Sync {
|
||||
partition: usize,
|
||||
context: Arc<TaskContext>,
|
||||
) -> Result<SendableRecordBatchStream>;
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Adapt DataFusion's [`ExecutionPlan`](DfPhysicalPlan) to GreptimeDB's [`PhysicalPlan`].
|
||||
@@ -76,11 +81,16 @@ pub trait PhysicalPlan: Debug + Send + Sync {
|
||||
pub struct PhysicalPlanAdapter {
|
||||
schema: SchemaRef,
|
||||
df_plan: Arc<dyn DfPhysicalPlan>,
|
||||
metric: ExecutionPlanMetricsSet,
|
||||
}
|
||||
|
||||
impl PhysicalPlanAdapter {
|
||||
pub fn new(schema: SchemaRef, df_plan: Arc<dyn DfPhysicalPlan>) -> Self {
|
||||
Self { schema, df_plan }
|
||||
Self {
|
||||
schema,
|
||||
df_plan,
|
||||
metric: ExecutionPlanMetricsSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn df_plan(&self) -> Arc<dyn DfPhysicalPlan> {
|
||||
@@ -127,15 +137,21 @@ impl PhysicalPlan for PhysicalPlanAdapter {
|
||||
partition: usize,
|
||||
context: Arc<TaskContext>,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let baseline_metric = BaselineMetrics::new(&self.metric, partition);
|
||||
|
||||
let df_plan = self.df_plan.clone();
|
||||
let stream = df_plan
|
||||
.execute(partition, context)
|
||||
.context(error::GeneralDataFusionSnafu)?;
|
||||
let adapter = RecordBatchStreamAdapter::try_new(stream)
|
||||
let adapter = RecordBatchStreamAdapter::try_new_with_metrics(stream, baseline_metric)
|
||||
.context(error::ConvertDfRecordBatchStreamSnafu)?;
|
||||
|
||||
Ok(Box::pin(adapter))
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
Some(self.metric.clone_inner())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -196,6 +212,10 @@ impl DfPhysicalPlan for DfPhysicalPlanAdapter {
|
||||
fn statistics(&self) -> Statistics {
|
||||
Statistics::default()
|
||||
}
|
||||
|
||||
fn metrics(&self) -> Option<MetricsSet> {
|
||||
self.0.metrics()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -20,6 +20,7 @@ use std::task::{Context, Poll};
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion::parquet::arrow::async_reader::{AsyncFileReader, ParquetRecordBatchStream};
|
||||
use datafusion::physical_plan::metrics::BaselineMetrics;
|
||||
use datafusion::physical_plan::RecordBatchStream as DfRecordBatchStream;
|
||||
use datafusion_common::DataFusionError;
|
||||
use datatypes::schema::{Schema, SchemaRef};
|
||||
@@ -115,13 +116,31 @@ impl Stream for DfRecordBatchStreamAdapter {
|
||||
pub struct RecordBatchStreamAdapter {
|
||||
schema: SchemaRef,
|
||||
stream: DfSendableRecordBatchStream,
|
||||
metrics: Option<BaselineMetrics>,
|
||||
}
|
||||
|
||||
impl RecordBatchStreamAdapter {
|
||||
pub fn try_new(stream: DfSendableRecordBatchStream) -> Result<Self> {
|
||||
let schema =
|
||||
Arc::new(Schema::try_from(stream.schema()).context(error::SchemaConversionSnafu)?);
|
||||
Ok(Self { schema, stream })
|
||||
Ok(Self {
|
||||
schema,
|
||||
stream,
|
||||
metrics: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn try_new_with_metrics(
|
||||
stream: DfSendableRecordBatchStream,
|
||||
metrics: BaselineMetrics,
|
||||
) -> Result<Self> {
|
||||
let schema =
|
||||
Arc::new(Schema::try_from(stream.schema()).context(error::SchemaConversionSnafu)?);
|
||||
Ok(Self {
|
||||
schema,
|
||||
stream,
|
||||
metrics: Some(metrics),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,6 +154,12 @@ impl Stream for RecordBatchStreamAdapter {
|
||||
type Item = Result<RecordBatch>;
|
||||
|
||||
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
|
||||
let timer = self
|
||||
.metrics
|
||||
.as_ref()
|
||||
.map(|m| m.elapsed_compute().clone())
|
||||
.unwrap_or_default();
|
||||
let _guard = timer.timer();
|
||||
match Pin::new(&mut self.stream).poll_next(cx) {
|
||||
Poll::Pending => Poll::Pending,
|
||||
Poll::Ready(Some(df_record_batch)) => {
|
||||
|
||||
Reference in New Issue
Block a user