refactor: Remove PhysicalPlan trait and use ExecutionPlan directly (#3894)
* refactor: remove PhysicalPlan
* refactor: remove physical_plan mod
* refactor: import
* fix merge error

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Ruihang Xia <waynestxia@gmail.com>
@@ -16,20 +16,19 @@ use std::any::Any;
 use std::sync::{Arc, Mutex};
 
 use common_query::logical_plan::Expr;
-use common_query::physical_plan::DfPhysicalPlanAdapter;
-use common_query::DfPhysicalPlan;
 use common_recordbatch::OrderOption;
 use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
 use datafusion::datasource::{TableProvider, TableType as DfTableType};
 use datafusion::error::Result as DfResult;
 use datafusion::execution::context::SessionState;
+use datafusion::physical_plan::ExecutionPlan;
 use datafusion_expr::expr::Expr as DfExpr;
 use datafusion_expr::TableProviderFilterPushDown as DfTableProviderFilterPushDown;
 use datafusion_physical_expr::expressions::Column;
 use datafusion_physical_expr::PhysicalSortExpr;
 use store_api::storage::ScanRequest;
 
-use super::scan::StreamScanAdapter;
+use crate::table::scan::StreamScanAdapter;
 use crate::table::{TableRef, TableType};
 
 /// Adapt greptime's [TableRef] to DataFusion's [TableProvider].
@@ -84,7 +83,7 @@ impl TableProvider for DfTableProviderAdapter {
         projection: Option<&Vec<usize>>,
         filters: &[DfExpr],
         limit: Option<usize>,
-    ) -> DfResult<Arc<dyn DfPhysicalPlan>> {
+    ) -> DfResult<Arc<dyn ExecutionPlan>> {
         let filters: Vec<Expr> = filters.iter().map(Clone::clone).map(Into::into).collect();
         let request = {
             let mut request = self.scan_req.lock().unwrap();
@@ -115,7 +114,7 @@ impl TableProvider for DfTableProviderAdapter {
         if let Some(sort_expr) = sort_expr {
             stream_adapter = stream_adapter.with_output_ordering(sort_expr);
        }
-        Ok(Arc::new(DfPhysicalPlanAdapter(Arc::new(stream_adapter))))
+        Ok(Arc::new(stream_adapter))
     }
 
     fn supports_filters_pushdown(
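With DfPhysicalPlanAdapter gone, the plan that scan() returns is consumed by DataFusion as-is. A minimal sketch of driving such a provider end to end — not part of this PR; the scan_all helper is hypothetical and the DataFusion calls assume the version this commit builds against:

use std::sync::Arc;

use datafusion::datasource::TableProvider;
use datafusion::error::Result as DfResult;
use datafusion::execution::context::SessionContext;
use datafusion::physical_plan::{collect, ExecutionPlan};

// Hypothetical helper: scan a provider and collect every record batch.
async fn scan_all(provider: Arc<dyn TableProvider>) -> DfResult<()> {
    let ctx = SessionContext::new();
    // scan() now yields Arc<dyn ExecutionPlan> directly; no wrapping step.
    let plan: Arc<dyn ExecutionPlan> = provider
        .scan(&ctx.state(), None, &[], None)
        .await?;
    let batches = collect(plan, ctx.task_ctx()).await?;
    println!("collected {} batches", batches.len());
    Ok(())
}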
@@ -13,30 +13,32 @@
 // limitations under the License.
 
 use std::any::Any;
-use std::fmt::{Debug, Formatter};
+use std::fmt::{self, Debug, Formatter};
 use std::pin::Pin;
 use std::sync::{Arc, Mutex};
 use std::task::{Context, Poll};
 
-use common_query::error as query_error;
-use common_query::error::Result as QueryResult;
-use common_query::physical_plan::{Partitioning, PhysicalPlan, PhysicalPlanRef};
-use common_recordbatch::adapter::RecordBatchMetrics;
-use common_recordbatch::error::Result as RecordBatchResult;
-use common_recordbatch::{OrderOption, RecordBatch, RecordBatchStream, SendableRecordBatchStream};
+use common_query::error::ExecuteRepeatedlySnafu;
+use common_recordbatch::{DfRecordBatch, DfSendableRecordBatchStream, SendableRecordBatchStream};
 use common_telemetry::tracing::Span;
 use common_telemetry::tracing_context::TracingContext;
 use datafusion::error::Result as DfResult;
 use datafusion::execution::context::TaskContext;
 use datafusion::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet};
-use datafusion::physical_plan::{ExecutionMode, PlanProperties};
-use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortExpr};
+use datafusion::physical_plan::{
+    DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties,
+    RecordBatchStream as DfRecordBatchStream,
+};
+use datafusion_common::DataFusionError;
+use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr};
 use datatypes::arrow::datatypes::SchemaRef as ArrowSchemaRef;
 use datatypes::schema::SchemaRef;
 use futures::{Stream, StreamExt};
 use snafu::OptionExt;
 
 use crate::table::metrics::MemoryUsageMetrics;
 
-/// Adapt greptime's [SendableRecordBatchStream] to GreptimeDB's [PhysicalPlan].
+/// Adapt greptime's [SendableRecordBatchStream] to [ExecutionPlan].
 pub struct StreamScanAdapter {
     stream: Mutex<Option<SendableRecordBatchStream>>,
     schema: SchemaRef,
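The newly imported PlanProperties, EquivalenceProperties, Partitioning, and ExecutionMode feed the properties field returned by ExecutionPlan::properties() below; its construction happens outside this hunk. A minimal sketch, assuming the three-argument PlanProperties constructor of this DataFusion era and a single-partition, bounded scan:

use std::sync::Arc;

use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::physical_plan::{ExecutionMode, PlanProperties};
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};

fn example_properties() -> PlanProperties {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    PlanProperties::new(
        // No ordering/equivalence knowledge beyond the schema itself.
        EquivalenceProperties::new(schema),
        // One output partition: the adapter wraps a single stream.
        Partitioning::UnknownPartitioning(1),
        // The underlying stream is finite.
        ExecutionMode::Bounded,
    )
}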
@@ -76,39 +78,40 @@ impl StreamScanAdapter {
     }
 }
 
-impl PhysicalPlan for StreamScanAdapter {
+impl ExecutionPlan for StreamScanAdapter {
     fn as_any(&self) -> &dyn Any {
         self
     }
 
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
+    fn schema(&self) -> ArrowSchemaRef {
+        self.schema.arrow_schema().clone()
     }
 
     fn properties(&self) -> &PlanProperties {
         &self.properties
     }
 
-    fn children(&self) -> Vec<PhysicalPlanRef> {
+    fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> {
         vec![]
     }
 
-    fn with_new_children(&self, _children: Vec<PhysicalPlanRef>) -> QueryResult<PhysicalPlanRef> {
-        Ok(Arc::new(Self::new(
-            self.stream.lock().unwrap().take().unwrap(),
-        )))
+    fn with_new_children(
+        self: Arc<Self>,
+        _children: Vec<Arc<dyn ExecutionPlan>>,
+    ) -> DfResult<Arc<dyn ExecutionPlan>> {
+        Ok(self)
     }
 
     fn execute(
         &self,
         partition: usize,
         context: Arc<TaskContext>,
-    ) -> QueryResult<SendableRecordBatchStream> {
+    ) -> DfResult<DfSendableRecordBatchStream> {
         let tracing_context = TracingContext::from_json(context.session_id().as_str());
         let span = tracing_context.attach(common_telemetry::tracing::info_span!("stream_adapter"));
 
         let mut stream = self.stream.lock().unwrap();
-        let stream = stream.take().context(query_error::ExecuteRepeatedlySnafu)?;
+        let stream = stream.take().context(ExecuteRepeatedlySnafu)?;
         let mem_usage_metrics = MemoryUsageMetrics::new(&self.metric, partition);
         Ok(Box::pin(StreamWithMetricWrapper {
             stream,
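Two details of the rewritten impl are worth calling out: with_new_children can simply return Ok(self) because the adapter is a leaf node (children() is empty), and execute is one-shot, since the wrapped stream can only be taken out of the Mutex<Option<_>> once. A standalone sketch of that take-once pattern — the OneShot type is hypothetical, not PR code:

use std::sync::Mutex;

// One-shot slot: the value can be taken exactly once.
struct OneShot<T> {
    slot: Mutex<Option<T>>,
}

impl<T> OneShot<T> {
    fn new(value: T) -> Self {
        Self { slot: Mutex::new(Some(value)) }
    }

    // Returns Some(value) on the first call and None afterwards, mirroring
    // how StreamScanAdapter::execute turns the None case into an error.
    fn take(&self) -> Option<T> {
        self.slot.lock().unwrap().take()
    }
}

The second take() returns None, which execute surfaces through ExecuteRepeatedlySnafu; the test at the bottom of this diff asserts exactly that behavior.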
@@ -122,6 +125,12 @@ impl PhysicalPlan for StreamScanAdapter {
     }
 }
 
+impl DisplayAs for StreamScanAdapter {
+    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
+
 pub struct StreamWithMetricWrapper {
     stream: SendableRecordBatchStream,
     metric: MemoryUsageMetrics,
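DisplayAs is a supertrait of DataFusion's ExecutionPlan, which is why this impl becomes necessary once StreamScanAdapter implements ExecutionPlan directly; fmt_as is what plan rendering calls. A small sketch, assuming DataFusion's displayable helper, of how it surfaces:

use datafusion::physical_plan::{displayable, ExecutionPlan};

// Render a physical plan tree one node per line; each node is printed
// through its DisplayAs::fmt_as implementation.
fn print_plan(plan: &dyn ExecutionPlan) {
    println!("{}", displayable(plan).indent(true));
}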
@@ -129,49 +138,51 @@ pub struct StreamWithMetricWrapper {
 }
 
 impl Stream for StreamWithMetricWrapper {
-    type Item = RecordBatchResult<RecordBatch>;
+    type Item = DfResult<DfRecordBatch>;
 
     fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
         let this = self.get_mut();
         let _enter = this.span.enter();
-        let poll = this.stream.poll_next_unpin(cx);
-        if let Poll::Ready(Some(Ok(record_batch))) = &poll {
-            let batch_mem_size = record_batch
-                .columns()
-                .iter()
-                .map(|vec_ref| vec_ref.memory_size())
-                .sum::<usize>();
-            // we don't record elapsed time here
-            // since it's calling storage api involving I/O ops
-            this.metric.record_mem_usage(batch_mem_size);
-            this.metric.record_output(record_batch.num_rows());
+        match this.stream.poll_next_unpin(cx) {
+            Poll::Ready(Some(result)) => match result {
+                Ok(record_batch) => {
+                    let batch_mem_size = record_batch
+                        .columns()
+                        .iter()
+                        .map(|vec_ref| vec_ref.memory_size())
+                        .sum::<usize>();
+                    // we don't record elapsed time here
+                    // since it's calling storage api involving I/O ops
+                    this.metric.record_mem_usage(batch_mem_size);
+                    this.metric.record_output(record_batch.num_rows());
+                    Poll::Ready(Some(Ok(record_batch.into_df_record_batch())))
+                }
+                Err(e) => Poll::Ready(Some(Err(DataFusionError::External(Box::new(e))))),
+            },
+            Poll::Ready(None) => Poll::Ready(None),
+            Poll::Pending => Poll::Pending,
         }
-
-        poll
     }
 
     fn size_hint(&self) -> (usize, Option<usize>) {
         self.stream.size_hint()
     }
 }
 
-impl RecordBatchStream for StreamWithMetricWrapper {
-    fn schema(&self) -> SchemaRef {
-        self.stream.schema()
-    }
-
-    fn metrics(&self) -> Option<RecordBatchMetrics> {
-        self.stream.metrics()
-    }
-
-    fn output_ordering(&self) -> Option<&[OrderOption]> {
-        self.stream.output_ordering()
+impl DfRecordBatchStream for StreamWithMetricWrapper {
+    fn schema(&self) -> ArrowSchemaRef {
+        self.stream.schema().arrow_schema().clone()
     }
 }
 
 #[cfg(test)]
 mod test {
-    use common_recordbatch::{util, RecordBatch, RecordBatches};
+    use common_recordbatch::{RecordBatch, RecordBatches};
     use datafusion::prelude::SessionContext;
     use datatypes::data_type::ConcreteDataType;
     use datatypes::schema::{ColumnSchema, Schema};
     use datatypes::vectors::Int32Vector;
+    use futures::TryStreamExt;
 
     use super::*;
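The poll_next rewrite also changes the error path: greptime errors are now boxed into DataFusionError::External rather than staying in the common_recordbatch error type. The same conversion can be written generically over any fallible stream; a sketch with an assumed helper name, not code from this PR:

use datafusion_common::DataFusionError;
use futures::{Stream, TryStreamExt};

// Box any std::error::Error-compatible stream error into DataFusion's
// catch-all External variant, as poll_next does above for single items.
fn into_df_errors<S, T, E>(stream: S) -> impl Stream<Item = Result<T, DataFusionError>>
where
    S: Stream<Item = Result<T, E>>,
    E: std::error::Error + Send + Sync + 'static,
{
    stream.map_err(|e| DataFusionError::External(Box::new(e)))
}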
@@ -211,9 +222,9 @@ mod test {
         assert_eq!(actual, schema);
 
         let stream = scan.execute(0, ctx.task_ctx()).unwrap();
-        let recordbatches = util::collect(stream).await.unwrap();
-        assert_eq!(recordbatches[0], batch1);
-        assert_eq!(recordbatches[1], batch2);
+        let recordbatches = stream.try_collect::<Vec<_>>().await.unwrap();
+        assert_eq!(batch1.df_record_batch(), &recordbatches[0]);
+        assert_eq!(batch2.df_record_batch(), &recordbatches[1]);
 
         let result = scan.execute(0, ctx.task_ctx());
         assert!(result.is_err());