refactor: make table scan return physical plan (#326)

* refactor: return PhysicalPlan in Table trait's scan method, to support partitioned execution in Frontend's distributed read

* refactor: pub use necessary DataFusion types

* refactor: replace old "PhysicalPlan" and its adapters

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
This commit is contained in:
LFC
2022-10-25 11:34:53 +08:00
committed by GitHub
parent 64dac51e83
commit 2ca667cbdf
39 changed files with 920 additions and 600 deletions

View File

@@ -17,6 +17,7 @@ common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
datatypes = { path = "../datatypes" }
futures = "0.3"

View File

@@ -426,6 +426,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
#[cfg(test)]
mod tests {
use common_query::physical_plan::RuntimeEnv;
use common_recordbatch::util;
use datafusion_common::field_util::FieldExt;
use datafusion_common::field_util::SchemaExt;
@@ -520,6 +521,10 @@ mod tests {
assert_eq!(2, table.insert(insert_req).await.unwrap());
let stream = table.scan(&None, &[], None).await.unwrap();
let stream = stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let batches = util::collect(stream).await.unwrap();
assert_eq!(1, batches.len());
@@ -555,6 +560,10 @@ mod tests {
assert_eq!(2, table.insert(insert_req).await.unwrap());
let stream = table.scan(&None, &[], None).await.unwrap();
let stream = stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let batches = util::collect(stream).await.unwrap();
assert_eq!(1, batches.len());
@@ -612,6 +621,10 @@ mod tests {
assert_eq!(2, table.insert(insert_req).await.unwrap());
let stream = table.scan(&None, &[], None).await.unwrap();
let stream = stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let batches = util::collect(stream).await.unwrap();
assert_eq!(1, batches.len());
assert_eq!(batches[0].df_recordbatch.num_columns(), 4);
@@ -633,6 +646,10 @@ mod tests {
// Scan with projections: cpu and memory
let stream = table.scan(&Some(vec![1, 2]), &[], None).await.unwrap();
let stream = stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let batches = util::collect(stream).await.unwrap();
assert_eq!(1, batches.len());
assert_eq!(batches[0].df_recordbatch.num_columns(), 2);
@@ -650,6 +667,10 @@ mod tests {
// Scan with projections: only ts
let stream = table.scan(&Some(vec![3]), &[], None).await.unwrap();
let stream = stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let batches = util::collect(stream).await.unwrap();
assert_eq!(1, batches.len());
assert_eq!(batches[0].df_recordbatch.num_columns(), 1);
@@ -692,6 +713,10 @@ mod tests {
assert_eq!(test_batch_size, table.insert(insert_req).await.unwrap());
let stream = table.scan(&None, &[], None).await.unwrap();
let stream = stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
let batches = util::collect(stream).await.unwrap();
let mut total = 0;
for batch in batches {

View File

@@ -8,8 +8,9 @@ use std::sync::Arc;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_recordbatch::error::{Error as RecordBatchError, Result as RecordBatchResult};
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{RecordBatch, RecordBatchStream};
use common_telemetry::logging;
use datatypes::schema::{ColumnSchema, SchemaBuilder};
use datatypes::vectors::VectorRef;
@@ -25,6 +26,7 @@ use store_api::storage::{
use table::error::{Error as TableError, MissingColumnSnafu, Result as TableResult};
use table::metadata::{FilterPushDownType, TableInfoRef, TableMetaBuilder};
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
use table::table::scan::SimpleTableScan;
use table::{
metadata::{TableInfo, TableType},
table::Table,
@@ -154,7 +156,7 @@ impl<R: Region> Table for MitoTable<R> {
projection: &Option<Vec<usize>>,
filters: &[Expr],
_limit: Option<usize>,
) -> TableResult<SendableRecordBatchStream> {
) -> TableResult<PhysicalPlanRef> {
let read_ctx = ReadContext::default();
let snapshot = self.region.snapshot(&read_ctx).map_err(TableError::new)?;
@@ -180,7 +182,8 @@ impl<R: Region> Table for MitoTable<R> {
}
});
Ok(Box::pin(ChunkStream { schema, stream }))
let stream = Box::pin(ChunkStream { schema, stream });
Ok(Arc::new(SimpleTableScan::new(stream)))
}
// Alter table changes the schemas of the table. The altering happens as cloning a new schema,