mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-16 02:02:56 +00:00
refactor: make table scan return physical plan (#326)
* refactor: return PhysicalPlan in Table trait's scan method, to support partitioned execution in Frontend's distributed read
* refactor: pub use necessary DataFusion types
* refactor: replace old "PhysicalPlan" and its adapters

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
This commit is contained in:
@@ -17,6 +17,7 @@ common-query = { path = "../common/query" }
|
||||
common-recordbatch = { path = "../common/recordbatch" }
|
||||
common-telemetry = { path = "../common/telemetry" }
|
||||
common-time = { path = "../common/time" }
|
||||
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2", features = ["simd"] }
|
||||
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", branch = "arrow2" }
|
||||
datatypes = { path = "../datatypes" }
|
||||
futures = "0.3"
|
||||
|
||||
@@ -426,6 +426,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use common_query::physical_plan::RuntimeEnv;
|
||||
use common_recordbatch::util;
|
||||
use datafusion_common::field_util::FieldExt;
|
||||
use datafusion_common::field_util::SchemaExt;
|
||||
@@ -520,6 +521,10 @@ mod tests {
|
||||
assert_eq!(2, table.insert(insert_req).await.unwrap());
|
||||
|
||||
let stream = table.scan(&None, &[], None).await.unwrap();
|
||||
let stream = stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = util::collect(stream).await.unwrap();
|
||||
assert_eq!(1, batches.len());
|
||||
|
||||
@@ -555,6 +560,10 @@ mod tests {
|
||||
assert_eq!(2, table.insert(insert_req).await.unwrap());
|
||||
|
||||
let stream = table.scan(&None, &[], None).await.unwrap();
|
||||
let stream = stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = util::collect(stream).await.unwrap();
|
||||
assert_eq!(1, batches.len());
|
||||
|
||||
@@ -612,6 +621,10 @@ mod tests {
|
||||
assert_eq!(2, table.insert(insert_req).await.unwrap());
|
||||
|
||||
let stream = table.scan(&None, &[], None).await.unwrap();
|
||||
let stream = stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = util::collect(stream).await.unwrap();
|
||||
assert_eq!(1, batches.len());
|
||||
assert_eq!(batches[0].df_recordbatch.num_columns(), 4);
|
||||
@@ -633,6 +646,10 @@ mod tests {
|
||||
|
||||
// Scan with projections: cpu and memory
|
||||
let stream = table.scan(&Some(vec![1, 2]), &[], None).await.unwrap();
|
||||
let stream = stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = util::collect(stream).await.unwrap();
|
||||
assert_eq!(1, batches.len());
|
||||
assert_eq!(batches[0].df_recordbatch.num_columns(), 2);
|
||||
@@ -650,6 +667,10 @@ mod tests {
|
||||
|
||||
// Scan with projections: only ts
|
||||
let stream = table.scan(&Some(vec![3]), &[], None).await.unwrap();
|
||||
let stream = stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = util::collect(stream).await.unwrap();
|
||||
assert_eq!(1, batches.len());
|
||||
assert_eq!(batches[0].df_recordbatch.num_columns(), 1);
|
||||
@@ -692,6 +713,10 @@ mod tests {
|
||||
assert_eq!(test_batch_size, table.insert(insert_req).await.unwrap());
|
||||
|
||||
let stream = table.scan(&None, &[], None).await.unwrap();
|
||||
let stream = stream
|
||||
.execute(0, Arc::new(RuntimeEnv::default()))
|
||||
.await
|
||||
.unwrap();
|
||||
let batches = util::collect(stream).await.unwrap();
|
||||
let mut total = 0;
|
||||
for batch in batches {
|
||||
|
||||
@@ -8,8 +8,9 @@ use std::sync::Arc;
|
||||
use arc_swap::ArcSwap;
|
||||
use async_trait::async_trait;
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_recordbatch::error::{Error as RecordBatchError, Result as RecordBatchResult};
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStream};
|
||||
use common_telemetry::logging;
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder};
|
||||
use datatypes::vectors::VectorRef;
|
||||
@@ -25,6 +26,7 @@ use store_api::storage::{
|
||||
use table::error::{Error as TableError, MissingColumnSnafu, Result as TableResult};
|
||||
use table::metadata::{FilterPushDownType, TableInfoRef, TableMetaBuilder};
|
||||
use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, InsertRequest};
|
||||
use table::table::scan::SimpleTableScan;
|
||||
use table::{
|
||||
metadata::{TableInfo, TableType},
|
||||
table::Table,
|
||||
@@ -154,7 +156,7 @@ impl<R: Region> Table for MitoTable<R> {
|
||||
projection: &Option<Vec<usize>>,
|
||||
filters: &[Expr],
|
||||
_limit: Option<usize>,
|
||||
) -> TableResult<SendableRecordBatchStream> {
|
||||
) -> TableResult<PhysicalPlanRef> {
|
||||
let read_ctx = ReadContext::default();
|
||||
let snapshot = self.region.snapshot(&read_ctx).map_err(TableError::new)?;
|
||||
|
||||
@@ -180,7 +182,8 @@ impl<R: Region> Table for MitoTable<R> {
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Box::pin(ChunkStream { schema, stream }))
|
||||
let stream = Box::pin(ChunkStream { schema, stream });
|
||||
Ok(Arc::new(SimpleTableScan::new(stream)))
|
||||
}
|
||||
|
||||
// Alter table changes the schemas of the table. The altering happens as cloning a new schema,
|
||||
|
||||
Reference in New Issue
Block a user