refactor: make table scan return physical plan (#326)

* refactor: return PhysicalPlan in Table trait's scan method, to support partitioned execution in Frontend's distribute read

* refactor: pub use necessary DataFusion types

* refactor: replace old "PhysicalPlan" and its adapters

Co-authored-by: luofucong <luofucong@greptime.com>
Co-authored-by: Yingwen <realevenyag@gmail.com>
This commit is contained in:
LFC
2022-10-25 11:34:53 +08:00
committed by GitHub
parent 64dac51e83
commit 2ca667cbdf
39 changed files with 920 additions and 600 deletions

View File

@@ -103,6 +103,18 @@ pub enum Error {
#[snafu(display("Illegal catalog manager state: {}", msg))]
IllegalManagerState { backtrace: Backtrace, msg: String },
#[snafu(display("Failed to scan system catalog table, source: {}", source))]
SystemCatalogTableScan {
#[snafu(backtrace)]
source: table::error::Error,
},
#[snafu(display("Failed to execute system catalog table scan, source: {}", source))]
SystemCatalogTableScanExec {
#[snafu(backtrace)]
source: common_query::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -131,7 +143,10 @@ impl ErrorExt for Error {
| Error::CreateSystemCatalog { source, .. }
| Error::InsertTableRecord { source, .. }
| Error::OpenTable { source, .. }
| Error::CreateTable { source, .. } => source.status_code(),
| Error::CreateTable { source, .. }
| Error::SystemCatalogTableScan { source } => source.status_code(),
Error::SystemCatalogTableScanExec { source } => source.status_code(),
}
}

View File

@@ -3,6 +3,8 @@ use std::collections::HashMap;
use std::sync::Arc;
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_query::physical_plan::RuntimeEnv;
use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use common_time::timestamp::Timestamp;
@@ -22,7 +24,7 @@ use crate::consts::{
SYSTEM_CATALOG_TABLE_NAME,
};
use crate::error::{
CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
self, CreateSystemCatalogSnafu, EmptyValueSnafu, Error, InvalidEntryTypeSnafu, InvalidKeySnafu,
OpenSystemCatalogSnafu, Result, ValueDeserializeSnafu,
};
@@ -51,7 +53,7 @@ impl Table for SystemCatalogTable {
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::Result<SendableRecordBatchStream> {
) -> table::Result<PhysicalPlanRef> {
panic!("System catalog table does not support scan!")
}
@@ -111,7 +113,15 @@ impl SystemCatalogTable {
/// Create a stream of all entries inside system catalog table
pub async fn records(&self) -> Result<SendableRecordBatchStream> {
let full_projection = None;
let stream = self.table.scan(&full_projection, &[], None).await.unwrap();
let scan = self
.table
.scan(&full_projection, &[], None)
.await
.context(error::SystemCatalogTableScanSnafu)?;
let stream = scan
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.context(error::SystemCatalogTableScanExecSnafu)?;
Ok(stream)
}
}

View File

@@ -7,8 +7,9 @@ use std::task::{Context, Poll};
use async_stream::stream;
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
use common_recordbatch::error::Result as RecordBatchResult;
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
use common_recordbatch::{RecordBatch, RecordBatchStream};
use datatypes::prelude::{ConcreteDataType, VectorBuilder};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::value::Value;
@@ -17,6 +18,7 @@ use futures::Stream;
use snafu::ResultExt;
use table::engine::TableEngineRef;
use table::metadata::{TableId, TableInfoRef};
use table::table::scan::SimpleTableScan;
use table::{Table, TableRef};
use crate::consts::{INFORMATION_SCHEMA_NAME, SYSTEM_CATALOG_TABLE_NAME};
@@ -62,7 +64,7 @@ impl Table for Tables {
_projection: &Option<Vec<usize>>,
_filters: &[Expr],
_limit: Option<usize>,
) -> table::error::Result<SendableRecordBatchStream> {
) -> table::error::Result<PhysicalPlanRef> {
let catalogs = self.catalogs.clone();
let schema_ref = self.schema.clone();
let engine_name = self.engine_name.clone();
@@ -89,10 +91,11 @@ impl Table for Tables {
}
});
Ok(Box::pin(TablesRecordBatchStream {
let stream = Box::pin(TablesRecordBatchStream {
schema: self.schema.clone(),
stream: Box::pin(stream),
}))
});
Ok(Arc::new(SimpleTableScan::new(stream)))
}
}
@@ -277,6 +280,7 @@ fn build_schema_for_tables() -> Schema {
#[cfg(test)]
mod tests {
use common_query::physical_plan::RuntimeEnv;
use datatypes::arrow::array::Utf8Array;
use datatypes::arrow::datatypes::DataType;
use futures_util::StreamExt;
@@ -298,7 +302,11 @@ mod tests {
catalog_list.register_catalog("test_catalog".to_string(), catalog_provider);
let tables = Tables::new(catalog_list, "test_engine".to_string());
let mut tables_stream = tables.scan(&None, &[], None).await.unwrap();
let tables_stream = tables.scan(&None, &[], None).await.unwrap();
let mut tables_stream = tables_stream
.execute(0, Arc::new(RuntimeEnv::default()))
.await
.unwrap();
if let Some(t) = tables_stream.next().await {
let batch = t.unwrap().df_recordbatch;
assert_eq!(1, batch.num_rows());