mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-30 20:00:36 +00:00
refactor: add scan_to_stream() to Table trait to postpone the stream generation (#1639)
* add scan_to_stream to Table
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* impl parquet stream
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* reorganise adapters
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* implement scan_to_stream for mito table
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* clean up
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* add location info
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* fix: table scan
* UT pass
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* impl project record batch
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* fix information schema
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* fix clippy
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* resolve CR comments
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* remove one todo
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* fix errors generated by merge commit
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* add output_ordering method to record batch stream
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* fix rustfmt
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* enhance error types
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
---------
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
@@ -79,6 +79,9 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Duplicated call to plan execute method. table: {}", table))]
|
||||
DuplicatedExecuteCall { location: Location, table: String },
|
||||
|
||||
#[snafu(display(
|
||||
"Not allowed to remove index column {} from table {}",
|
||||
column_name,
|
||||
@@ -141,7 +144,9 @@ impl ErrorExt for Error {
|
||||
Error::RemoveColumnInIndex { .. } | Error::BuildColumnDescriptor { .. } => {
|
||||
StatusCode::InvalidArguments
|
||||
}
|
||||
Error::TablesRecordBatch { .. } => StatusCode::Unexpected,
|
||||
Error::TablesRecordBatch { .. } | Error::DuplicatedExecuteCall { .. } => {
|
||||
StatusCode::Unexpected
|
||||
}
|
||||
Error::ColumnExists { .. } => StatusCode::TableColumnExists,
|
||||
Error::SchemaBuild { source, .. } => source.status_code(),
|
||||
Error::TableOperation { source } => source.status_code(),
|
||||
|
||||
@@ -22,8 +22,9 @@ use std::sync::Arc;
|
||||
use async_trait::async_trait;
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_recordbatch::SendableRecordBatchStream;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use store_api::storage::RegionNumber;
|
||||
use store_api::storage::{RegionNumber, ScanRequest};
|
||||
|
||||
use crate::error::{Result, UnsupportedSnafu};
|
||||
use crate::metadata::{FilterPushDownType, TableId, TableInfoRef, TableType};
|
||||
@@ -73,6 +74,8 @@ pub trait Table: Send + Sync {
|
||||
limit: Option<usize>,
|
||||
) -> Result<PhysicalPlanRef>;
|
||||
|
||||
async fn scan_to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream>;
|
||||
|
||||
/// Tests whether the table provider can make use of any or all filter expressions
|
||||
/// to optimise data retrieval.
|
||||
fn supports_filters_pushdown(&self, filters: &[&Expr]) -> Result<Vec<FilterPushDownType>> {
|
||||
|
||||
@@ -13,39 +13,46 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use common_query::logical_plan::Expr;
|
||||
use common_query::physical_plan::{DfPhysicalPlanAdapter, PhysicalPlanAdapter, PhysicalPlanRef};
|
||||
use common_query::physical_plan::DfPhysicalPlanAdapter;
|
||||
use common_query::DfPhysicalPlan;
|
||||
use common_telemetry::debug;
|
||||
use common_recordbatch::OrderOption;
|
||||
use datafusion::arrow::datatypes::SchemaRef as DfSchemaRef;
|
||||
use datafusion::datasource::datasource::TableProviderFilterPushDown as DfTableProviderFilterPushDown;
|
||||
use datafusion::datasource::{TableProvider, TableType as DfTableType};
|
||||
use datafusion::error::Result as DfResult;
|
||||
use datafusion::execution::context::SessionState;
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion_expr::expr::Expr as DfExpr;
|
||||
use datatypes::schema::{SchemaRef as TableSchemaRef, SchemaRef};
|
||||
use snafu::prelude::*;
|
||||
use datafusion_physical_expr::expressions::Column;
|
||||
use datafusion_physical_expr::PhysicalSortExpr;
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
use crate::metadata::TableInfoRef;
|
||||
use crate::table::{FilterPushDownType, Table, TableRef, TableType};
|
||||
use super::scan::StreamScanAdapter;
|
||||
use crate::table::{TableRef, TableType};
|
||||
|
||||
/// Greptime Table -> datafusion TableProvider
|
||||
/// Adapt greptime's [TableRef] to DataFusion's [TableProvider].
|
||||
pub struct DfTableProviderAdapter {
|
||||
table: TableRef,
|
||||
scan_req: Arc<Mutex<ScanRequest>>,
|
||||
}
|
||||
|
||||
impl DfTableProviderAdapter {
|
||||
pub fn new(table: TableRef) -> Self {
|
||||
Self { table }
|
||||
Self {
|
||||
table,
|
||||
scan_req: Arc::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn table(&self) -> TableRef {
|
||||
self.table.clone()
|
||||
}
|
||||
|
||||
/// Records `order_opts` as the `output_ordering` of the scan request held by this adapter.
///
/// The request is kept behind a mutex, so the hint can be set through a shared reference.
///
/// # Panics
///
/// Panics if the mutex guarding the scan request is poisoned.
pub fn with_ordering_hint(&self, order_opts: &[OrderOption]) {
    let mut scan_req = self.scan_req.lock().unwrap();
    scan_req.output_ordering = Some(order_opts.to_vec());
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
@@ -74,8 +81,36 @@ impl TableProvider for DfTableProviderAdapter {
|
||||
limit: Option<usize>,
|
||||
) -> DfResult<Arc<dyn DfPhysicalPlan>> {
|
||||
let filters: Vec<Expr> = filters.iter().map(Clone::clone).map(Into::into).collect();
|
||||
let inner = self.table.scan(projection, &filters, limit).await?;
|
||||
Ok(Arc::new(DfPhysicalPlanAdapter(inner)))
|
||||
let request = {
|
||||
let mut request = self.scan_req.lock().unwrap();
|
||||
request.filters = filters;
|
||||
request.projection = projection.cloned();
|
||||
request.limit = limit;
|
||||
request.clone()
|
||||
};
|
||||
let stream = self.table.scan_to_stream(request).await?;
|
||||
|
||||
// build sort physical expr
|
||||
let schema = stream.schema();
|
||||
let sort_expr = stream.output_ordering().map(|order_opts| {
|
||||
order_opts
|
||||
.iter()
|
||||
.map(|order_opt| {
|
||||
let col_name = schema.column_name_by_index(order_opt.index);
|
||||
let col_expr = Arc::new(Column::new(col_name, order_opt.index));
|
||||
PhysicalSortExpr {
|
||||
expr: col_expr,
|
||||
options: order_opt.options,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
});
|
||||
|
||||
let mut stream_adapter = StreamScanAdapter::new(stream);
|
||||
if let Some(sort_expr) = sort_expr {
|
||||
stream_adapter = stream_adapter.with_output_ordering(sort_expr);
|
||||
}
|
||||
Ok(Arc::new(DfPhysicalPlanAdapter(Arc::new(stream_adapter))))
|
||||
}
|
||||
|
||||
fn supports_filters_pushdown(
|
||||
@@ -92,100 +127,3 @@ impl TableProvider for DfTableProviderAdapter {
|
||||
.map(|v| v.into_iter().map(Into::into).collect::<Vec<_>>())?)
|
||||
}
|
||||
}
|
||||
|
||||
/// Datafusion TableProvider -> greptime Table
|
||||
pub struct TableAdapter {
|
||||
schema: TableSchemaRef,
|
||||
table_provider: Arc<dyn TableProvider>,
|
||||
}
|
||||
|
||||
impl TableAdapter {
|
||||
pub fn new(table_provider: Arc<dyn TableProvider>) -> Result<Self> {
|
||||
Ok(Self {
|
||||
schema: Arc::new(
|
||||
table_provider
|
||||
.schema()
|
||||
.try_into()
|
||||
.context(error::SchemaConversionSnafu)?,
|
||||
),
|
||||
table_provider,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl Table for TableAdapter {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> TableSchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn table_info(&self) -> TableInfoRef {
|
||||
unreachable!("Should not call table_info of TableAdaptor directly")
|
||||
}
|
||||
|
||||
fn table_type(&self) -> TableType {
|
||||
match self.table_provider.table_type() {
|
||||
DfTableType::Base => TableType::Base,
|
||||
DfTableType::View => TableType::View,
|
||||
DfTableType::Temporary => TableType::Temporary,
|
||||
}
|
||||
}
|
||||
|
||||
async fn scan(
|
||||
&self,
|
||||
projection: Option<&Vec<usize>>,
|
||||
filters: &[Expr],
|
||||
limit: Option<usize>,
|
||||
) -> Result<PhysicalPlanRef> {
|
||||
let ctx = SessionContext::new();
|
||||
let filters: Vec<DfExpr> = filters.iter().map(|e| e.df_expr().clone()).collect();
|
||||
debug!("TableScan filter size: {}", filters.len());
|
||||
let execution_plan = self
|
||||
.table_provider
|
||||
.scan(&ctx.state(), projection, &filters, limit)
|
||||
.await
|
||||
.context(error::DatafusionSnafu)?;
|
||||
let schema: SchemaRef = Arc::new(
|
||||
execution_plan
|
||||
.schema()
|
||||
.try_into()
|
||||
.context(error::SchemaConversionSnafu)?,
|
||||
);
|
||||
Ok(Arc::new(PhysicalPlanAdapter::new(schema, execution_plan)))
|
||||
}
|
||||
|
||||
fn supports_filters_pushdown(&self, filters: &[&Expr]) -> Result<Vec<FilterPushDownType>> {
|
||||
self.table_provider
|
||||
.supports_filters_pushdown(&filters.iter().map(|x| x.df_expr()).collect::<Vec<_>>())
|
||||
.context(error::DatafusionSnafu)
|
||||
.map(|v| v.into_iter().map(Into::into).collect::<Vec<_>>())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use datafusion::arrow;
|
||||
use datafusion::datasource::empty::EmptyTable;
|
||||
|
||||
use super::*;
|
||||
use crate::metadata::TableType::Base;
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn test_table_adaptor_info() {
|
||||
let df_table = Arc::new(EmptyTable::new(Arc::new(arrow::datatypes::Schema::empty())));
|
||||
let table_adapter = TableAdapter::new(df_table).unwrap();
|
||||
let _ = table_adapter.table_info();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_table_adaptor_type() {
|
||||
let df_table = Arc::new(EmptyTable::new(Arc::new(arrow::datatypes::Schema::empty())));
|
||||
let table_adapter = TableAdapter::new(df_table).unwrap();
|
||||
assert_eq!(Base, table_adapter.table_type());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ use std::sync::Arc;
|
||||
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_recordbatch::error::Result as RecordBatchResult;
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStream};
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
|
||||
use datafusion::arrow::compute::SortOptions;
|
||||
use datafusion::arrow::record_batch::RecordBatch as DfRecordBatch;
|
||||
use datafusion_common::from_slice::FromSlice;
|
||||
@@ -29,11 +29,11 @@ use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, SchemaBuilder, SchemaRef};
|
||||
use futures::task::{Context, Poll};
|
||||
use futures::Stream;
|
||||
use store_api::storage::RegionNumber;
|
||||
use store_api::storage::{RegionNumber, ScanRequest};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::metadata::{TableId, TableInfoBuilder, TableInfoRef, TableMetaBuilder, TableType};
|
||||
use crate::table::scan::SimpleTableScan;
|
||||
use crate::table::scan::StreamScanAdapter;
|
||||
use crate::table::{Expr, Table};
|
||||
|
||||
const NUMBER_COLUMN: &str = "number";
|
||||
@@ -132,10 +132,18 @@ impl Table for NumbersTable {
|
||||
)
|
||||
.into()];
|
||||
Ok(Arc::new(
|
||||
SimpleTableScan::new(stream).with_output_ordering(output_ordering),
|
||||
StreamScanAdapter::new(stream).with_output_ordering(output_ordering),
|
||||
))
|
||||
}
|
||||
|
||||
async fn scan_to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
Ok(Box::pin(NumbersStream {
|
||||
limit: request.limit.unwrap_or(100) as u32,
|
||||
schema: self.schema.clone(),
|
||||
already_run: false,
|
||||
}))
|
||||
}
|
||||
|
||||
async fn flush(&self, _region_number: Option<RegionNumber>, _wait: Option<bool>) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -25,22 +25,23 @@ use datafusion_physical_expr::PhysicalSortExpr;
|
||||
use datatypes::schema::SchemaRef;
|
||||
use snafu::OptionExt;
|
||||
|
||||
pub struct SimpleTableScan {
|
||||
/// Adapt greptime's [SendableRecordBatchStream] to DataFusion's [PhysicalPlan].
|
||||
pub struct StreamScanAdapter {
|
||||
stream: Mutex<Option<SendableRecordBatchStream>>,
|
||||
schema: SchemaRef,
|
||||
output_ordering: Option<Vec<PhysicalSortExpr>>,
|
||||
}
|
||||
|
||||
impl Debug for SimpleTableScan {
|
||||
impl Debug for StreamScanAdapter {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SimpleTableScan")
|
||||
f.debug_struct("StreamScanAdapter")
|
||||
.field("stream", &"<SendableRecordBatchStream>")
|
||||
.field("schema", &self.schema)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl SimpleTableScan {
|
||||
impl StreamScanAdapter {
|
||||
pub fn new(stream: SendableRecordBatchStream) -> Self {
|
||||
let schema = stream.schema();
|
||||
|
||||
@@ -57,7 +58,7 @@ impl SimpleTableScan {
|
||||
}
|
||||
}
|
||||
|
||||
impl PhysicalPlan for SimpleTableScan {
|
||||
impl PhysicalPlan for StreamScanAdapter {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
@@ -126,7 +127,7 @@ mod test {
|
||||
RecordBatches::try_new(schema.clone(), vec![batch1.clone(), batch2.clone()]).unwrap();
|
||||
let stream = recordbatches.as_stream();
|
||||
|
||||
let scan = SimpleTableScan::new(stream);
|
||||
let scan = StreamScanAdapter::new(stream);
|
||||
|
||||
assert_eq!(scan.schema(), schema);
|
||||
|
||||
|
||||
@@ -16,11 +16,12 @@ use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_recordbatch::EmptyRecordBatchStream;
|
||||
use common_recordbatch::{EmptyRecordBatchStream, SendableRecordBatchStream};
|
||||
use store_api::storage::ScanRequest;
|
||||
|
||||
use crate::metadata::{TableInfo, TableInfoBuilder, TableInfoRef, TableMetaBuilder, TableType};
|
||||
use crate::requests::{CreateTableRequest, InsertRequest};
|
||||
use crate::table::scan::SimpleTableScan;
|
||||
use crate::table::scan::StreamScanAdapter;
|
||||
use crate::{Result, Table};
|
||||
|
||||
pub struct EmptyTable {
|
||||
@@ -82,7 +83,11 @@ impl Table for EmptyTable {
|
||||
_filters: &[common_query::prelude::Expr],
|
||||
_limit: Option<usize>,
|
||||
) -> Result<PhysicalPlanRef> {
|
||||
let scan = SimpleTableScan::new(Box::pin(EmptyRecordBatchStream::new(self.schema())));
|
||||
let scan = StreamScanAdapter::new(Box::pin(EmptyRecordBatchStream::new(self.schema())));
|
||||
Ok(Arc::new(scan))
|
||||
}
|
||||
|
||||
async fn scan_to_stream(&self, _: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
Ok(Box::pin(EmptyRecordBatchStream::new(self.schema())))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,20 +21,20 @@ use common_error::prelude::BoxedError;
|
||||
use common_query::physical_plan::PhysicalPlanRef;
|
||||
use common_query::prelude::Expr;
|
||||
use common_recordbatch::error::Result as RecordBatchResult;
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStream};
|
||||
use common_recordbatch::{RecordBatch, RecordBatchStream, SendableRecordBatchStream};
|
||||
use datatypes::prelude::*;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::vectors::UInt32Vector;
|
||||
use futures::task::{Context, Poll};
|
||||
use futures::Stream;
|
||||
use snafu::prelude::*;
|
||||
use store_api::storage::RegionNumber;
|
||||
use store_api::storage::{RegionNumber, ScanRequest};
|
||||
|
||||
use crate::error::{Result, SchemaConversionSnafu, TableProjectionSnafu, TablesRecordBatchSnafu};
|
||||
use crate::metadata::{
|
||||
TableId, TableInfoBuilder, TableInfoRef, TableMetaBuilder, TableType, TableVersion,
|
||||
};
|
||||
use crate::table::scan::SimpleTableScan;
|
||||
use crate::table::scan::StreamScanAdapter;
|
||||
use crate::{ColumnStatistics, Table, TableStatistics};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -167,12 +167,43 @@ impl Table for MemTable {
|
||||
)
|
||||
.map_err(BoxedError::new)
|
||||
.context(TablesRecordBatchSnafu)?;
|
||||
Ok(Arc::new(SimpleTableScan::new(Box::pin(MemtableStream {
|
||||
Ok(Arc::new(StreamScanAdapter::new(Box::pin(MemtableStream {
|
||||
schema: recordbatch.schema.clone(),
|
||||
recordbatch: Some(recordbatch),
|
||||
}))))
|
||||
}
|
||||
|
||||
async fn scan_to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
|
||||
let df_recordbatch = if let Some(indices) = request.projection {
|
||||
self.recordbatch
|
||||
.df_record_batch()
|
||||
.project(&indices)
|
||||
.context(TableProjectionSnafu)?
|
||||
} else {
|
||||
self.recordbatch.df_record_batch().clone()
|
||||
};
|
||||
|
||||
let rows = df_recordbatch.num_rows();
|
||||
let limit = if let Some(limit) = request.limit {
|
||||
limit.min(rows)
|
||||
} else {
|
||||
rows
|
||||
};
|
||||
let df_recordbatch = df_recordbatch.slice(0, limit);
|
||||
|
||||
let recordbatch = RecordBatch::try_from_df_record_batch(
|
||||
Arc::new(Schema::try_from(df_recordbatch.schema()).context(SchemaConversionSnafu)?),
|
||||
df_recordbatch,
|
||||
)
|
||||
.map_err(BoxedError::new)
|
||||
.context(TablesRecordBatchSnafu)?;
|
||||
|
||||
Ok(Box::pin(MemtableStream {
|
||||
schema: recordbatch.schema.clone(),
|
||||
recordbatch: Some(recordbatch),
|
||||
}))
|
||||
}
|
||||
|
||||
fn statistics(&self) -> Option<TableStatistics> {
|
||||
let df_recordbatch = self.recordbatch.df_record_batch();
|
||||
let num_rows = df_recordbatch.num_rows();
|
||||
|
||||
Reference in New Issue
Block a user