refactor: add scan_to_stream() to Table trait to postpone the stream generation (#1639)

* add scan_to_stream to Table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl parquet stream

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* reorganise adapters

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* implement scan_to_stream for mito table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add location info

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: table scan

* UT pass

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl project record batch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix information schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove one todo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix errors generated by merge commit

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add output_ordering method to record batch stream

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix rustfmt

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* enhance error types

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
Ruihang Xia
2023-05-29 20:03:47 +08:00
committed by GitHub
parent 0eaae634fa
commit b27c569ae0
34 changed files with 824 additions and 327 deletions

View File

@@ -54,5 +54,6 @@ paste = "1.0"
rand.workspace = true
statrs = "0.16"
stats-cli = "3.0"
store-api = { path = "../store-api" }
streaming-stats = "0.2"
tokio-stream = "0.1"

View File

@@ -34,8 +34,8 @@ pub enum Error {
#[snafu(display("General catalog error: {}", source))]
Catalog {
#[snafu(backtrace)]
source: catalog::error::Error,
location: Location,
},
#[snafu(display("Catalog not found: {}", catalog))]
@@ -49,35 +49,49 @@ pub enum Error {
#[snafu(display("Failed to do vector computation, source: {}", source))]
VectorComputation {
#[snafu(backtrace)]
source: datatypes::error::Error,
location: Location,
},
#[snafu(display("Failed to create RecordBatch, source: {}", source))]
CreateRecordBatch {
#[snafu(backtrace)]
source: common_recordbatch::error::Error,
location: Location,
},
#[snafu(display("Failure during query execution, source: {}", source))]
QueryExecution { source: BoxedError },
QueryExecution {
source: BoxedError,
location: Location,
},
#[snafu(display("Failure during query planning, source: {}", source))]
QueryPlan { source: BoxedError },
QueryPlan {
source: BoxedError,
location: Location,
},
#[snafu(display("Failure during query parsing, query: {}, source: {}", query, source))]
QueryParse { query: String, source: BoxedError },
QueryParse {
query: String,
source: BoxedError,
location: Location,
},
#[snafu(display("Illegal access to catalog: {} and schema: {}", catalog, schema))]
QueryAccessDenied { catalog: String, schema: String },
QueryAccessDenied {
catalog: String,
schema: String,
location: Location,
},
#[snafu(display("The SQL string has multiple statements, query: {}", query))]
MultipleStatements { query: String, location: Location },
#[snafu(display("Failed to convert Datafusion schema: {}", source))]
ConvertDatafusionSchema {
#[snafu(backtrace)]
source: datatypes::error::Error,
location: Location,
},
#[snafu(display("Failed to parse timestamp `{}`: {}", raw, source))]
@@ -102,7 +116,7 @@ pub enum Error {
#[snafu(display("General SQL error: {}", source))]
Sql {
#[snafu(backtrace)]
location: Location,
source: sql::error::Error,
},
@@ -122,21 +136,21 @@ pub enum Error {
#[snafu(display("Failed to convert value to sql value: {}", value))]
ConvertSqlValue {
value: Value,
#[snafu(backtrace)]
source: sql::error::Error,
location: Location,
},
#[snafu(display("Failed to convert concrete type to sql type: {:?}", datatype))]
ConvertSqlType {
datatype: ConcreteDataType,
#[snafu(backtrace)]
source: sql::error::Error,
location: Location,
},
#[snafu(display("Failed to parse SQL, source: {}", source))]
ParseSql {
#[snafu(backtrace)]
source: sql::error::Error,
location: Location,
},
#[snafu(display("Missing required field: {}", name))]
@@ -150,32 +164,32 @@ pub enum Error {
#[snafu(display("Failed to build data source backend, source: {}", source))]
BuildBackend {
#[snafu(backtrace)]
source: common_datasource::error::Error,
location: Location,
},
#[snafu(display("Failed to list objects, source: {}", source))]
ListObjects {
#[snafu(backtrace)]
source: common_datasource::error::Error,
location: Location,
},
#[snafu(display("Failed to parse file format: {}", source))]
ParseFileFormat {
#[snafu(backtrace)]
source: common_datasource::error::Error,
location: Location,
},
#[snafu(display("Failed to infer schema: {}", source))]
InferSchema {
#[snafu(backtrace)]
source: common_datasource::error::Error,
location: Location,
},
#[snafu(display("Failed to convert datafusion schema, source: {}", source))]
ConvertSchema {
#[snafu(backtrace)]
source: datatypes::error::Error,
location: Location,
},
}
@@ -201,15 +215,15 @@ impl ErrorExt for Error {
ParseFileFormat { source, .. } | InferSchema { source, .. } => source.status_code(),
QueryAccessDenied { .. } => StatusCode::AccessDenied,
Catalog { source } => source.status_code(),
VectorComputation { source } | ConvertDatafusionSchema { source } => {
Catalog { source, .. } => source.status_code(),
VectorComputation { source, .. } | ConvertDatafusionSchema { source, .. } => {
source.status_code()
}
ParseSql { source } => source.status_code(),
CreateRecordBatch { source } => source.status_code(),
QueryExecution { source } | QueryPlan { source } => source.status_code(),
ParseSql { source, .. } => source.status_code(),
CreateRecordBatch { source, .. } => source.status_code(),
QueryExecution { source, .. } | QueryPlan { source, .. } => source.status_code(),
DataFusion { .. } | MissingTimestampColumn { .. } => StatusCode::Internal,
Sql { source } => source.status_code(),
Sql { source, .. } => source.status_code(),
PlanSql { .. } => StatusCode::PlanQuery,
ConvertSqlType { source, .. } | ConvertSqlValue { source, .. } => source.status_code(),
}

View File

@@ -18,13 +18,14 @@ use std::sync::Arc;
use catalog::local::{new_memory_catalog_list, MemoryCatalogProvider, MemorySchemaProvider};
use common_query::physical_plan::PhysicalPlanRef;
use common_query::prelude::Expr;
use common_recordbatch::RecordBatch;
use common_recordbatch::{RecordBatch, SendableRecordBatchStream};
use common_time::range::TimestampRange;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::vectors::{Int64Vector, TimestampMillisecondVector};
use store_api::storage::ScanRequest;
use table::metadata::{FilterPushDownType, TableInfoRef};
use table::predicate::TimeRangePredicateBuilder;
use table::test_util::MemTable;
@@ -69,6 +70,14 @@ impl Table for MemTableWrapper {
self.inner.scan(projection, filters, limit).await
}
/// Remembers the filters carried by `request`, then forwards the request to
/// the wrapped table.
///
/// The filters from `request` replace whatever was previously held in
/// `self.filter`; the stream (or error) produced by the inner table's
/// `scan_to_stream` is returned unchanged.
async fn scan_to_stream(
    &self,
    request: ScanRequest,
) -> table::Result<SendableRecordBatchStream> {
    // Copy the filters up front: `request` is moved into the inner call below.
    let pushed_filters = request.filters.clone();
    {
        // Keep the write guard in its own scope so it is released before the
        // inner scan is awaited.
        let mut recorded = self.filter.write().await;
        *recorded = pushed_filters;
    }
    let inner_table = &self.inner;
    inner_table.scan_to_stream(request).await
}
fn supports_filters_pushdown(
&self,
filters: &[&Expr],