refactor: add scan_to_stream() to Table trait to postpone the stream generation (#1639)

* add scan_to_stream to Table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl parquet stream

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* reorganise adapters

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* implement scan_to_stream for mito table

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* clean up

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add location info

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix: table scan

* UT pass

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* impl project record batch

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix information schema

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix clippy

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* resolve CR comments

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* remove one todo

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix errors generated by merge commit

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* add output_ordering method to record batch stream

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* fix rustfmt

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

* enhance error types

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>

---------

Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
Co-authored-by: Lei, HUANG <mrsatangel@gmail.com>
This commit is contained in:
Ruihang Xia
2023-05-29 20:03:47 +08:00
committed by GitHub
parent 0eaae634fa
commit b27c569ae0
34 changed files with 824 additions and 327 deletions

View File

@@ -10,6 +10,7 @@ bytes = "1.1"
common-base = { path = "../common/base" }
common-error = { path = "../common/error" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-time = { path = "../common/time" }
datatypes = { path = "../datatypes" }
derive_builder = "0.11"

View File

@@ -16,6 +16,7 @@ use std::collections::{HashMap, HashSet};
use common_error::ext::ErrorExt;
use common_query::logical_plan::Expr;
use common_recordbatch::OrderOption;
use datatypes::vectors::VectorRef;
use crate::storage::{ColumnDescriptor, RegionDescriptor, SequenceNumber};
@@ -38,7 +39,7 @@ pub trait WriteRequest: Send {
fn delete(&mut self, keys: HashMap<String, VectorRef>) -> Result<(), Self::Error>;
}
#[derive(Default)]
#[derive(Default, Clone, Debug)]
pub struct ScanRequest {
/// Max sequence number to read, None for latest sequence.
///
@@ -49,6 +50,13 @@ pub struct ScanRequest {
pub projection: Option<Vec<usize>>,
/// Filters pushed down
pub filters: Vec<Expr>,
/// Expected output ordering. This is only a hint and isn't guaranteed.
pub output_ordering: Option<Vec<OrderOption>>,
/// `limit` can be used to reduce the amount of data scanned
/// from the data source as a performance optimization.
/// If set, it contains the number of rows needed by the caller.
/// The data source should return *at least* this number of rows if available.
pub limit: Option<usize>,
}
#[derive(Debug)]