feat: add orc stream (#1981)

* add orc stream #1820

* update orc stream

* fix: create orcstreamadapter with opt projection

* fix: license header

* docs: delete comment
This commit is contained in:
gobraves
2023-07-21 13:54:02 +08:00
committed by GitHub
parent 953b8a0132
commit e3ac3298b1
6 changed files with 320 additions and 13 deletions

View File

@@ -16,6 +16,7 @@ use std::sync::Arc;
use common_datasource::file_format::csv::{CsvConfigBuilder, CsvFormat, CsvOpener};
use common_datasource::file_format::json::{JsonFormat, JsonOpener};
use common_datasource::file_format::orc::{OrcFormat, OrcOpener};
use common_datasource::file_format::parquet::{DefaultParquetFileReaderFactory, ParquetFormat};
use common_datasource::file_format::Format;
use common_query::prelude::Expr;
@@ -85,6 +86,14 @@ fn build_json_opener(
))
}
fn build_orc_opener(output_schema: Arc<ArrowSchema>, config: &ScanPlanConfig) -> Result<OrcOpener> {
Ok(OrcOpener::new(
config.store.clone(),
output_schema,
config.projection.cloned(),
))
}
fn build_record_batch_stream<T: FileOpener + Send + 'static>(
opener: T,
file_schema: Arc<ArrowSchema>,
@@ -213,6 +222,22 @@ fn new_parquet_stream_with_exec_plan(
))
}
fn new_orc_stream(
_ctx: &CreateScanPlanContext,
config: &ScanPlanConfig,
_format: &OrcFormat,
) -> Result<SendableRecordBatchStream> {
let file_schema = config.file_schema.arrow_schema().clone();
let opener = build_orc_opener(file_schema.clone(), config)?;
build_record_batch_stream(
opener,
file_schema,
config.files,
config.projection,
config.limit,
)
}
#[derive(Debug, Clone)]
pub struct ScanPlanConfig<'a> {
pub file_schema: SchemaRef,
@@ -232,6 +257,6 @@ pub fn create_stream(
Format::Csv(format) => new_csv_stream(ctx, config, format),
Format::Json(format) => new_json_stream(ctx, config, format),
Format::Parquet(format) => new_parquet_stream_with_exec_plan(ctx, config, format),
_ => error::UnsupportedFormatSnafu { format: *format }.fail(),
Format::Orc(format) => new_orc_stream(ctx, config, format),
}
}