mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-29 19:30:37 +00:00
feat: add orc stream (#1981)
* add orc stream #1820 * update orc stream * fix: create orcstreamadapter with opt projection * fix: license header * docs: delete comment
This commit is contained in:
@@ -16,6 +16,7 @@ use std::sync::Arc;
|
||||
|
||||
use common_datasource::file_format::csv::{CsvConfigBuilder, CsvFormat, CsvOpener};
|
||||
use common_datasource::file_format::json::{JsonFormat, JsonOpener};
|
||||
use common_datasource::file_format::orc::{OrcFormat, OrcOpener};
|
||||
use common_datasource::file_format::parquet::{DefaultParquetFileReaderFactory, ParquetFormat};
|
||||
use common_datasource::file_format::Format;
|
||||
use common_query::prelude::Expr;
|
||||
@@ -85,6 +86,14 @@ fn build_json_opener(
|
||||
))
|
||||
}
|
||||
|
||||
fn build_orc_opener(output_schema: Arc<ArrowSchema>, config: &ScanPlanConfig) -> Result<OrcOpener> {
|
||||
Ok(OrcOpener::new(
|
||||
config.store.clone(),
|
||||
output_schema,
|
||||
config.projection.cloned(),
|
||||
))
|
||||
}
|
||||
|
||||
fn build_record_batch_stream<T: FileOpener + Send + 'static>(
|
||||
opener: T,
|
||||
file_schema: Arc<ArrowSchema>,
|
||||
@@ -213,6 +222,22 @@ fn new_parquet_stream_with_exec_plan(
|
||||
))
|
||||
}
|
||||
|
||||
fn new_orc_stream(
|
||||
_ctx: &CreateScanPlanContext,
|
||||
config: &ScanPlanConfig,
|
||||
_format: &OrcFormat,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let file_schema = config.file_schema.arrow_schema().clone();
|
||||
let opener = build_orc_opener(file_schema.clone(), config)?;
|
||||
build_record_batch_stream(
|
||||
opener,
|
||||
file_schema,
|
||||
config.files,
|
||||
config.projection,
|
||||
config.limit,
|
||||
)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScanPlanConfig<'a> {
|
||||
pub file_schema: SchemaRef,
|
||||
@@ -232,6 +257,6 @@ pub fn create_stream(
|
||||
Format::Csv(format) => new_csv_stream(ctx, config, format),
|
||||
Format::Json(format) => new_json_stream(ctx, config, format),
|
||||
Format::Parquet(format) => new_parquet_stream_with_exec_plan(ctx, config, format),
|
||||
_ => error::UnsupportedFormatSnafu { format: *format }.fail(),
|
||||
Format::Orc(format) => new_orc_stream(ctx, config, format),
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user