mito2/memtable/bulk/
row_group_reader.rs1use std::sync::Arc;
16
17use bytes::Bytes;
18use parquet::arrow::ProjectionMask;
19use parquet::arrow::arrow_reader::{
20 ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReader,
21 ParquetRecordBatchReaderBuilder, RowSelection,
22};
23use parquet::file::metadata::ParquetMetaData;
24use snafu::ResultExt;
25
26use crate::error;
27use crate::error::ReadDataPartSnafu;
28use crate::memtable::bulk::chunk_reader::MemtableChunkReader;
29use crate::memtable::bulk::context::BulkIterContextRef;
30use crate::sst::parquet::DEFAULT_READ_BATCH_SIZE;
31
32pub(crate) struct MemtableRowGroupReaderBuilder {
33 projection: ProjectionMask,
34 parquet_metadata: Arc<ParquetMetaData>,
35 arrow_metadata: ArrowReaderMetadata,
36 data: Bytes,
37}
38
39impl MemtableRowGroupReaderBuilder {
40 pub(crate) fn try_new(
41 context: &BulkIterContextRef,
42 projection: ProjectionMask,
43 parquet_metadata: Arc<ParquetMetaData>,
44 data: Bytes,
45 ) -> error::Result<Self> {
46 let arrow_reader_options =
48 ArrowReaderOptions::new().with_schema(context.read_format().arrow_schema().clone());
49 let arrow_metadata =
50 ArrowReaderMetadata::try_new(parquet_metadata.clone(), arrow_reader_options)
51 .context(ReadDataPartSnafu)?;
52 Ok(Self {
53 projection,
54 parquet_metadata,
55 arrow_metadata,
56 data,
57 })
58 }
59
60 pub(crate) fn build_row_group_reader(
62 &self,
63 row_group_idx: usize,
64 row_selection: Option<RowSelection>,
65 ) -> error::Result<ParquetRecordBatchReader> {
66 let chunk_reader = MemtableChunkReader::new(self.data.clone());
67
68 let mut builder = ParquetRecordBatchReaderBuilder::new_with_metadata(
69 chunk_reader,
70 self.arrow_metadata.clone(),
71 )
72 .with_row_groups(vec![row_group_idx])
73 .with_projection(self.projection.clone())
74 .with_batch_size(DEFAULT_READ_BATCH_SIZE);
75
76 if let Some(selection) = row_selection {
77 builder = builder.with_row_selection(selection);
78 }
79
80 builder.build().context(ReadDataPartSnafu)
81 }
82
83 pub(crate) fn compute_skip_fields(
85 &self,
86 context: &BulkIterContextRef,
87 row_group_idx: usize,
88 ) -> bool {
89 use crate::sst::parquet::file_range::{PreFilterMode, row_group_contains_delete};
90
91 match context.pre_filter_mode() {
92 PreFilterMode::All => false,
93 PreFilterMode::SkipFields => true,
94 PreFilterMode::SkipFieldsOnDelete => {
95 row_group_contains_delete(&self.parquet_metadata, row_group_idx, "memtable")
97 .unwrap_or(true)
98 }
99 }
100 }
101}