mito2/memtable/bulk/
context.rs1use std::collections::VecDeque;
18use std::sync::Arc;
19
20use common_recordbatch::filter::SimpleFilterEvaluator;
21use mito_codec::row_converter::build_primary_key_codec;
22use parquet::file::metadata::ParquetMetaData;
23use store_api::metadata::RegionMetadataRef;
24use store_api::storage::ColumnId;
25use table::predicate::Predicate;
26
27use crate::error::Result;
28use crate::sst::parquet::file_range::{PreFilterMode, RangeBase};
29use crate::sst::parquet::flat_format::FlatReadFormat;
30use crate::sst::parquet::prefilter::{CachedPrimaryKeyFilter, build_bulk_filter_plan};
31use crate::sst::parquet::stats::RowGroupPruningStats;
32
/// Reference-counted handle to a [`BulkIterContext`].
pub(crate) type BulkIterContextRef = Arc<BulkIterContext>;
34
/// State needed to read bulk memtable data: the read format, the filters
/// and the predicate used for row group pruning.
pub struct BulkIterContext {
    /// Read-side state (filters, read format, codec, pre-filter mode, ...).
    pub(crate) base: RangeBase,
    /// Predicate supplied at construction; `row_groups_to_read` uses it to
    /// prune row groups. `None` means every row group is read.
    pub(crate) predicate: Option<Predicate>,
    /// Primary key filters taken from the plan returned by
    /// `build_bulk_filter_plan`. When `None`, `build_pk_filter` returns `None`.
    pk_filters: Option<Arc<Vec<SimpleFilterEvaluator>>>,
}
42
43impl BulkIterContext {
44 pub fn new(
45 region_metadata: RegionMetadataRef,
46 projection: Option<&[ColumnId]>,
47 predicate: Option<Predicate>,
48 skip_auto_convert: bool,
49 ) -> Result<Self> {
50 Self::new_with_pre_filter_mode(
51 region_metadata,
52 projection,
53 predicate,
54 skip_auto_convert,
55 PreFilterMode::All,
56 )
57 }
58
59 pub fn new_with_pre_filter_mode(
60 region_metadata: RegionMetadataRef,
61 projection: Option<&[ColumnId]>,
62 predicate: Option<Predicate>,
63 skip_auto_convert: bool,
64 pre_filter_mode: PreFilterMode,
65 ) -> Result<Self> {
66 let codec = build_primary_key_codec(®ion_metadata);
67
68 let read_format = if let Some(column_ids) = projection {
69 FlatReadFormat::new(
70 region_metadata.clone(),
71 column_ids.iter().copied(),
72 None,
73 "memtable",
74 skip_auto_convert,
75 )?
76 } else {
77 FlatReadFormat::new(
78 region_metadata.clone(),
79 region_metadata
80 .column_metadatas
81 .iter()
82 .map(|col| col.column_id),
83 None,
84 "memtable",
85 skip_auto_convert,
86 )?
87 };
88
89 let dyn_filters = predicate
90 .as_ref()
91 .map(|pred| pred.dyn_filters().as_ref().clone())
92 .unwrap_or_default();
93
94 let filter_plan = build_bulk_filter_plan(&read_format, predicate.as_ref());
95
96 Ok(Self {
97 base: RangeBase {
98 filters: filter_plan.remaining_simple_filters,
99 dyn_filters,
100 read_format,
101 prune_schema: region_metadata.schema.clone(),
102 expected_metadata: Some(region_metadata),
103 codec,
104 compat_batch: None,
106 compaction_projection_mapper: None,
107 pre_filter_mode,
108 partition_filter: None,
109 },
110 predicate,
111 pk_filters: filter_plan.pk_filters,
112 })
113 }
114
115 pub(crate) fn row_groups_to_read(
117 &self,
118 file_meta: &Arc<ParquetMetaData>,
119 skip_fields: bool,
120 ) -> VecDeque<usize> {
121 let region_meta = self.base.read_format.metadata();
122 let row_groups = file_meta.row_groups();
123 let stats =
125 RowGroupPruningStats::new(row_groups, &self.base.read_format, None, skip_fields);
126 if let Some(predicate) = self.predicate.as_ref() {
127 predicate
128 .prune_with_stats(&stats, region_meta.schema.arrow_schema())
129 .iter()
130 .zip(0..file_meta.num_row_groups())
131 .filter_map(|(selected, row_group)| {
132 if !*selected {
133 return None;
134 }
135 Some(row_group)
136 })
137 .collect::<VecDeque<_>>()
138 } else {
139 (0..file_meta.num_row_groups()).collect()
140 }
141 }
142
143 pub(crate) fn build_pk_filter(&self) -> Option<CachedPrimaryKeyFilter> {
146 let pk_filters = self.pk_filters.as_ref()?;
147 let metadata = self.base.read_format.metadata();
148 let inner = self
150 .base
151 .codec
152 .primary_key_filter(metadata, Arc::clone(pk_filters), false);
153 Some(CachedPrimaryKeyFilter::new(inner))
154 }
155
156 pub(crate) fn read_format(&self) -> &FlatReadFormat {
157 &self.base.read_format
158 }
159
160 pub(crate) fn pre_filter_mode(&self) -> PreFilterMode {
162 self.base.pre_filter_mode
163 }
164
165 pub(crate) fn region_id(&self) -> store_api::storage::RegionId {
167 self.base.read_format.metadata().region_id
168 }
169}