mito2/memtable/bulk/
context.rs1use std::collections::VecDeque;
18use std::sync::Arc;
19
20use common_recordbatch::filter::SimpleFilterEvaluator;
21use mito_codec::row_converter::build_primary_key_codec;
22use parquet::file::metadata::ParquetMetaData;
23use store_api::metadata::RegionMetadataRef;
24use store_api::storage::ColumnId;
25use table::predicate::Predicate;
26
27use crate::error::Result;
28use crate::sst::parquet::file_range::{PreFilterMode, RangeBase};
29use crate::sst::parquet::flat_format::FlatReadFormat;
30use crate::sst::parquet::prefilter::CachedPrimaryKeyFilter;
31use crate::sst::parquet::reader::SimpleFilterContext;
32use crate::sst::parquet::stats::RowGroupPruningStats;
33
34pub(crate) type BulkIterContextRef = Arc<BulkIterContext>;
35
36pub struct BulkIterContext {
37 pub(crate) base: RangeBase,
38 pub(crate) predicate: Option<Predicate>,
39 pk_filters: Option<Arc<Vec<SimpleFilterEvaluator>>>,
42}
43
44impl BulkIterContext {
45 pub fn new(
46 region_metadata: RegionMetadataRef,
47 projection: Option<&[ColumnId]>,
48 predicate: Option<Predicate>,
49 skip_auto_convert: bool,
50 ) -> Result<Self> {
51 Self::new_with_pre_filter_mode(
52 region_metadata,
53 projection,
54 predicate,
55 skip_auto_convert,
56 PreFilterMode::All,
57 )
58 }
59
60 pub fn new_with_pre_filter_mode(
61 region_metadata: RegionMetadataRef,
62 projection: Option<&[ColumnId]>,
63 predicate: Option<Predicate>,
64 skip_auto_convert: bool,
65 pre_filter_mode: PreFilterMode,
66 ) -> Result<Self> {
67 let codec = build_primary_key_codec(®ion_metadata);
68
69 let simple_filters: Vec<SimpleFilterContext> = predicate
70 .as_ref()
71 .iter()
72 .flat_map(|predicate| {
73 predicate
74 .exprs()
75 .iter()
76 .filter_map(|expr| SimpleFilterContext::new_opt(®ion_metadata, None, expr))
77 })
78 .collect();
79
80 let read_format = if let Some(column_ids) = projection {
81 FlatReadFormat::new(
82 region_metadata.clone(),
83 column_ids.iter().copied(),
84 None,
85 "memtable",
86 skip_auto_convert,
87 )?
88 } else {
89 FlatReadFormat::new(
90 region_metadata.clone(),
91 region_metadata
92 .column_metadatas
93 .iter()
94 .map(|col| col.column_id),
95 None,
96 "memtable",
97 skip_auto_convert,
98 )?
99 };
100
101 let dyn_filters = predicate
102 .as_ref()
103 .map(|pred| pred.dyn_filters().as_ref().clone())
104 .unwrap_or_default();
105
106 let pk_filters = Self::extract_pk_filters(&read_format, &simple_filters);
108
109 Ok(Self {
110 base: RangeBase {
111 filters: simple_filters,
112 dyn_filters,
113 read_format,
114 prune_schema: region_metadata.schema.clone(),
115 expected_metadata: Some(region_metadata),
116 codec,
117 compat_batch: None,
119 compaction_projection_mapper: None,
120 pre_filter_mode,
121 partition_filter: None,
122 },
123 predicate,
124 pk_filters,
125 })
126 }
127
128 pub(crate) fn row_groups_to_read(
130 &self,
131 file_meta: &Arc<ParquetMetaData>,
132 skip_fields: bool,
133 ) -> VecDeque<usize> {
134 let region_meta = self.base.read_format.metadata();
135 let row_groups = file_meta.row_groups();
136 let stats =
138 RowGroupPruningStats::new(row_groups, &self.base.read_format, None, skip_fields);
139 if let Some(predicate) = self.predicate.as_ref() {
140 predicate
141 .prune_with_stats(&stats, region_meta.schema.arrow_schema())
142 .iter()
143 .zip(0..file_meta.num_row_groups())
144 .filter_map(|(selected, row_group)| {
145 if !*selected {
146 return None;
147 }
148 Some(row_group)
149 })
150 .collect::<VecDeque<_>>()
151 } else {
152 (0..file_meta.num_row_groups()).collect()
153 }
154 }
155
156 fn extract_pk_filters(
158 read_format: &FlatReadFormat,
159 filters: &[SimpleFilterContext],
160 ) -> Option<Arc<Vec<SimpleFilterEvaluator>>> {
161 if read_format.batch_has_raw_pk_columns() {
162 return None;
163 }
164 let metadata = read_format.metadata();
165 if metadata.primary_key.is_empty() {
166 return None;
167 }
168
169 let pk_filters: Vec<_> = filters
170 .iter()
171 .filter_map(|f| f.primary_key_prefilter())
172 .collect();
173 if pk_filters.is_empty() {
174 return None;
175 }
176
177 Some(Arc::new(pk_filters))
178 }
179
180 pub(crate) fn build_pk_filter(&self) -> Option<CachedPrimaryKeyFilter> {
183 let pk_filters = self.pk_filters.as_ref()?;
184 let metadata = self.base.read_format.metadata();
185 let inner = self
187 .base
188 .codec
189 .primary_key_filter(metadata, Arc::clone(pk_filters), false);
190 Some(CachedPrimaryKeyFilter::new(inner))
191 }
192
193 pub(crate) fn read_format(&self) -> &FlatReadFormat {
194 &self.base.read_format
195 }
196
197 pub(crate) fn pre_filter_mode(&self) -> PreFilterMode {
199 self.base.pre_filter_mode
200 }
201
202 pub(crate) fn region_id(&self) -> store_api::storage::RegionId {
204 self.base.read_format.metadata().region_id
205 }
206}