Skip to main content

mito2/memtable/bulk/
context.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
//! Context for iterating over a bulk memtable.
16
17use std::collections::VecDeque;
18use std::sync::Arc;
19
20use common_recordbatch::filter::SimpleFilterEvaluator;
21use mito_codec::row_converter::build_primary_key_codec;
22use parquet::file::metadata::ParquetMetaData;
23use store_api::metadata::RegionMetadataRef;
24use store_api::storage::ColumnId;
25use table::predicate::Predicate;
26
27use crate::error::Result;
28use crate::sst::parquet::file_range::{PreFilterMode, RangeBase};
29use crate::sst::parquet::flat_format::FlatReadFormat;
30use crate::sst::parquet::prefilter::CachedPrimaryKeyFilter;
31use crate::sst::parquet::reader::SimpleFilterContext;
32use crate::sst::parquet::stats::RowGroupPruningStats;
33
34pub(crate) type BulkIterContextRef = Arc<BulkIterContext>;
35
36pub struct BulkIterContext {
37    pub(crate) base: RangeBase,
38    pub(crate) predicate: Option<Predicate>,
39    /// Pre-extracted primary key filters for PK prefiltering.
40    /// `None` if PK prefiltering is not applicable.
41    pk_filters: Option<Arc<Vec<SimpleFilterEvaluator>>>,
42}
43
44impl BulkIterContext {
45    pub fn new(
46        region_metadata: RegionMetadataRef,
47        projection: Option<&[ColumnId]>,
48        predicate: Option<Predicate>,
49        skip_auto_convert: bool,
50    ) -> Result<Self> {
51        Self::new_with_pre_filter_mode(
52            region_metadata,
53            projection,
54            predicate,
55            skip_auto_convert,
56            PreFilterMode::All,
57        )
58    }
59
60    pub fn new_with_pre_filter_mode(
61        region_metadata: RegionMetadataRef,
62        projection: Option<&[ColumnId]>,
63        predicate: Option<Predicate>,
64        skip_auto_convert: bool,
65        pre_filter_mode: PreFilterMode,
66    ) -> Result<Self> {
67        let codec = build_primary_key_codec(&region_metadata);
68
69        let simple_filters: Vec<SimpleFilterContext> = predicate
70            .as_ref()
71            .iter()
72            .flat_map(|predicate| {
73                predicate
74                    .exprs()
75                    .iter()
76                    .filter_map(|expr| SimpleFilterContext::new_opt(&region_metadata, None, expr))
77            })
78            .collect();
79
80        let read_format = if let Some(column_ids) = projection {
81            FlatReadFormat::new(
82                region_metadata.clone(),
83                column_ids.iter().copied(),
84                None,
85                "memtable",
86                skip_auto_convert,
87            )?
88        } else {
89            FlatReadFormat::new(
90                region_metadata.clone(),
91                region_metadata
92                    .column_metadatas
93                    .iter()
94                    .map(|col| col.column_id),
95                None,
96                "memtable",
97                skip_auto_convert,
98            )?
99        };
100
101        let dyn_filters = predicate
102            .as_ref()
103            .map(|pred| pred.dyn_filters().as_ref().clone())
104            .unwrap_or_default();
105
106        // Pre-extract PK filters if applicable.
107        let pk_filters = Self::extract_pk_filters(&read_format, &simple_filters);
108
109        Ok(Self {
110            base: RangeBase {
111                filters: simple_filters,
112                dyn_filters,
113                read_format,
114                prune_schema: region_metadata.schema.clone(),
115                expected_metadata: Some(region_metadata),
116                codec,
117                // we don't need to compat batch since all batch in memtable have the same schema.
118                compat_batch: None,
119                compaction_projection_mapper: None,
120                pre_filter_mode,
121                partition_filter: None,
122            },
123            predicate,
124            pk_filters,
125        })
126    }
127
128    /// Prunes row groups by stats.
129    pub(crate) fn row_groups_to_read(
130        &self,
131        file_meta: &Arc<ParquetMetaData>,
132        skip_fields: bool,
133    ) -> VecDeque<usize> {
134        let region_meta = self.base.read_format.metadata();
135        let row_groups = file_meta.row_groups();
136        // expected_metadata is set to None since we always expect region metadata of memtable is up-to-date.
137        let stats =
138            RowGroupPruningStats::new(row_groups, &self.base.read_format, None, skip_fields);
139        if let Some(predicate) = self.predicate.as_ref() {
140            predicate
141                .prune_with_stats(&stats, region_meta.schema.arrow_schema())
142                .iter()
143                .zip(0..file_meta.num_row_groups())
144                .filter_map(|(selected, row_group)| {
145                    if !*selected {
146                        return None;
147                    }
148                    Some(row_group)
149                })
150                .collect::<VecDeque<_>>()
151        } else {
152            (0..file_meta.num_row_groups()).collect()
153        }
154    }
155
156    /// Extracts PK filters if flat format with dictionary-encoded PKs is used.
157    fn extract_pk_filters(
158        read_format: &FlatReadFormat,
159        filters: &[SimpleFilterContext],
160    ) -> Option<Arc<Vec<SimpleFilterEvaluator>>> {
161        if read_format.batch_has_raw_pk_columns() {
162            return None;
163        }
164        let metadata = read_format.metadata();
165        if metadata.primary_key.is_empty() {
166            return None;
167        }
168
169        let pk_filters: Vec<_> = filters
170            .iter()
171            .filter_map(|f| f.primary_key_prefilter())
172            .collect();
173        if pk_filters.is_empty() {
174            return None;
175        }
176
177        Some(Arc::new(pk_filters))
178    }
179
180    /// Builds a fresh PK filter for a new iterator. Returns `None` if PK
181    /// prefiltering is not applicable.
182    pub(crate) fn build_pk_filter(&self) -> Option<CachedPrimaryKeyFilter> {
183        let pk_filters = self.pk_filters.as_ref()?;
184        let metadata = self.base.read_format.metadata();
185        // Parquet PK prefilter always supports the partition column.
186        let inner = self
187            .base
188            .codec
189            .primary_key_filter(metadata, Arc::clone(pk_filters), false);
190        Some(CachedPrimaryKeyFilter::new(inner))
191    }
192
193    pub(crate) fn read_format(&self) -> &FlatReadFormat {
194        &self.base.read_format
195    }
196
197    /// Returns the pre-filter mode.
198    pub(crate) fn pre_filter_mode(&self) -> PreFilterMode {
199        self.base.pre_filter_mode
200    }
201
202    /// Returns the region id.
203    pub(crate) fn region_id(&self) -> store_api::storage::RegionId {
204        self.base.read_format.metadata().region_id
205    }
206}