Skip to main content

mito2/
memtable.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Memtables are write buffers for regions.
16
17use std::collections::BTreeMap;
18use std::fmt;
19use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
20use std::sync::{Arc, Mutex};
21use std::time::Duration;
22
23pub use bulk::part::EncodedBulkPart;
24use bytes::Bytes;
25use common_time::Timestamp;
26use datatypes::arrow::datatypes::SchemaRef;
27use datatypes::arrow::record_batch::RecordBatch;
28use mito_codec::key_values::KeyValue;
29pub use mito_codec::key_values::KeyValues;
30use mito_codec::row_converter::{PrimaryKeyCodec, build_primary_key_codec};
31use snafu::ensure;
32use store_api::codec::PrimaryKeyEncoding;
33use store_api::metadata::RegionMetadataRef;
34use store_api::storage::{ColumnId, SequenceNumber, SequenceRange};
35
36use crate::config::MitoConfig;
37use crate::error::{Result, UnsupportedOperationSnafu};
38use crate::flush::WriteBufferManagerRef;
39use crate::memtable::bulk::{BulkMemtableBuilder, CompactDispatcher};
40use crate::memtable::time_series::TimeSeriesMemtableBuilder;
41use crate::metrics::WRITE_BUFFER_BYTES;
42use crate::read::Batch;
43use crate::read::batch_adapter::BatchToRecordBatchAdapter;
44use crate::read::prune::PruneTimeIterator;
45use crate::read::scan_region::PredicateGroup;
46use crate::region::options::{MemtableOptions, MergeMode, RegionOptions};
47use crate::sst::FormatType;
48use crate::sst::file::FileTimeRange;
49use crate::sst::parquet::SstInfo;
50use crate::sst::parquet::file_range::PreFilterMode;
51
52mod builder;
53pub mod bulk;
54pub mod simple_bulk_memtable;
55mod stats;
56pub mod time_partition;
57pub mod time_series;
58pub(crate) mod version;
59
60pub use bulk::part::{
61    BulkPart, BulkPartEncoder, BulkPartMeta, UnorderedPart, record_batch_estimated_size,
62    sort_primary_key_record_batch,
63};
64#[cfg(any(test, feature = "test"))]
65pub use time_partition::filter_record_batch;
66
/// Identifier of a memtable.
///
/// Ids should be unique among the memtables of the same region.
pub type MemtableId = u32;
71
/// Options for querying ranges from a memtable.
///
/// Passed by value to [Memtable::ranges]. Start from [RangesOptions::default] or
/// [RangesOptions::for_flush] and refine with the `with_*` methods.
#[derive(Clone)]
pub struct RangesOptions {
    /// Whether the ranges are being queried for flush.
    pub for_flush: bool,
    /// Mode to pre-filter columns in ranges.
    pub pre_filter_mode: PreFilterMode,
    /// Predicate to filter the data.
    pub predicate: PredicateGroup,
    /// Sequence range to filter the data. `None` applies no sequence filter.
    pub sequence: Option<SequenceRange>,
}
84
85impl Default for RangesOptions {
86    fn default() -> Self {
87        Self {
88            for_flush: false,
89            pre_filter_mode: PreFilterMode::All,
90            predicate: PredicateGroup::default(),
91            sequence: None,
92        }
93    }
94}
95
96impl RangesOptions {
97    /// Creates a new [RangesOptions] for flushing.
98    pub fn for_flush() -> Self {
99        Self {
100            for_flush: true,
101            pre_filter_mode: PreFilterMode::All,
102            predicate: PredicateGroup::default(),
103            sequence: None,
104        }
105    }
106
107    /// Sets the pre-filter mode.
108    #[must_use]
109    pub fn with_pre_filter_mode(mut self, pre_filter_mode: PreFilterMode) -> Self {
110        self.pre_filter_mode = pre_filter_mode;
111        self
112    }
113
114    /// Sets the predicate.
115    #[must_use]
116    pub fn with_predicate(mut self, predicate: PredicateGroup) -> Self {
117        self.predicate = predicate;
118        self
119    }
120
121    /// Sets the sequence range.
122    #[must_use]
123    pub fn with_sequence(mut self, sequence: Option<SequenceRange>) -> Self {
124        self.sequence = sequence;
125        self
126    }
127}
128
/// Statistics of a memtable.
///
/// Returned by [Memtable::stats] and also carried by every [MemtableRange].
#[derive(Debug, Default, Clone)]
pub struct MemtableStats {
    /// The estimated bytes allocated by this memtable from heap.
    pub estimated_bytes: usize,
    /// The inclusive time range that this memtable contains. It is None if
    /// and only if the memtable is empty.
    pub time_range: Option<(Timestamp, Timestamp)>,
    /// Total rows in memtable
    pub num_rows: usize,
    /// Total number of ranges in the memtable.
    pub num_ranges: usize,
    /// The maximum sequence number in the memtable.
    pub max_sequence: SequenceNumber,
    /// Number of estimated timeseries in memtable.
    pub series_count: usize,
}
145
146impl MemtableStats {
147    /// Attaches the time range to the stats.
148    #[cfg(any(test, feature = "test"))]
149    pub fn with_time_range(mut self, time_range: Option<(Timestamp, Timestamp)>) -> Self {
150        self.time_range = time_range;
151        self
152    }
153
154    #[cfg(feature = "test")]
155    pub fn with_max_sequence(mut self, max_sequence: SequenceNumber) -> Self {
156        self.max_sequence = max_sequence;
157        self
158    }
159
160    /// Returns the estimated bytes allocated by this memtable.
161    pub fn bytes_allocated(&self) -> usize {
162        self.estimated_bytes
163    }
164
165    /// Returns the time range of the memtable.
166    pub fn time_range(&self) -> Option<(Timestamp, Timestamp)> {
167        self.time_range
168    }
169
170    /// Returns the num of total rows in memtable.
171    pub fn num_rows(&self) -> usize {
172        self.num_rows
173    }
174
175    /// Returns the number of ranges in the memtable.
176    pub fn num_ranges(&self) -> usize {
177        self.num_ranges
178    }
179
180    /// Returns the maximum sequence number in the memtable.
181    pub fn max_sequence(&self) -> SequenceNumber {
182        self.max_sequence
183    }
184
185    /// Series count in memtable.
186    pub fn series_count(&self) -> usize {
187        self.series_count
188    }
189}
190
/// Boxed, sendable iterator that yields [Batch]es or an error.
pub type BoxedBatchIterator = Box<dyn Iterator<Item = Result<Batch>> + Send>;
192
/// Boxed, sendable iterator that yields arrow [RecordBatch]es or an error.
pub type BoxedRecordBatchIterator = Box<dyn Iterator<Item = Result<RecordBatch>> + Send>;
194
/// Ranges in a memtable.
///
/// This is the value returned by [Memtable::ranges].
#[derive(Default)]
pub struct MemtableRanges {
    /// Ranges keyed by their range id.
    pub ranges: BTreeMap<usize, MemtableRange>,
}
201
202impl MemtableRanges {
203    /// Returns the total number of rows across all ranges.
204    pub fn num_rows(&self) -> usize {
205        self.ranges.values().map(|r| r.stats().num_rows()).sum()
206    }
207
208    /// Returns the total series count across all ranges.
209    pub fn series_count(&self) -> usize {
210        self.ranges.values().map(|r| r.stats().series_count()).sum()
211    }
212
213    /// Returns the maximum sequence number across all ranges.
214    pub fn max_sequence(&self) -> SequenceNumber {
215        self.ranges
216            .values()
217            .map(|r| r.stats().max_sequence())
218            .max()
219            .unwrap_or(0)
220    }
221}
222
223impl IterBuilder for MemtableRanges {
224    fn build(&self, _metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator> {
225        ensure!(
226            self.ranges.len() == 1,
227            UnsupportedOperationSnafu {
228                err_msg: format!(
229                    "Building an iterator from MemtableRanges expects 1 range, but got {}",
230                    self.ranges.len()
231                ),
232            }
233        );
234
235        self.ranges.values().next().unwrap().build_iter()
236    }
237
238    fn is_record_batch(&self) -> bool {
239        self.ranges.values().all(|range| range.is_record_batch())
240    }
241}
242
/// In memory write buffer.
///
/// Every method takes `&self`, and the trait requires `Send + Sync`; memtables are
/// shared through [MemtableRef].
pub trait Memtable: Send + Sync + fmt::Debug {
    /// Returns the id of this memtable.
    fn id(&self) -> MemtableId;

    /// Writes key values into the memtable.
    fn write(&self, kvs: &KeyValues) -> Result<()>;

    /// Writes one key value pair into the memtable.
    fn write_one(&self, key_value: KeyValue) -> Result<()>;

    /// Writes an encoded batch into the memtable.
    fn write_bulk(&self, part: crate::memtable::bulk::part::BulkPart) -> Result<()>;

    /// Returns the ranges in the memtable.
    ///
    /// The returned map contains the range id and the range after applying the predicate.
    ///
    /// `projection` lists the column ids to read; `None` selects no explicit
    /// projection. `options` carries the predicate, sequence range and flush flag.
    fn ranges(
        &self,
        projection: Option<&[ColumnId]>,
        options: RangesOptions,
    ) -> Result<MemtableRanges>;

    /// Returns true if the memtable is empty.
    fn is_empty(&self) -> bool;

    /// Turns a mutable memtable into an immutable memtable.
    fn freeze(&self) -> Result<()>;

    /// Returns the [MemtableStats] info of Memtable.
    fn stats(&self) -> MemtableStats;

    /// Forks this (immutable) memtable and returns a new mutable memtable with specific memtable `id`.
    ///
    /// A region must freeze the memtable before invoking this method.
    fn fork(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef;

    /// Compacts the memtable.
    ///
    /// The `for_flush` is true when the flush job calls this method.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn compact(&self, for_flush: bool) -> Result<()> {
        // Explicitly ignore the argument in the default no-op implementation.
        let _ = for_flush;
        Ok(())
    }
}
288
/// Shared, reference-counted handle to a [Memtable].
pub type MemtableRef = Arc<dyn Memtable>;
290
/// Builder to build a new [Memtable].
pub trait MemtableBuilder: Send + Sync + fmt::Debug {
    /// Builds a new memtable instance with the given `id` for the region described
    /// by `metadata`.
    fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef;

    /// Returns true if the memtable supports bulk insert and benefits from it.
    ///
    /// The default implementation ignores `metadata` and returns `false`.
    fn use_bulk_insert(&self, metadata: &RegionMetadataRef) -> bool {
        let _metadata = metadata;
        false
    }
}
302
/// Shared, reference-counted handle to a [MemtableBuilder].
pub type MemtableBuilderRef = Arc<dyn MemtableBuilder>;
304
/// Memtable memory allocation tracker.
///
/// Counts the bytes reported through `on_allocation` and mirrors them to the
/// `WRITE_BUFFER_BYTES` metric and, when present, to the write buffer manager.
/// Dropping the tracker releases everything it tracked.
#[derive(Default)]
pub struct AllocTracker {
    /// Optional write buffer manager that is told about reservations and frees.
    write_buffer_manager: Option<WriteBufferManagerRef>,
    /// Bytes allocated by the tracker.
    bytes_allocated: AtomicUsize,
    /// Whether allocating is done.
    is_done_allocating: AtomicBool,
}
314
315impl fmt::Debug for AllocTracker {
316    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
317        f.debug_struct("AllocTracker")
318            .field("bytes_allocated", &self.bytes_allocated)
319            .field("is_done_allocating", &self.is_done_allocating)
320            .finish()
321    }
322}
323
324impl AllocTracker {
325    /// Returns a new [AllocTracker].
326    pub fn new(write_buffer_manager: Option<WriteBufferManagerRef>) -> AllocTracker {
327        AllocTracker {
328            write_buffer_manager,
329            bytes_allocated: AtomicUsize::new(0),
330            is_done_allocating: AtomicBool::new(false),
331        }
332    }
333
334    /// Tracks `bytes` memory is allocated.
335    pub(crate) fn on_allocation(&self, bytes: usize) {
336        self.bytes_allocated.fetch_add(bytes, Ordering::Relaxed);
337        WRITE_BUFFER_BYTES.add(bytes as i64);
338        if let Some(write_buffer_manager) = &self.write_buffer_manager {
339            write_buffer_manager.reserve_mem(bytes);
340        }
341    }
342
343    /// Marks we have finished allocating memory so we can free it from
344    /// the write buffer's limit.
345    ///
346    /// The region MUST ensure that it calls this method inside the region writer's write lock.
347    pub(crate) fn done_allocating(&self) {
348        if let Some(write_buffer_manager) = &self.write_buffer_manager
349            && self
350                .is_done_allocating
351                .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
352                .is_ok()
353        {
354            write_buffer_manager.schedule_free_mem(self.bytes_allocated.load(Ordering::Relaxed));
355        }
356    }
357
358    /// Returns bytes allocated.
359    pub(crate) fn bytes_allocated(&self) -> usize {
360        self.bytes_allocated.load(Ordering::Relaxed)
361    }
362
363    /// Returns the write buffer manager.
364    pub(crate) fn write_buffer_manager(&self) -> Option<WriteBufferManagerRef> {
365        self.write_buffer_manager.clone()
366    }
367}
368
369impl Drop for AllocTracker {
370    fn drop(&mut self) {
371        if !self.is_done_allocating.load(Ordering::Relaxed) {
372            self.done_allocating();
373        }
374
375        let bytes_allocated = self.bytes_allocated.load(Ordering::Relaxed);
376        WRITE_BUFFER_BYTES.sub(bytes_allocated as i64);
377
378        // Memory tracked by this tracker is freed.
379        if let Some(write_buffer_manager) = &self.write_buffer_manager {
380            write_buffer_manager.free_mem(bytes_allocated);
381        }
382    }
383}
384
/// Provider of memtable builders for regions.
#[derive(Clone)]
pub(crate) struct MemtableBuilderProvider {
    /// Write buffer manager handed to every builder this provider creates.
    write_buffer_manager: Option<WriteBufferManagerRef>,
    /// Engine configuration; supplies the default for the flat format choice.
    config: Arc<MitoConfig>,
    /// Compact dispatcher shared by the bulk memtable builders.
    compact_dispatcher: Arc<CompactDispatcher>,
}
392
393impl MemtableBuilderProvider {
394    pub(crate) fn new(
395        write_buffer_manager: Option<WriteBufferManagerRef>,
396        config: Arc<MitoConfig>,
397    ) -> Self {
398        let compact_dispatcher =
399            Arc::new(CompactDispatcher::new(config.max_background_compactions));
400
401        Self {
402            write_buffer_manager,
403            config,
404            compact_dispatcher,
405        }
406    }
407
408    pub(crate) fn builder_for_options(&self, options: &RegionOptions) -> MemtableBuilderRef {
409        let dedup = options.need_dedup();
410        let merge_mode = options.merge_mode();
411        let primary_key_encoding = options.primary_key_encoding();
412        let flat_format = options
413            .sst_format
414            .map(|format| format == FormatType::Flat)
415            .unwrap_or(self.config.default_flat_format);
416        if flat_format {
417            if options.memtable.is_some()
418                && !matches!(&options.memtable, Some(MemtableOptions::Bulk(_)))
419            {
420                common_telemetry::info!(
421                    "Overriding memtable config, use BulkMemtable under flat format"
422                );
423            }
424
425            return Arc::new(self.bulk_memtable_builder(dedup, merge_mode, options));
426        }
427
428        if primary_key_encoding == PrimaryKeyEncoding::Sparse {
429            if options.memtable.is_some()
430                && !matches!(&options.memtable, Some(MemtableOptions::Bulk(_)))
431            {
432                common_telemetry::info!(
433                    "Overriding memtable config, use BulkMemtable for sparse primary key encoding"
434                );
435            }
436            return Arc::new(self.bulk_memtable_builder(dedup, merge_mode, options));
437        }
438
439        // The format is not flat.
440        match &options.memtable {
441            Some(MemtableOptions::Bulk(config)) => Arc::new(
442                BulkMemtableBuilder::new(self.write_buffer_manager.clone(), !dedup, merge_mode)
443                    .with_config(config.clone())
444                    .with_compact_dispatcher(self.compact_dispatcher.clone()),
445            ),
446            Some(MemtableOptions::TimeSeries) => Arc::new(TimeSeriesMemtableBuilder::new(
447                self.write_buffer_manager.clone(),
448                dedup,
449                merge_mode,
450            )),
451            None => self.default_primary_key_memtable_builder(dedup, merge_mode),
452        }
453    }
454
455    fn bulk_memtable_builder(
456        &self,
457        dedup: bool,
458        merge_mode: MergeMode,
459        options: &RegionOptions,
460    ) -> BulkMemtableBuilder {
461        let mut builder = BulkMemtableBuilder::new(
462            self.write_buffer_manager.clone(),
463            !dedup, // append_mode: true if not dedup, false if dedup
464            merge_mode,
465        )
466        .with_compact_dispatcher(self.compact_dispatcher.clone());
467
468        if let Some(MemtableOptions::Bulk(config)) = &options.memtable {
469            builder = builder.with_config(config.clone());
470        }
471
472        builder
473    }
474
475    fn default_primary_key_memtable_builder(
476        &self,
477        dedup: bool,
478        merge_mode: MergeMode,
479    ) -> MemtableBuilderRef {
480        Arc::new(TimeSeriesMemtableBuilder::new(
481            self.write_buffer_manager.clone(),
482            dedup,
483            merge_mode,
484        ))
485    }
486}
487
/// Metrics for scanning a memtable.
///
/// Clones share the same underlying data, which is guarded by a mutex.
#[derive(Clone, Default)]
pub struct MemScanMetrics(Arc<Mutex<MemScanMetricsData>>);

impl MemScanMetrics {
    /// Adds every counter and duration of `inner` to the shared metrics.
    pub(crate) fn merge_inner(&self, inner: &MemScanMetricsData) {
        // Destructure exhaustively so a newly added field cannot be forgotten here.
        let MemScanMetricsData {
            total_series,
            num_rows,
            num_batches,
            scan_cost,
            prefilter_cost,
            prefilter_rows_filtered,
        } = inner;

        let mut totals = self.0.lock().unwrap();
        totals.total_series += *total_series;
        totals.num_rows += *num_rows;
        totals.num_batches += *num_batches;
        totals.scan_cost += *scan_cost;
        totals.prefilter_cost += *prefilter_cost;
        totals.prefilter_rows_filtered += *prefilter_rows_filtered;
    }

    /// Returns a snapshot copy of the current metrics data.
    pub(crate) fn data(&self) -> MemScanMetricsData {
        let guard = self.0.lock().unwrap();
        guard.clone()
    }
}

/// Plain counters and durations collected while scanning a memtable.
#[derive(Clone, Default)]
pub(crate) struct MemScanMetricsData {
    /// Total series in the memtable.
    pub(crate) total_series: usize,
    /// Number of rows read.
    pub(crate) num_rows: usize,
    /// Number of batches read.
    pub(crate) num_batches: usize,
    /// Duration to scan the memtable.
    pub(crate) scan_cost: Duration,
    /// Duration of prefilter in memtable scan.
    pub(crate) prefilter_cost: Duration,
    /// Number of rows filtered by prefilter in memtable scan.
    pub(crate) prefilter_rows_filtered: usize,
}
525
/// Encoded range in the memtable.
///
/// Returned by [IterBuilder::encoded_range] when a range is already encoded into SST.
pub struct EncodedRange {
    /// Encoded file data.
    pub data: Bytes,
    /// Metadata of the encoded range.
    pub sst_info: SstInfo,
}
533
/// Builder to build an iterator to read the range.
/// The builder should know the projection and the predicate to build the iterator.
///
/// Only [IterBuilder::build] is required; every other method has a default that
/// reports the capability as unavailable.
pub trait IterBuilder: Send + Sync {
    /// Returns the iterator to read the range.
    fn build(&self, metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator>;

    /// Returns whether the iterator is a record batch iterator.
    ///
    /// Defaults to `false`.
    fn is_record_batch(&self) -> bool {
        false
    }

    /// Returns the record batch iterator to read the range.
    /// ## Note
    /// Implementations should ensure the iterator yields data within given time range.
    ///
    /// The default implementation ignores both arguments and fails with an
    /// unsupported operation error.
    fn build_record_batch(
        &self,
        time_range: Option<(Timestamp, Timestamp)>,
        metrics: Option<MemScanMetrics>,
    ) -> Result<BoxedRecordBatchIterator> {
        // Explicitly ignore the arguments in the default implementation.
        let _metrics = metrics;
        let _ = time_range;
        UnsupportedOperationSnafu {
            err_msg: "Record batch iterator is not supported by this memtable",
        }
        .fail()
    }

    /// Returns a cheap schema hint for record batches yielded by this builder.
    ///
    /// Defaults to `None`.
    fn record_batch_schema_hint(&self) -> Option<SchemaRef> {
        None
    }

    /// Returns the [EncodedRange] if the range is already encoded into SST.
    ///
    /// Defaults to `None`.
    fn encoded_range(&self) -> Option<EncodedRange> {
        None
    }
}
571
/// Boxed [IterBuilder] trait object.
pub type BoxedIterBuilder = Box<dyn IterBuilder>;
573
574/// Computes the column IDs to read based on the projection.
575///
576/// If `projection` is `Some`, returns those column IDs. If `None`, returns all column IDs
577/// from the metadata.
578pub fn read_column_ids_from_projection(
579    metadata: &RegionMetadataRef,
580    projection: Option<&[ColumnId]>,
581) -> Vec<ColumnId> {
582    if let Some(projection) = projection {
583        projection.to_vec()
584    } else {
585        metadata
586            .column_metadatas
587            .iter()
588            .map(|c| c.column_id)
589            .collect()
590    }
591}
592
/// Context to adapt batch iterators to record batch iterators for flat scan.
pub struct BatchToRecordBatchContext {
    /// Metadata of the region the batches belong to.
    metadata: RegionMetadataRef,
    /// Primary key codec built from `metadata`.
    codec: Arc<dyn PrimaryKeyCodec>,
    /// Ids of the columns to read; never empty after construction.
    read_column_ids: Vec<ColumnId>,
}
599
600impl BatchToRecordBatchContext {
601    /// Creates a new context for adapting batch iterators.
602    pub fn new(metadata: RegionMetadataRef, mut read_column_ids: Vec<ColumnId>) -> Self {
603        if read_column_ids.is_empty() {
604            read_column_ids.push(metadata.time_index_column().column_id);
605        }
606
607        let codec = build_primary_key_codec(&metadata);
608        Self {
609            metadata,
610            codec,
611            read_column_ids,
612        }
613    }
614
615    fn adapt_iter(&self, iter: BoxedBatchIterator) -> BoxedRecordBatchIterator {
616        Box::new(BatchToRecordBatchAdapter::new(
617            iter,
618            self.metadata.clone(),
619            self.codec.clone(),
620            &self.read_column_ids,
621        ))
622    }
623}
624
/// Context shared by ranges of the same memtable.
///
/// Ranges hold it through [MemtableRangeContextRef].
pub struct MemtableRangeContext {
    /// Id of the memtable.
    id: MemtableId,
    /// Iterator builder.
    builder: BoxedIterBuilder,
    /// All filters.
    predicate: PredicateGroup,
    /// Optional context to adapt batch iterators for flat scans.
    batch_to_record_batch: Option<Arc<BatchToRecordBatchContext>>,
}
636
/// Shared, reference-counted handle to a [MemtableRangeContext].
pub type MemtableRangeContextRef = Arc<MemtableRangeContext>;
638
639impl MemtableRangeContext {
640    /// Creates a new [MemtableRangeContext].
641    pub fn new(id: MemtableId, builder: BoxedIterBuilder, predicate: PredicateGroup) -> Self {
642        Self::new_with_batch_to_record_batch(id, builder, predicate, None)
643    }
644
645    /// Creates a new [MemtableRangeContext] with optional adapter context.
646    pub fn new_with_batch_to_record_batch(
647        id: MemtableId,
648        builder: BoxedIterBuilder,
649        predicate: PredicateGroup,
650        batch_to_record_batch: Option<Arc<BatchToRecordBatchContext>>,
651    ) -> Self {
652        Self {
653            id,
654            builder,
655            predicate,
656            batch_to_record_batch,
657        }
658    }
659}
660
/// A range in the memtable.
///
/// Cloning clones the shared context handle and the stats.
#[derive(Clone)]
pub struct MemtableRange {
    /// Shared context.
    context: MemtableRangeContextRef,
    /// Statistics for this memtable range.
    stats: MemtableStats,
}
669
670impl MemtableRange {
671    /// Creates a new range from context and stats.
672    pub fn new(context: MemtableRangeContextRef, stats: MemtableStats) -> Self {
673        Self { context, stats }
674    }
675
676    /// Returns the statistics for this range.
677    pub fn stats(&self) -> &MemtableStats {
678        &self.stats
679    }
680
681    /// Returns the id of the memtable to read.
682    pub fn id(&self) -> MemtableId {
683        self.context.id
684    }
685
686    /// Builds an iterator to read the range.
687    /// Filters the result by the specific time range, this ensures memtable won't return
688    /// rows out of the time range when new rows are inserted.
689    pub fn build_prune_iter(
690        &self,
691        time_range: FileTimeRange,
692        metrics: Option<MemScanMetrics>,
693    ) -> Result<BoxedBatchIterator> {
694        let iter = self.context.builder.build(metrics)?;
695        let time_filters = self.context.predicate.time_filters();
696        Ok(Box::new(PruneTimeIterator::new(
697            iter,
698            time_range,
699            time_filters,
700        )))
701    }
702
703    /// Builds an iterator to read all rows in range.
704    pub fn build_iter(&self) -> Result<BoxedBatchIterator> {
705        self.context.builder.build(None)
706    }
707
708    /// Builds a record batch iterator to read rows in range.
709    ///
710    /// For mutable memtables (adapter path), applies time-range pruning to ensure rows
711    /// outside the time range are filtered, matching the behavior of `build_prune_iter`.
712    pub fn build_record_batch_iter(
713        &self,
714        time_range: Option<FileTimeRange>,
715        metrics: Option<MemScanMetrics>,
716    ) -> Result<BoxedRecordBatchIterator> {
717        if self.context.builder.is_record_batch() {
718            return self.context.builder.build_record_batch(time_range, metrics);
719        }
720
721        if let Some(context) = self.context.batch_to_record_batch.as_ref() {
722            let iter = self.context.builder.build(metrics)?;
723            let iter: BoxedBatchIterator = if let Some(time_range) = time_range {
724                let time_filters = self.context.predicate.time_filters();
725                Box::new(PruneTimeIterator::new(iter, time_range, time_filters))
726            } else {
727                iter
728            };
729            return Ok(context.adapt_iter(iter));
730        }
731
732        UnsupportedOperationSnafu {
733            err_msg: "Record batch iterator is not supported by this memtable",
734        }
735        .fail()
736    }
737
738    /// Returns a cheap schema hint for record batches yielded by this range.
739    pub fn record_batch_schema_hint(&self) -> Option<SchemaRef> {
740        self.context.builder.record_batch_schema_hint()
741    }
742
743    /// Returns whether the iterator is a record batch iterator.
744    pub fn is_record_batch(&self) -> bool {
745        self.context.builder.is_record_batch()
746    }
747
748    pub fn num_rows(&self) -> usize {
749        self.stats.num_rows
750    }
751
752    /// Returns the encoded range if available.
753    pub fn encoded(&self) -> Option<EncodedRange> {
754        self.context.builder.encoded_range()
755    }
756}
757
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::flush::{WriteBufferManager, WriteBufferManagerImpl};
    use crate::memtable::bulk::BulkMemtableConfig;

    /// A tracker without a write buffer manager still counts allocated bytes, and
    /// `done_allocating` leaves the count untouched.
    #[test]
    fn test_alloc_tracker_without_manager() {
        let tracker = AllocTracker::new(None);
        assert_eq!(0, tracker.bytes_allocated());
        tracker.on_allocation(100);
        assert_eq!(100, tracker.bytes_allocated());
        tracker.on_allocation(200);
        assert_eq!(300, tracker.bytes_allocated());

        tracker.done_allocating();
        assert_eq!(300, tracker.bytes_allocated());
    }

    /// With a manager, `done_allocating` clears the mutable usage exactly once and
    /// dropping the tracker releases the remaining memory usage.
    #[test]
    fn test_alloc_tracker_with_manager() {
        let manager = Arc::new(WriteBufferManagerImpl::new(1000));
        {
            let tracker = AllocTracker::new(Some(manager.clone() as WriteBufferManagerRef));

            tracker.on_allocation(100);
            assert_eq!(100, tracker.bytes_allocated());
            assert_eq!(100, manager.memory_usage());
            assert_eq!(100, manager.mutable_usage());

            for _ in 0..2 {
                // Done allocating won't free the same memory multiple times.
                tracker.done_allocating();
                assert_eq!(100, manager.memory_usage());
                assert_eq!(0, manager.mutable_usage());
            }
        }

        // The tracker has been dropped here.
        assert_eq!(0, manager.memory_usage());
        assert_eq!(0, manager.mutable_usage());
    }

    /// Dropping a tracker that never called `done_allocating` still releases both
    /// the mutable usage and the memory usage.
    #[test]
    fn test_alloc_tracker_without_done_allocating() {
        let manager = Arc::new(WriteBufferManagerImpl::new(1000));
        {
            let tracker = AllocTracker::new(Some(manager.clone() as WriteBufferManagerRef));

            tracker.on_allocation(100);
            assert_eq!(100, tracker.bytes_allocated());
            assert_eq!(100, manager.memory_usage());
            assert_eq!(100, manager.mutable_usage());
        }

        assert_eq!(0, manager.memory_usage());
        assert_eq!(0, manager.mutable_usage());
    }

    /// The bulk memtable config from the region options is carried over into the
    /// builder returned by `bulk_memtable_builder`.
    #[test]
    fn test_forced_bulk_memtable_preserves_bulk_config() {
        let provider = MemtableBuilderProvider::new(None, Arc::new(MitoConfig::default()));
        let config = BulkMemtableConfig {
            merge_threshold: 7,
            encode_row_threshold: 11,
            encode_bytes_threshold: 13,
            max_merge_groups: 17,
        };
        let options = RegionOptions {
            memtable: Some(MemtableOptions::Bulk(config.clone())),
            primary_key_encoding: Some(PrimaryKeyEncoding::Sparse),
            ..Default::default()
        };

        let builder =
            provider.bulk_memtable_builder(options.need_dedup(), options.merge_mode(), &options);

        assert_eq!(&config, builder.config());
    }
}