Skip to main content

mito2/
memtable.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Memtables are write buffers for regions.
16
17use std::collections::BTreeMap;
18use std::fmt;
19use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
20use std::sync::{Arc, Mutex};
21use std::time::Duration;
22
23pub use bulk::part::EncodedBulkPart;
24use bytes::Bytes;
25use common_time::Timestamp;
26use datatypes::arrow::record_batch::RecordBatch;
27use mito_codec::key_values::KeyValue;
28pub use mito_codec::key_values::KeyValues;
29use mito_codec::row_converter::{PrimaryKeyCodec, build_primary_key_codec};
30use snafu::ensure;
31use store_api::codec::PrimaryKeyEncoding;
32use store_api::metadata::RegionMetadataRef;
33use store_api::storage::{ColumnId, SequenceNumber, SequenceRange};
34
35use crate::config::MitoConfig;
36use crate::error::{Result, UnsupportedOperationSnafu};
37use crate::flush::WriteBufferManagerRef;
38use crate::memtable::bulk::{BulkMemtableBuilder, CompactDispatcher};
39use crate::memtable::time_series::TimeSeriesMemtableBuilder;
40use crate::metrics::WRITE_BUFFER_BYTES;
41use crate::read::Batch;
42use crate::read::batch_adapter::BatchToRecordBatchAdapter;
43use crate::read::prune::PruneTimeIterator;
44use crate::read::scan_region::PredicateGroup;
45use crate::region::options::{MemtableOptions, MergeMode, RegionOptions};
46use crate::sst::FormatType;
47use crate::sst::file::FileTimeRange;
48use crate::sst::parquet::SstInfo;
49use crate::sst::parquet::file_range::PreFilterMode;
50
51mod builder;
52pub mod bulk;
53pub mod simple_bulk_memtable;
54mod stats;
55pub mod time_partition;
56pub mod time_series;
57pub(crate) mod version;
58
59pub use bulk::part::{
60    BulkPart, BulkPartEncoder, BulkPartMeta, UnorderedPart, record_batch_estimated_size,
61    sort_primary_key_record_batch,
62};
63#[cfg(any(test, feature = "test"))]
64pub use time_partition::filter_record_batch;
65
66/// Id for memtables.
67///
68/// Should be unique under the same region.
69pub type MemtableId = u32;
70
71/// Options for querying ranges from a memtable.
72#[derive(Clone)]
73pub struct RangesOptions {
74    /// Whether the ranges are being queried for flush.
75    pub for_flush: bool,
76    /// Mode to pre-filter columns in ranges.
77    pub pre_filter_mode: PreFilterMode,
78    /// Predicate to filter the data.
79    pub predicate: PredicateGroup,
80    /// Sequence range to filter the data.
81    pub sequence: Option<SequenceRange>,
82}
83
84impl Default for RangesOptions {
85    fn default() -> Self {
86        Self {
87            for_flush: false,
88            pre_filter_mode: PreFilterMode::All,
89            predicate: PredicateGroup::default(),
90            sequence: None,
91        }
92    }
93}
94
95impl RangesOptions {
96    /// Creates a new [RangesOptions] for flushing.
97    pub fn for_flush() -> Self {
98        Self {
99            for_flush: true,
100            pre_filter_mode: PreFilterMode::All,
101            predicate: PredicateGroup::default(),
102            sequence: None,
103        }
104    }
105
106    /// Sets the pre-filter mode.
107    #[must_use]
108    pub fn with_pre_filter_mode(mut self, pre_filter_mode: PreFilterMode) -> Self {
109        self.pre_filter_mode = pre_filter_mode;
110        self
111    }
112
113    /// Sets the predicate.
114    #[must_use]
115    pub fn with_predicate(mut self, predicate: PredicateGroup) -> Self {
116        self.predicate = predicate;
117        self
118    }
119
120    /// Sets the sequence range.
121    #[must_use]
122    pub fn with_sequence(mut self, sequence: Option<SequenceRange>) -> Self {
123        self.sequence = sequence;
124        self
125    }
126}
127
128#[derive(Debug, Default, Clone)]
129pub struct MemtableStats {
130    /// The estimated bytes allocated by this memtable from heap.
131    pub estimated_bytes: usize,
132    /// The inclusive time range that this memtable contains. It is None if
133    /// and only if the memtable is empty.
134    pub time_range: Option<(Timestamp, Timestamp)>,
135    /// Total rows in memtable
136    pub num_rows: usize,
137    /// Total number of ranges in the memtable.
138    pub num_ranges: usize,
139    /// The maximum sequence number in the memtable.
140    pub max_sequence: SequenceNumber,
141    /// Number of estimated timeseries in memtable.
142    pub series_count: usize,
143}
144
145impl MemtableStats {
146    /// Attaches the time range to the stats.
147    #[cfg(any(test, feature = "test"))]
148    pub fn with_time_range(mut self, time_range: Option<(Timestamp, Timestamp)>) -> Self {
149        self.time_range = time_range;
150        self
151    }
152
153    #[cfg(feature = "test")]
154    pub fn with_max_sequence(mut self, max_sequence: SequenceNumber) -> Self {
155        self.max_sequence = max_sequence;
156        self
157    }
158
159    /// Returns the estimated bytes allocated by this memtable.
160    pub fn bytes_allocated(&self) -> usize {
161        self.estimated_bytes
162    }
163
164    /// Returns the time range of the memtable.
165    pub fn time_range(&self) -> Option<(Timestamp, Timestamp)> {
166        self.time_range
167    }
168
169    /// Returns the num of total rows in memtable.
170    pub fn num_rows(&self) -> usize {
171        self.num_rows
172    }
173
174    /// Returns the number of ranges in the memtable.
175    pub fn num_ranges(&self) -> usize {
176        self.num_ranges
177    }
178
179    /// Returns the maximum sequence number in the memtable.
180    pub fn max_sequence(&self) -> SequenceNumber {
181        self.max_sequence
182    }
183
184    /// Series count in memtable.
185    pub fn series_count(&self) -> usize {
186        self.series_count
187    }
188}
189
/// Boxed, sendable iterator that yields [`Batch`] results.
pub type BoxedBatchIterator = Box<dyn Iterator<Item = Result<Batch>> + Send>;

/// Boxed, sendable iterator that yields arrow [`RecordBatch`] results.
pub type BoxedRecordBatchIterator = Box<dyn Iterator<Item = Result<RecordBatch>> + Send>;
193
194/// Ranges in a memtable.
195#[derive(Default)]
196pub struct MemtableRanges {
197    /// Range IDs and ranges.
198    pub ranges: BTreeMap<usize, MemtableRange>,
199}
200
201impl MemtableRanges {
202    /// Returns the total number of rows across all ranges.
203    pub fn num_rows(&self) -> usize {
204        self.ranges.values().map(|r| r.stats().num_rows()).sum()
205    }
206
207    /// Returns the total series count across all ranges.
208    pub fn series_count(&self) -> usize {
209        self.ranges.values().map(|r| r.stats().series_count()).sum()
210    }
211
212    /// Returns the maximum sequence number across all ranges.
213    pub fn max_sequence(&self) -> SequenceNumber {
214        self.ranges
215            .values()
216            .map(|r| r.stats().max_sequence())
217            .max()
218            .unwrap_or(0)
219    }
220}
221
222impl IterBuilder for MemtableRanges {
223    fn build(&self, _metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator> {
224        ensure!(
225            self.ranges.len() == 1,
226            UnsupportedOperationSnafu {
227                err_msg: format!(
228                    "Building an iterator from MemtableRanges expects 1 range, but got {}",
229                    self.ranges.len()
230                ),
231            }
232        );
233
234        self.ranges.values().next().unwrap().build_iter()
235    }
236
237    fn is_record_batch(&self) -> bool {
238        self.ranges.values().all(|range| range.is_record_batch())
239    }
240}
241
242/// In memory write buffer.
243pub trait Memtable: Send + Sync + fmt::Debug {
244    /// Returns the id of this memtable.
245    fn id(&self) -> MemtableId;
246
247    /// Writes key values into the memtable.
248    fn write(&self, kvs: &KeyValues) -> Result<()>;
249
250    /// Writes one key value pair into the memtable.
251    fn write_one(&self, key_value: KeyValue) -> Result<()>;
252
253    /// Writes an encoded batch of into memtable.
254    fn write_bulk(&self, part: crate::memtable::bulk::part::BulkPart) -> Result<()>;
255
256    /// Returns the ranges in the memtable.
257    ///
258    /// The returned map contains the range id and the range after applying the predicate.
259    fn ranges(
260        &self,
261        projection: Option<&[ColumnId]>,
262        options: RangesOptions,
263    ) -> Result<MemtableRanges>;
264
265    /// Returns true if the memtable is empty.
266    fn is_empty(&self) -> bool;
267
268    /// Turns a mutable memtable into an immutable memtable.
269    fn freeze(&self) -> Result<()>;
270
271    /// Returns the [MemtableStats] info of Memtable.
272    fn stats(&self) -> MemtableStats;
273
274    /// Forks this (immutable) memtable and returns a new mutable memtable with specific memtable `id`.
275    ///
276    /// A region must freeze the memtable before invoking this method.
277    fn fork(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef;
278
279    /// Compacts the memtable.
280    ///
281    /// The `for_flush` is true when the flush job calls this method.
282    fn compact(&self, for_flush: bool) -> Result<()> {
283        let _ = for_flush;
284        Ok(())
285    }
286}
287
288pub type MemtableRef = Arc<dyn Memtable>;
289
290/// Builder to build a new [Memtable].
291pub trait MemtableBuilder: Send + Sync + fmt::Debug {
292    /// Builds a new memtable instance.
293    fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef;
294
295    /// Returns true if the memtable supports bulk insert and benefits from it.
296    fn use_bulk_insert(&self, metadata: &RegionMetadataRef) -> bool {
297        let _metadata = metadata;
298        false
299    }
300}
301
302pub type MemtableBuilderRef = Arc<dyn MemtableBuilder>;
303
304/// Memtable memory allocation tracker.
305#[derive(Default)]
306pub struct AllocTracker {
307    write_buffer_manager: Option<WriteBufferManagerRef>,
308    /// Bytes allocated by the tracker.
309    bytes_allocated: AtomicUsize,
310    /// Whether allocating is done.
311    is_done_allocating: AtomicBool,
312}
313
314impl fmt::Debug for AllocTracker {
315    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
316        f.debug_struct("AllocTracker")
317            .field("bytes_allocated", &self.bytes_allocated)
318            .field("is_done_allocating", &self.is_done_allocating)
319            .finish()
320    }
321}
322
323impl AllocTracker {
324    /// Returns a new [AllocTracker].
325    pub fn new(write_buffer_manager: Option<WriteBufferManagerRef>) -> AllocTracker {
326        AllocTracker {
327            write_buffer_manager,
328            bytes_allocated: AtomicUsize::new(0),
329            is_done_allocating: AtomicBool::new(false),
330        }
331    }
332
333    /// Tracks `bytes` memory is allocated.
334    pub(crate) fn on_allocation(&self, bytes: usize) {
335        self.bytes_allocated.fetch_add(bytes, Ordering::Relaxed);
336        WRITE_BUFFER_BYTES.add(bytes as i64);
337        if let Some(write_buffer_manager) = &self.write_buffer_manager {
338            write_buffer_manager.reserve_mem(bytes);
339        }
340    }
341
342    /// Marks we have finished allocating memory so we can free it from
343    /// the write buffer's limit.
344    ///
345    /// The region MUST ensure that it calls this method inside the region writer's write lock.
346    pub(crate) fn done_allocating(&self) {
347        if let Some(write_buffer_manager) = &self.write_buffer_manager
348            && self
349                .is_done_allocating
350                .compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
351                .is_ok()
352        {
353            write_buffer_manager.schedule_free_mem(self.bytes_allocated.load(Ordering::Relaxed));
354        }
355    }
356
357    /// Returns bytes allocated.
358    pub(crate) fn bytes_allocated(&self) -> usize {
359        self.bytes_allocated.load(Ordering::Relaxed)
360    }
361
362    /// Returns the write buffer manager.
363    pub(crate) fn write_buffer_manager(&self) -> Option<WriteBufferManagerRef> {
364        self.write_buffer_manager.clone()
365    }
366}
367
368impl Drop for AllocTracker {
369    fn drop(&mut self) {
370        if !self.is_done_allocating.load(Ordering::Relaxed) {
371            self.done_allocating();
372        }
373
374        let bytes_allocated = self.bytes_allocated.load(Ordering::Relaxed);
375        WRITE_BUFFER_BYTES.sub(bytes_allocated as i64);
376
377        // Memory tracked by this tracker is freed.
378        if let Some(write_buffer_manager) = &self.write_buffer_manager {
379            write_buffer_manager.free_mem(bytes_allocated);
380        }
381    }
382}
383
384/// Provider of memtable builders for regions.
385#[derive(Clone)]
386pub(crate) struct MemtableBuilderProvider {
387    write_buffer_manager: Option<WriteBufferManagerRef>,
388    config: Arc<MitoConfig>,
389    compact_dispatcher: Arc<CompactDispatcher>,
390}
391
392impl MemtableBuilderProvider {
393    pub(crate) fn new(
394        write_buffer_manager: Option<WriteBufferManagerRef>,
395        config: Arc<MitoConfig>,
396    ) -> Self {
397        let compact_dispatcher =
398            Arc::new(CompactDispatcher::new(config.max_background_compactions));
399
400        Self {
401            write_buffer_manager,
402            config,
403            compact_dispatcher,
404        }
405    }
406
407    pub(crate) fn builder_for_options(&self, options: &RegionOptions) -> MemtableBuilderRef {
408        let dedup = options.need_dedup();
409        let merge_mode = options.merge_mode();
410        let primary_key_encoding = options.primary_key_encoding();
411        let flat_format = options
412            .sst_format
413            .map(|format| format == FormatType::Flat)
414            .unwrap_or(self.config.default_flat_format);
415        if flat_format {
416            if options.memtable.is_some()
417                && !matches!(&options.memtable, Some(MemtableOptions::Bulk(_)))
418            {
419                common_telemetry::info!(
420                    "Overriding memtable config, use BulkMemtable under flat format"
421                );
422            }
423
424            return Arc::new(self.bulk_memtable_builder(dedup, merge_mode, options));
425        }
426
427        if primary_key_encoding == PrimaryKeyEncoding::Sparse {
428            if options.memtable.is_some()
429                && !matches!(&options.memtable, Some(MemtableOptions::Bulk(_)))
430            {
431                common_telemetry::info!(
432                    "Overriding memtable config, use BulkMemtable for sparse primary key encoding"
433                );
434            }
435            return Arc::new(self.bulk_memtable_builder(dedup, merge_mode, options));
436        }
437
438        // The format is not flat.
439        match &options.memtable {
440            Some(MemtableOptions::Bulk(config)) => Arc::new(
441                BulkMemtableBuilder::new(self.write_buffer_manager.clone(), !dedup, merge_mode)
442                    .with_config(config.clone())
443                    .with_compact_dispatcher(self.compact_dispatcher.clone()),
444            ),
445            Some(MemtableOptions::TimeSeries) => Arc::new(TimeSeriesMemtableBuilder::new(
446                self.write_buffer_manager.clone(),
447                dedup,
448                merge_mode,
449            )),
450            None => self.default_primary_key_memtable_builder(dedup, merge_mode),
451        }
452    }
453
454    fn bulk_memtable_builder(
455        &self,
456        dedup: bool,
457        merge_mode: MergeMode,
458        options: &RegionOptions,
459    ) -> BulkMemtableBuilder {
460        let mut builder = BulkMemtableBuilder::new(
461            self.write_buffer_manager.clone(),
462            !dedup, // append_mode: true if not dedup, false if dedup
463            merge_mode,
464        )
465        .with_compact_dispatcher(self.compact_dispatcher.clone());
466
467        if let Some(MemtableOptions::Bulk(config)) = &options.memtable {
468            builder = builder.with_config(config.clone());
469        }
470
471        builder
472    }
473
474    fn default_primary_key_memtable_builder(
475        &self,
476        dedup: bool,
477        merge_mode: MergeMode,
478    ) -> MemtableBuilderRef {
479        Arc::new(TimeSeriesMemtableBuilder::new(
480            self.write_buffer_manager.clone(),
481            dedup,
482            merge_mode,
483        ))
484    }
485}
486
/// Metrics for scanning a memtable.
///
/// Clones share the same underlying [`MemScanMetricsData`].
#[derive(Clone, Default)]
pub struct MemScanMetrics(Arc<Mutex<MemScanMetricsData>>);

impl MemScanMetrics {
    /// Adds every counter and duration of `inner` to the shared metrics.
    pub(crate) fn merge_inner(&self, inner: &MemScanMetricsData) {
        // Destructure so that a newly added field cannot be forgotten here.
        let MemScanMetricsData {
            total_series,
            num_rows,
            num_batches,
            scan_cost,
            prefilter_cost,
            prefilter_rows_filtered,
        } = inner;

        let mut totals = self.0.lock().unwrap();
        totals.total_series += *total_series;
        totals.num_rows += *num_rows;
        totals.num_batches += *num_batches;
        totals.scan_cost += *scan_cost;
        totals.prefilter_cost += *prefilter_cost;
        totals.prefilter_rows_filtered += *prefilter_rows_filtered;
    }

    /// Returns a snapshot of the metrics data.
    pub(crate) fn data(&self) -> MemScanMetricsData {
        let guard = self.0.lock().unwrap();
        guard.clone()
    }
}

/// Plain counters and timings collected while scanning a memtable.
#[derive(Clone, Default)]
pub(crate) struct MemScanMetricsData {
    /// Total series in the memtable.
    pub(crate) total_series: usize,
    /// Number of rows read.
    pub(crate) num_rows: usize,
    /// Number of batches read.
    pub(crate) num_batches: usize,
    /// Duration to scan the memtable.
    pub(crate) scan_cost: Duration,
    /// Duration of prefilter in memtable scan.
    pub(crate) prefilter_cost: Duration,
    /// Number of rows filtered by prefilter in memtable scan.
    pub(crate) prefilter_rows_filtered: usize,
}
524
/// Encoded range in the memtable.
///
/// Returned by [`IterBuilder::encoded_range`] when a range is already encoded into SST.
pub struct EncodedRange {
    /// Encoded file data.
    pub data: Bytes,
    /// Metadata of the encoded range.
    pub sst_info: SstInfo,
}
532
533/// Builder to build an iterator to read the range.
534/// The builder should know the projection and the predicate to build the iterator.
535pub trait IterBuilder: Send + Sync {
536    /// Returns the iterator to read the range.
537    fn build(&self, metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator>;
538
539    /// Returns whether the iterator is a record batch iterator.
540    fn is_record_batch(&self) -> bool {
541        false
542    }
543
544    /// Returns the record batch iterator to read the range.
545    /// ## Note
546    /// Implementations should ensure the iterator yields data within given time range.
547    fn build_record_batch(
548        &self,
549        time_range: Option<(Timestamp, Timestamp)>,
550        metrics: Option<MemScanMetrics>,
551    ) -> Result<BoxedRecordBatchIterator> {
552        let _metrics = metrics;
553        let _ = time_range;
554        UnsupportedOperationSnafu {
555            err_msg: "Record batch iterator is not supported by this memtable",
556        }
557        .fail()
558    }
559
560    /// Returns the [EncodedRange] if the range is already encoded into SST.
561    fn encoded_range(&self) -> Option<EncodedRange> {
562        None
563    }
564}
565
566pub type BoxedIterBuilder = Box<dyn IterBuilder>;
567
568/// Computes the column IDs to read based on the projection.
569///
570/// If `projection` is `Some`, returns those column IDs. If `None`, returns all column IDs
571/// from the metadata.
572pub fn read_column_ids_from_projection(
573    metadata: &RegionMetadataRef,
574    projection: Option<&[ColumnId]>,
575) -> Vec<ColumnId> {
576    if let Some(projection) = projection {
577        projection.to_vec()
578    } else {
579        metadata
580            .column_metadatas
581            .iter()
582            .map(|c| c.column_id)
583            .collect()
584    }
585}
586
587/// Context to adapt batch iterators to record batch iterators for flat scan.
588pub struct BatchToRecordBatchContext {
589    metadata: RegionMetadataRef,
590    codec: Arc<dyn PrimaryKeyCodec>,
591    read_column_ids: Vec<ColumnId>,
592}
593
594impl BatchToRecordBatchContext {
595    /// Creates a new context for adapting batch iterators.
596    pub fn new(metadata: RegionMetadataRef, mut read_column_ids: Vec<ColumnId>) -> Self {
597        if read_column_ids.is_empty() {
598            read_column_ids.push(metadata.time_index_column().column_id);
599        }
600
601        let codec = build_primary_key_codec(&metadata);
602        Self {
603            metadata,
604            codec,
605            read_column_ids,
606        }
607    }
608
609    fn adapt_iter(&self, iter: BoxedBatchIterator) -> BoxedRecordBatchIterator {
610        Box::new(BatchToRecordBatchAdapter::new(
611            iter,
612            self.metadata.clone(),
613            self.codec.clone(),
614            &self.read_column_ids,
615        ))
616    }
617}
618
619/// Context shared by ranges of the same memtable.
620pub struct MemtableRangeContext {
621    /// Id of the memtable.
622    id: MemtableId,
623    /// Iterator builder.
624    builder: BoxedIterBuilder,
625    /// All filters.
626    predicate: PredicateGroup,
627    /// Optional context to adapt batch iterators for flat scans.
628    batch_to_record_batch: Option<Arc<BatchToRecordBatchContext>>,
629}
630
631pub type MemtableRangeContextRef = Arc<MemtableRangeContext>;
632
633impl MemtableRangeContext {
634    /// Creates a new [MemtableRangeContext].
635    pub fn new(id: MemtableId, builder: BoxedIterBuilder, predicate: PredicateGroup) -> Self {
636        Self::new_with_batch_to_record_batch(id, builder, predicate, None)
637    }
638
639    /// Creates a new [MemtableRangeContext] with optional adapter context.
640    pub fn new_with_batch_to_record_batch(
641        id: MemtableId,
642        builder: BoxedIterBuilder,
643        predicate: PredicateGroup,
644        batch_to_record_batch: Option<Arc<BatchToRecordBatchContext>>,
645    ) -> Self {
646        Self {
647            id,
648            builder,
649            predicate,
650            batch_to_record_batch,
651        }
652    }
653}
654
655/// A range in the memtable.
656#[derive(Clone)]
657pub struct MemtableRange {
658    /// Shared context.
659    context: MemtableRangeContextRef,
660    /// Statistics for this memtable range.
661    stats: MemtableStats,
662}
663
664impl MemtableRange {
665    /// Creates a new range from context and stats.
666    pub fn new(context: MemtableRangeContextRef, stats: MemtableStats) -> Self {
667        Self { context, stats }
668    }
669
670    /// Returns the statistics for this range.
671    pub fn stats(&self) -> &MemtableStats {
672        &self.stats
673    }
674
675    /// Returns the id of the memtable to read.
676    pub fn id(&self) -> MemtableId {
677        self.context.id
678    }
679
680    /// Builds an iterator to read the range.
681    /// Filters the result by the specific time range, this ensures memtable won't return
682    /// rows out of the time range when new rows are inserted.
683    pub fn build_prune_iter(
684        &self,
685        time_range: FileTimeRange,
686        metrics: Option<MemScanMetrics>,
687    ) -> Result<BoxedBatchIterator> {
688        let iter = self.context.builder.build(metrics)?;
689        let time_filters = self.context.predicate.time_filters();
690        Ok(Box::new(PruneTimeIterator::new(
691            iter,
692            time_range,
693            time_filters,
694        )))
695    }
696
697    /// Builds an iterator to read all rows in range.
698    pub fn build_iter(&self) -> Result<BoxedBatchIterator> {
699        self.context.builder.build(None)
700    }
701
702    /// Builds a record batch iterator to read rows in range.
703    ///
704    /// For mutable memtables (adapter path), applies time-range pruning to ensure rows
705    /// outside the time range are filtered, matching the behavior of `build_prune_iter`.
706    pub fn build_record_batch_iter(
707        &self,
708        time_range: Option<FileTimeRange>,
709        metrics: Option<MemScanMetrics>,
710    ) -> Result<BoxedRecordBatchIterator> {
711        if self.context.builder.is_record_batch() {
712            return self.context.builder.build_record_batch(time_range, metrics);
713        }
714
715        if let Some(context) = self.context.batch_to_record_batch.as_ref() {
716            let iter = self.context.builder.build(metrics)?;
717            let iter: BoxedBatchIterator = if let Some(time_range) = time_range {
718                let time_filters = self.context.predicate.time_filters();
719                Box::new(PruneTimeIterator::new(iter, time_range, time_filters))
720            } else {
721                iter
722            };
723            return Ok(context.adapt_iter(iter));
724        }
725
726        UnsupportedOperationSnafu {
727            err_msg: "Record batch iterator is not supported by this memtable",
728        }
729        .fail()
730    }
731
732    /// Returns whether the iterator is a record batch iterator.
733    pub fn is_record_batch(&self) -> bool {
734        self.context.builder.is_record_batch()
735    }
736
737    pub fn num_rows(&self) -> usize {
738        self.stats.num_rows
739    }
740
741    /// Returns the encoded range if available.
742    pub fn encoded(&self) -> Option<EncodedRange> {
743        self.context.builder.encoded_range()
744    }
745}
746
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::flush::{WriteBufferManager, WriteBufferManagerImpl};
    use crate::memtable::bulk::BulkMemtableConfig;

    /// A tracker without a manager still counts the bytes it is told about.
    #[test]
    fn test_alloc_tracker_without_manager() {
        let tracker = AllocTracker::new(None);
        assert_eq!(0, tracker.bytes_allocated());

        // Each allocation adds to the running total.
        for (bytes, expected_total) in [(100, 100), (200, 300)] {
            tracker.on_allocation(bytes);
            assert_eq!(expected_total, tracker.bytes_allocated());
        }

        // Finishing allocation does not change the counter.
        tracker.done_allocating();
        assert_eq!(300, tracker.bytes_allocated());
    }

    /// The manager sees reservations, the scheduled free and the final free.
    #[test]
    fn test_alloc_tracker_with_manager() {
        let write_buffer_manager = Arc::new(WriteBufferManagerImpl::new(1000));
        {
            let tracker =
                AllocTracker::new(Some(write_buffer_manager.clone() as WriteBufferManagerRef));

            tracker.on_allocation(100);
            assert_eq!(100, tracker.bytes_allocated());
            assert_eq!(100, write_buffer_manager.memory_usage());
            assert_eq!(100, write_buffer_manager.mutable_usage());

            for _ in 0..2 {
                // Done allocating won't free the same memory multiple times.
                tracker.done_allocating();
                assert_eq!(100, write_buffer_manager.memory_usage());
                assert_eq!(0, write_buffer_manager.mutable_usage());
            }
        }

        // Dropping the tracker releases everything.
        assert_eq!(0, write_buffer_manager.memory_usage());
        assert_eq!(0, write_buffer_manager.mutable_usage());
    }

    /// Dropping a tracker that never finished allocating still frees its memory.
    #[test]
    fn test_alloc_tracker_without_done_allocating() {
        let write_buffer_manager = Arc::new(WriteBufferManagerImpl::new(1000));
        {
            let tracker =
                AllocTracker::new(Some(write_buffer_manager.clone() as WriteBufferManagerRef));

            tracker.on_allocation(100);
            assert_eq!(100, tracker.bytes_allocated());
            assert_eq!(100, write_buffer_manager.memory_usage());
            assert_eq!(100, write_buffer_manager.mutable_usage());
        }

        assert_eq!(0, write_buffer_manager.memory_usage());
        assert_eq!(0, write_buffer_manager.mutable_usage());
    }

    /// A bulk config in the region options is kept by the bulk memtable builder.
    #[test]
    fn test_forced_bulk_memtable_preserves_bulk_config() {
        let provider = MemtableBuilderProvider::new(None, Arc::new(MitoConfig::default()));
        let bulk_config = BulkMemtableConfig {
            merge_threshold: 7,
            encode_row_threshold: 11,
            encode_bytes_threshold: 13,
            max_merge_groups: 17,
        };
        let options = RegionOptions {
            memtable: Some(MemtableOptions::Bulk(bulk_config.clone())),
            primary_key_encoding: Some(PrimaryKeyEncoding::Sparse),
            ..Default::default()
        };

        let dedup = options.need_dedup();
        let merge_mode = options.merge_mode();
        let builder = provider.bulk_memtable_builder(dedup, merge_mode, &options);

        assert_eq!(&bulk_config, builder.config());
    }
}