1pub(crate) mod chunk_reader;
18#[allow(unused)]
19pub mod context;
20#[allow(unused)]
21pub mod part;
22pub mod part_reader;
23mod row_group_reader;
24
25use std::collections::{BTreeMap, HashSet};
26use std::sync::atomic::{AtomicI64, AtomicU64, AtomicUsize, Ordering};
27use std::sync::{Arc, LazyLock, Mutex, RwLock};
28use std::time::Instant;
29
/// Reads a `usize` from the environment variable `name`.
///
/// Returns `default` when the variable cannot be read or its value does not
/// parse as a `usize`.
fn env_usize(name: &str, default: usize) -> usize {
    match std::env::var(name) {
        Ok(raw) => raw.parse::<usize>().unwrap_or(default),
        Err(_) => default,
    }
}
37
38use common_time::Timestamp;
39use datatypes::arrow::datatypes::SchemaRef;
40use mito_codec::key_values::KeyValue;
41use rayon::prelude::*;
42use store_api::metadata::RegionMetadataRef;
43use store_api::storage::{ColumnId, FileId, RegionId, SequenceRange};
44use tokio::sync::Semaphore;
45
46use crate::error::{Result, UnsupportedOperationSnafu};
47use crate::flush::WriteBufferManagerRef;
48use crate::memtable::bulk::context::BulkIterContext;
49use crate::memtable::bulk::part::{
50 BulkPart, BulkPartEncodeMetrics, BulkPartEncoder, MultiBulkPart, UnorderedPart,
51};
52use crate::memtable::bulk::part_reader::BulkPartBatchIter;
53use crate::memtable::stats::WriteMetrics;
54use crate::memtable::{
55 AllocTracker, BoxedBatchIterator, BoxedRecordBatchIterator, EncodedBulkPart, EncodedRange,
56 IterBuilder, KeyValues, MemScanMetrics, Memtable, MemtableBuilder, MemtableId, MemtableRange,
57 MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats, RangesOptions,
58};
59use crate::read::flat_dedup::{FlatDedupIterator, FlatLastNonNull, FlatLastRow};
60use crate::read::flat_merge::FlatMergeIterator;
61use crate::region::options::MergeMode;
62use crate::sst::parquet::flat_format::field_column_start;
63use crate::sst::parquet::{DEFAULT_READ_BATCH_SIZE, DEFAULT_ROW_GROUP_SIZE};
64use crate::sst::{FlatSchemaOptions, to_flat_sst_arrow_schema};
65
66const DEFAULT_MERGE_THRESHOLD: usize = 16;
68
69static MERGE_THRESHOLD: LazyLock<usize> =
71 LazyLock::new(|| env_usize("GREPTIME_BULK_MERGE_THRESHOLD", DEFAULT_MERGE_THRESHOLD));
72
73const DEFAULT_MAX_MERGE_GROUPS: usize = 32;
75
76static MAX_MERGE_GROUPS: LazyLock<usize> =
78 LazyLock::new(|| env_usize("GREPTIME_BULK_MAX_MERGE_GROUPS", DEFAULT_MAX_MERGE_GROUPS));
79
80pub(crate) static ENCODE_ROW_THRESHOLD: LazyLock<usize> = LazyLock::new(|| {
83 env_usize(
84 "GREPTIME_BULK_ENCODE_ROW_THRESHOLD",
85 10 * DEFAULT_ROW_GROUP_SIZE,
86 )
87});
88
89const DEFAULT_ENCODE_BYTES_THRESHOLD: usize = 64 * 1024 * 1024;
91
92static ENCODE_BYTES_THRESHOLD: LazyLock<usize> = LazyLock::new(|| {
95 env_usize(
96 "GREPTIME_BULK_ENCODE_BYTES_THRESHOLD",
97 DEFAULT_ENCODE_BYTES_THRESHOLD,
98 )
99});
100
101#[derive(Debug, Clone)]
103pub struct BulkMemtableConfig {
104 pub merge_threshold: usize,
106 pub encode_row_threshold: usize,
108 pub encode_bytes_threshold: usize,
110 pub max_merge_groups: usize,
112}
113
114impl Default for BulkMemtableConfig {
115 fn default() -> Self {
116 Self {
117 merge_threshold: *MERGE_THRESHOLD,
118 encode_row_threshold: *ENCODE_ROW_THRESHOLD,
119 encode_bytes_threshold: *ENCODE_BYTES_THRESHOLD,
120 max_merge_groups: *MAX_MERGE_GROUPS,
121 }
122 }
123}
124
125enum MergedPart {
127 Multi(MultiBulkPart),
129 Encoded(EncodedBulkPart),
131}
132
133struct CollectedParts {
135 groups: Vec<Vec<PartToMerge>>,
137}
138
139#[derive(Default)]
141struct BulkParts {
142 unordered_part: UnorderedPart,
144 parts: Vec<BulkPartWrapper>,
146}
147
148impl BulkParts {
149 fn num_parts(&self) -> usize {
151 let unordered_count = if self.unordered_part.is_empty() { 0 } else { 1 };
152 self.parts.len() + unordered_count
153 }
154
155 fn is_empty(&self) -> bool {
157 self.unordered_part.is_empty() && self.parts.is_empty()
158 }
159
160 fn should_merge_parts(&self, merge_threshold: usize) -> bool {
163 let mut bulk_count = 0;
164 let mut encoded_count = 0;
165
166 for wrapper in &self.parts {
167 if wrapper.merging {
168 continue;
169 }
170
171 if wrapper.part.is_encoded() {
172 encoded_count += 1;
173 } else {
174 bulk_count += 1;
175 }
176
177 if bulk_count >= merge_threshold || encoded_count >= merge_threshold {
179 return true;
180 }
181 }
182
183 false
184 }
185
186 fn should_compact_unordered_part(&self) -> bool {
188 self.unordered_part.should_compact()
189 }
190
191 fn collect_parts_to_merge(
195 &mut self,
196 merge_threshold: usize,
197 max_merge_groups: usize,
198 ) -> CollectedParts {
199 let mut bulk_indices: Vec<(usize, usize)> = Vec::new();
201 let mut encoded_indices: Vec<(usize, usize)> = Vec::new();
202
203 for (idx, wrapper) in self.parts.iter().enumerate() {
204 if wrapper.merging {
205 continue;
206 }
207 let num_rows = wrapper.part.num_rows();
208 if wrapper.part.is_encoded() {
209 encoded_indices.push((idx, num_rows));
210 } else {
211 bulk_indices.push((idx, num_rows));
212 }
213 }
214
215 let mut groups = Vec::new();
216
217 if bulk_indices.len() >= merge_threshold {
219 groups.extend(self.collect_and_group_parts(
220 bulk_indices,
221 merge_threshold,
222 max_merge_groups,
223 ));
224 }
225
226 if encoded_indices.len() >= merge_threshold {
228 groups.extend(self.collect_and_group_parts(
229 encoded_indices,
230 merge_threshold,
231 max_merge_groups,
232 ));
233 }
234
235 CollectedParts { groups }
236 }
237
238 fn collect_and_group_parts(
240 &mut self,
241 mut indices: Vec<(usize, usize)>,
242 merge_threshold: usize,
243 max_merge_groups: usize,
244 ) -> Vec<Vec<PartToMerge>> {
245 if indices.is_empty() {
246 return Vec::new();
247 }
248
249 indices.sort_unstable_by_key(|(_, num_rows)| *num_rows);
251
252 indices
254 .chunks(merge_threshold)
255 .take(max_merge_groups)
256 .map(|chunk| {
257 chunk
258 .iter()
259 .map(|(idx, _)| {
260 let wrapper = &mut self.parts[*idx];
261 wrapper.merging = true;
262 wrapper.part.clone()
263 })
264 .collect()
265 })
266 .collect()
267 }
268
269 fn install_merged_parts<I>(
272 &mut self,
273 merged_parts: I,
274 merged_file_ids: &HashSet<FileId>,
275 ) -> usize
276 where
277 I: IntoIterator<Item = MergedPart>,
278 {
279 let mut total_output_rows = 0;
280
281 for merged_part in merged_parts {
282 match merged_part {
283 MergedPart::Encoded(encoded_part) => {
284 total_output_rows += encoded_part.metadata().num_rows;
285 self.parts.push(BulkPartWrapper {
286 part: PartToMerge::Encoded {
287 part: encoded_part,
288 file_id: FileId::random(),
289 },
290 merging: false,
291 });
292 }
293 MergedPart::Multi(multi_part) => {
294 total_output_rows += multi_part.num_rows();
295 self.parts.push(BulkPartWrapper {
296 part: PartToMerge::Multi {
297 part: multi_part,
298 file_id: FileId::random(),
299 },
300 merging: false,
301 });
302 }
303 }
304 }
305
306 self.parts
307 .retain(|wrapper| !merged_file_ids.contains(&wrapper.file_id()));
308
309 total_output_rows
310 }
311
312 fn reset_merging_flags(&mut self, file_ids: &HashSet<FileId>) {
315 for wrapper in &mut self.parts {
316 if file_ids.contains(&wrapper.file_id()) {
317 wrapper.merging = false;
318 }
319 }
320 }
321}
322
323struct MergingFlagsGuard<'a> {
326 bulk_parts: &'a RwLock<BulkParts>,
327 file_ids: &'a HashSet<FileId>,
328 success: bool,
329}
330
331impl<'a> MergingFlagsGuard<'a> {
332 fn new(bulk_parts: &'a RwLock<BulkParts>, file_ids: &'a HashSet<FileId>) -> Self {
334 Self {
335 bulk_parts,
336 file_ids,
337 success: false,
338 }
339 }
340
341 fn mark_success(&mut self) {
344 self.success = true;
345 }
346}
347
348impl<'a> Drop for MergingFlagsGuard<'a> {
349 fn drop(&mut self) {
350 if !self.success
351 && let Ok(mut parts) = self.bulk_parts.write()
352 {
353 parts.reset_merging_flags(self.file_ids);
354 }
355 }
356}
357
358pub struct BulkMemtable {
360 id: MemtableId,
361 config: BulkMemtableConfig,
363 parts: Arc<RwLock<BulkParts>>,
364 metadata: RegionMetadataRef,
365 alloc_tracker: AllocTracker,
366 max_timestamp: AtomicI64,
367 min_timestamp: AtomicI64,
368 max_sequence: AtomicU64,
369 num_rows: AtomicUsize,
370 flat_arrow_schema: SchemaRef,
372 compactor: Arc<Mutex<MemtableCompactor>>,
374 compact_dispatcher: Option<Arc<CompactDispatcher>>,
376 append_mode: bool,
378 merge_mode: MergeMode,
380}
381
382impl std::fmt::Debug for BulkMemtable {
383 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
384 f.debug_struct("BulkMemtable")
385 .field("id", &self.id)
386 .field("num_rows", &self.num_rows.load(Ordering::Relaxed))
387 .field("min_timestamp", &self.min_timestamp.load(Ordering::Relaxed))
388 .field("max_timestamp", &self.max_timestamp.load(Ordering::Relaxed))
389 .field("max_sequence", &self.max_sequence.load(Ordering::Relaxed))
390 .finish()
391 }
392}
393
394impl Memtable for BulkMemtable {
395 fn id(&self) -> MemtableId {
396 self.id
397 }
398
399 fn write(&self, _kvs: &KeyValues) -> Result<()> {
400 UnsupportedOperationSnafu {
401 err_msg: "write() is not supported for bulk memtable",
402 }
403 .fail()
404 }
405
406 fn write_one(&self, _key_value: KeyValue) -> Result<()> {
407 UnsupportedOperationSnafu {
408 err_msg: "write_one() is not supported for bulk memtable",
409 }
410 .fail()
411 }
412
413 fn write_bulk(&self, fragment: BulkPart) -> Result<()> {
414 let local_metrics = WriteMetrics {
415 key_bytes: 0,
416 value_bytes: fragment.estimated_size(),
417 min_ts: fragment.min_timestamp,
418 max_ts: fragment.max_timestamp,
419 num_rows: fragment.num_rows(),
420 max_sequence: fragment.sequence,
421 };
422
423 {
424 let mut bulk_parts = self.parts.write().unwrap();
425
426 if bulk_parts.unordered_part.should_accept(fragment.num_rows()) {
428 bulk_parts.unordered_part.push(fragment);
429
430 if bulk_parts.should_compact_unordered_part()
432 && let Some(bulk_part) = bulk_parts.unordered_part.to_bulk_part()?
433 {
434 bulk_parts.parts.push(BulkPartWrapper {
435 part: PartToMerge::Bulk {
436 part: bulk_part,
437 file_id: FileId::random(),
438 },
439 merging: false,
440 });
441 bulk_parts.unordered_part.clear();
442 }
443 } else {
444 bulk_parts.parts.push(BulkPartWrapper {
445 part: PartToMerge::Bulk {
446 part: fragment,
447 file_id: FileId::random(),
448 },
449 merging: false,
450 });
451 }
452
453 self.update_stats(local_metrics);
458 }
459
460 if self.should_compact() {
461 self.schedule_compact();
462 }
463
464 Ok(())
465 }
466
467 fn ranges(
468 &self,
469 projection: Option<&[ColumnId]>,
470 options: RangesOptions,
471 ) -> Result<MemtableRanges> {
472 let predicate = options.predicate;
473 let sequence = options.sequence;
474 let mut ranges = BTreeMap::new();
475 let mut range_id = 0;
476
477 let context = Arc::new(BulkIterContext::new_with_pre_filter_mode(
479 self.metadata.clone(),
480 projection,
481 predicate.predicate().cloned(),
482 options.for_flush,
483 options.pre_filter_mode,
484 )?);
485
486 {
488 let bulk_parts = self.parts.read().unwrap();
489
490 if !bulk_parts.unordered_part.is_empty()
492 && let Some(unordered_bulk_part) = bulk_parts.unordered_part.to_bulk_part()?
493 {
494 let part_stats = unordered_bulk_part.to_memtable_stats(&self.metadata);
495 let range = MemtableRange::new(
496 Arc::new(MemtableRangeContext::new(
497 self.id,
498 Box::new(BulkRangeIterBuilder {
499 part: unordered_bulk_part,
500 context: context.clone(),
501 sequence,
502 }),
503 predicate.clone(),
504 )),
505 part_stats,
506 );
507 ranges.insert(range_id, range);
508 range_id += 1;
509 }
510
511 for part_wrapper in bulk_parts.parts.iter() {
513 if part_wrapper.part.num_rows() == 0 {
515 continue;
516 }
517
518 let part_stats = part_wrapper.part.to_memtable_stats(&self.metadata);
519 let iter_builder: Box<dyn IterBuilder> = match &part_wrapper.part {
520 PartToMerge::Bulk { part, .. } => Box::new(BulkRangeIterBuilder {
521 part: part.clone(),
522 context: context.clone(),
523 sequence,
524 }),
525 PartToMerge::Multi { part, .. } => Box::new(MultiBulkRangeIterBuilder {
526 part: part.clone(),
527 context: context.clone(),
528 sequence,
529 }),
530 PartToMerge::Encoded { part, file_id } => {
531 Box::new(EncodedBulkRangeIterBuilder {
532 file_id: *file_id,
533 part: part.clone(),
534 context: context.clone(),
535 sequence,
536 })
537 }
538 };
539
540 let range = MemtableRange::new(
541 Arc::new(MemtableRangeContext::new(
542 self.id,
543 iter_builder,
544 predicate.clone(),
545 )),
546 part_stats,
547 );
548 ranges.insert(range_id, range);
549 range_id += 1;
550 }
551 }
552
553 Ok(MemtableRanges { ranges })
554 }
555
556 fn is_empty(&self) -> bool {
557 let bulk_parts = self.parts.read().unwrap();
558 bulk_parts.is_empty()
559 }
560
561 fn freeze(&self) -> Result<()> {
562 self.alloc_tracker.done_allocating();
563 Ok(())
564 }
565
566 fn stats(&self) -> MemtableStats {
567 let estimated_bytes = self.alloc_tracker.bytes_allocated();
568
569 if estimated_bytes == 0 || self.num_rows.load(Ordering::Relaxed) == 0 {
570 return MemtableStats {
571 estimated_bytes,
572 time_range: None,
573 num_rows: 0,
574 num_ranges: 0,
575 max_sequence: 0,
576 series_count: 0,
577 };
578 }
579
580 let ts_type = self
581 .metadata
582 .time_index_column()
583 .column_schema
584 .data_type
585 .clone()
586 .as_timestamp()
587 .expect("Timestamp column must have timestamp type");
588 let max_timestamp = ts_type.create_timestamp(self.max_timestamp.load(Ordering::Relaxed));
589 let min_timestamp = ts_type.create_timestamp(self.min_timestamp.load(Ordering::Relaxed));
590
591 let num_ranges = self.parts.read().unwrap().num_parts();
592
593 MemtableStats {
594 estimated_bytes,
595 time_range: Some((min_timestamp, max_timestamp)),
596 num_rows: self.num_rows.load(Ordering::Relaxed),
597 num_ranges,
598 max_sequence: self.max_sequence.load(Ordering::Relaxed),
599 series_count: self.estimated_series_count(),
600 }
601 }
602
603 fn fork(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
604 let flat_arrow_schema = to_flat_sst_arrow_schema(
606 metadata,
607 &FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
608 );
609
610 Arc::new(Self {
611 id,
612 config: self.config.clone(),
613 parts: Arc::new(RwLock::new(BulkParts::default())),
614 metadata: metadata.clone(),
615 alloc_tracker: AllocTracker::new(self.alloc_tracker.write_buffer_manager()),
616 max_timestamp: AtomicI64::new(i64::MIN),
617 min_timestamp: AtomicI64::new(i64::MAX),
618 max_sequence: AtomicU64::new(0),
619 num_rows: AtomicUsize::new(0),
620 flat_arrow_schema,
621 compactor: Arc::new(Mutex::new(MemtableCompactor::new(
622 metadata.region_id,
623 id,
624 self.config.clone(),
625 ))),
626 compact_dispatcher: self.compact_dispatcher.clone(),
627 append_mode: self.append_mode,
628 merge_mode: self.merge_mode,
629 })
630 }
631
632 fn compact(&self, for_flush: bool) -> Result<()> {
633 let mut compactor = self.compactor.lock().unwrap();
634
635 if for_flush {
636 return Ok(());
637 }
638
639 let should_merge = self
641 .parts
642 .read()
643 .unwrap()
644 .should_merge_parts(self.config.merge_threshold);
645 if should_merge {
646 compactor.merge_parts(
647 &self.flat_arrow_schema,
648 &self.parts,
649 &self.metadata,
650 !self.append_mode,
651 self.merge_mode,
652 )?;
653 }
654
655 Ok(())
656 }
657}
658
659impl BulkMemtable {
660 pub fn new(
662 id: MemtableId,
663 config: BulkMemtableConfig,
664 metadata: RegionMetadataRef,
665 write_buffer_manager: Option<WriteBufferManagerRef>,
666 compact_dispatcher: Option<Arc<CompactDispatcher>>,
667 append_mode: bool,
668 merge_mode: MergeMode,
669 ) -> Self {
670 let flat_arrow_schema = to_flat_sst_arrow_schema(
671 &metadata,
672 &FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
673 );
674
675 let region_id = metadata.region_id;
676 Self {
677 id,
678 config: config.clone(),
679 parts: Arc::new(RwLock::new(BulkParts::default())),
680 metadata,
681 alloc_tracker: AllocTracker::new(write_buffer_manager),
682 max_timestamp: AtomicI64::new(i64::MIN),
683 min_timestamp: AtomicI64::new(i64::MAX),
684 max_sequence: AtomicU64::new(0),
685 num_rows: AtomicUsize::new(0),
686 flat_arrow_schema,
687 compactor: Arc::new(Mutex::new(MemtableCompactor::new(region_id, id, config))),
688 compact_dispatcher,
689 append_mode,
690 merge_mode,
691 }
692 }
693
694 #[cfg(test)]
696 pub fn set_unordered_part_threshold(&self, threshold: usize) {
697 self.parts
698 .write()
699 .unwrap()
700 .unordered_part
701 .set_threshold(threshold);
702 }
703
704 #[cfg(test)]
706 pub fn set_unordered_part_compact_threshold(&self, compact_threshold: usize) {
707 self.parts
708 .write()
709 .unwrap()
710 .unordered_part
711 .set_compact_threshold(compact_threshold);
712 }
713
714 fn update_stats(&self, stats: WriteMetrics) {
718 self.alloc_tracker
719 .on_allocation(stats.key_bytes + stats.value_bytes);
720
721 self.max_timestamp
722 .fetch_max(stats.max_ts, Ordering::Relaxed);
723 self.min_timestamp
724 .fetch_min(stats.min_ts, Ordering::Relaxed);
725 self.max_sequence
726 .fetch_max(stats.max_sequence, Ordering::Relaxed);
727 self.num_rows.fetch_add(stats.num_rows, Ordering::Relaxed);
728 }
729
730 fn estimated_series_count(&self) -> usize {
732 let bulk_parts = self.parts.read().unwrap();
733 bulk_parts
734 .parts
735 .iter()
736 .map(|part_wrapper| part_wrapper.part.series_count())
737 .sum()
738 }
739
740 fn should_compact(&self) -> bool {
742 let parts = self.parts.read().unwrap();
743 parts.should_merge_parts(self.config.merge_threshold)
744 }
745
746 fn schedule_compact(&self) {
748 if let Some(dispatcher) = &self.compact_dispatcher {
749 let task = MemCompactTask {
750 metadata: self.metadata.clone(),
751 parts: self.parts.clone(),
752 config: self.config.clone(),
753 flat_arrow_schema: self.flat_arrow_schema.clone(),
754 compactor: self.compactor.clone(),
755 append_mode: self.append_mode,
756 merge_mode: self.merge_mode,
757 };
758
759 dispatcher.dispatch_compact(task);
760 } else {
761 if let Err(e) = self.compact(false) {
763 common_telemetry::error!(e; "Failed to compact table");
764 }
765 }
766 }
767}
768
769pub struct BulkRangeIterBuilder {
771 pub part: BulkPart,
772 pub context: Arc<BulkIterContext>,
773 pub sequence: Option<SequenceRange>,
774}
775
776struct MultiBulkRangeIterBuilder {
778 part: MultiBulkPart,
779 context: Arc<BulkIterContext>,
780 sequence: Option<SequenceRange>,
781}
782
783impl IterBuilder for BulkRangeIterBuilder {
784 fn build(&self, _metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator> {
785 UnsupportedOperationSnafu {
786 err_msg: "BatchIterator is not supported for bulk memtable",
787 }
788 .fail()
789 }
790
791 fn is_record_batch(&self) -> bool {
792 true
793 }
794
795 fn build_record_batch(
796 &self,
797 _time_range: Option<(Timestamp, Timestamp)>,
798 metrics: Option<MemScanMetrics>,
799 ) -> Result<BoxedRecordBatchIterator> {
800 let series_count = self.part.estimated_series_count();
801 let iter = BulkPartBatchIter::from_single(
802 self.part.batch.clone(),
803 self.context.clone(),
804 self.sequence,
805 series_count,
806 metrics,
807 );
808
809 Ok(Box::new(iter))
810 }
811
812 fn encoded_range(&self) -> Option<EncodedRange> {
813 None
814 }
815}
816
817impl IterBuilder for MultiBulkRangeIterBuilder {
818 fn build(&self, _metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator> {
819 UnsupportedOperationSnafu {
820 err_msg: "BatchIterator is not supported for multi bulk memtable",
821 }
822 .fail()
823 }
824
825 fn is_record_batch(&self) -> bool {
826 true
827 }
828
829 fn build_record_batch(
830 &self,
831 _time_range: Option<(Timestamp, Timestamp)>,
832 metrics: Option<MemScanMetrics>,
833 ) -> Result<BoxedRecordBatchIterator> {
834 self.part
835 .read(self.context.clone(), self.sequence, metrics)?
836 .ok_or_else(|| {
837 UnsupportedOperationSnafu {
838 err_msg: "Failed to create iterator for multi bulk part",
839 }
840 .build()
841 })
842 }
843
844 fn encoded_range(&self) -> Option<EncodedRange> {
845 None
846 }
847}
848
849struct EncodedBulkRangeIterBuilder {
851 file_id: FileId,
852 part: EncodedBulkPart,
853 context: Arc<BulkIterContext>,
854 sequence: Option<SequenceRange>,
855}
856
857impl IterBuilder for EncodedBulkRangeIterBuilder {
858 fn build(&self, _metrics: Option<MemScanMetrics>) -> Result<BoxedBatchIterator> {
859 UnsupportedOperationSnafu {
860 err_msg: "BatchIterator is not supported for encoded bulk memtable",
861 }
862 .fail()
863 }
864
865 fn is_record_batch(&self) -> bool {
866 true
867 }
868
869 fn build_record_batch(
870 &self,
871 _time_range: Option<(Timestamp, Timestamp)>,
872 metrics: Option<MemScanMetrics>,
873 ) -> Result<BoxedRecordBatchIterator> {
874 if let Some(iter) = self
875 .part
876 .read(self.context.clone(), self.sequence, metrics)?
877 {
878 Ok(iter)
879 } else {
880 Ok(Box::new(std::iter::empty()))
882 }
883 }
884
885 fn encoded_range(&self) -> Option<EncodedRange> {
886 Some(EncodedRange {
887 data: self.part.data().clone(),
888 sst_info: self.part.to_sst_info(self.file_id),
889 })
890 }
891}
892
893struct BulkPartWrapper {
894 part: PartToMerge,
896 merging: bool,
898}
899
900impl BulkPartWrapper {
901 fn file_id(&self) -> FileId {
903 self.part.file_id()
904 }
905}
906
907#[derive(Clone)]
909enum PartToMerge {
910 Bulk { part: BulkPart, file_id: FileId },
912 Multi {
914 part: MultiBulkPart,
915 file_id: FileId,
916 },
917 Encoded {
919 part: EncodedBulkPart,
920 file_id: FileId,
921 },
922}
923
924impl PartToMerge {
925 fn file_id(&self) -> FileId {
927 match self {
928 PartToMerge::Bulk { file_id, .. } => *file_id,
929 PartToMerge::Multi { file_id, .. } => *file_id,
930 PartToMerge::Encoded { file_id, .. } => *file_id,
931 }
932 }
933
934 fn min_timestamp(&self) -> i64 {
936 match self {
937 PartToMerge::Bulk { part, .. } => part.min_timestamp,
938 PartToMerge::Multi { part, .. } => part.min_timestamp(),
939 PartToMerge::Encoded { part, .. } => part.metadata().min_timestamp,
940 }
941 }
942
943 fn max_timestamp(&self) -> i64 {
945 match self {
946 PartToMerge::Bulk { part, .. } => part.max_timestamp,
947 PartToMerge::Multi { part, .. } => part.max_timestamp(),
948 PartToMerge::Encoded { part, .. } => part.metadata().max_timestamp,
949 }
950 }
951
952 fn num_rows(&self) -> usize {
954 match self {
955 PartToMerge::Bulk { part, .. } => part.num_rows(),
956 PartToMerge::Multi { part, .. } => part.num_rows(),
957 PartToMerge::Encoded { part, .. } => part.metadata().num_rows,
958 }
959 }
960
961 fn max_sequence(&self) -> u64 {
963 match self {
964 PartToMerge::Bulk { part, .. } => part.sequence,
965 PartToMerge::Multi { part, .. } => part.max_sequence(),
966 PartToMerge::Encoded { part, .. } => part.metadata().max_sequence,
967 }
968 }
969
970 fn series_count(&self) -> usize {
972 match self {
973 PartToMerge::Bulk { part, .. } => part.estimated_series_count(),
974 PartToMerge::Multi { part, .. } => part.series_count(),
975 PartToMerge::Encoded { part, .. } => part.metadata().num_series as usize,
976 }
977 }
978
979 fn is_encoded(&self) -> bool {
981 matches!(self, PartToMerge::Encoded { .. })
982 }
983
984 fn estimated_size(&self) -> usize {
986 match self {
987 PartToMerge::Bulk { part, .. } => part.estimated_size(),
988 PartToMerge::Multi { part, .. } => part.estimated_size(),
989 PartToMerge::Encoded { part, .. } => part.size_bytes(),
990 }
991 }
992
993 fn to_memtable_stats(&self, region_metadata: &RegionMetadataRef) -> MemtableStats {
995 match self {
996 PartToMerge::Bulk { part, .. } => part.to_memtable_stats(region_metadata),
997 PartToMerge::Multi { part, .. } => part.to_memtable_stats(region_metadata),
998 PartToMerge::Encoded { part, .. } => part.to_memtable_stats(),
999 }
1000 }
1001
1002 fn create_iterator(
1004 self,
1005 context: Arc<BulkIterContext>,
1006 ) -> Result<Option<BoxedRecordBatchIterator>> {
1007 match self {
1008 PartToMerge::Bulk { part, .. } => {
1009 let series_count = part.estimated_series_count();
1010 let iter = BulkPartBatchIter::from_single(
1011 part.batch,
1012 context,
1013 None, series_count,
1015 None, );
1017 Ok(Some(Box::new(iter) as BoxedRecordBatchIterator))
1018 }
1019 PartToMerge::Multi { part, .. } => part.read(context, None, None),
1020 PartToMerge::Encoded { part, .. } => part.read(context, None, None),
1021 }
1022 }
1023}
1024
1025struct MemtableCompactor {
1026 region_id: RegionId,
1027 memtable_id: MemtableId,
1028 config: BulkMemtableConfig,
1030}
1031
1032impl MemtableCompactor {
1033 fn new(region_id: RegionId, memtable_id: MemtableId, config: BulkMemtableConfig) -> Self {
1035 Self {
1036 region_id,
1037 memtable_id,
1038 config,
1039 }
1040 }
1041
1042 fn merge_parts(
1044 &mut self,
1045 arrow_schema: &SchemaRef,
1046 bulk_parts: &RwLock<BulkParts>,
1047 metadata: &RegionMetadataRef,
1048 dedup: bool,
1049 merge_mode: MergeMode,
1050 ) -> Result<()> {
1051 let start = Instant::now();
1052
1053 let collected = bulk_parts
1055 .write()
1056 .unwrap()
1057 .collect_parts_to_merge(self.config.merge_threshold, self.config.max_merge_groups);
1058
1059 if collected.groups.is_empty() {
1060 return Ok(());
1061 }
1062
1063 let merged_file_ids: HashSet<FileId> = collected
1065 .groups
1066 .iter()
1067 .flatten()
1068 .map(|part| part.file_id())
1069 .collect();
1070 let mut guard = MergingFlagsGuard::new(bulk_parts, &merged_file_ids);
1071
1072 let num_groups = collected.groups.len();
1073 let num_parts: usize = collected.groups.iter().map(|g| g.len()).sum();
1074
1075 let encode_row_threshold = self.config.encode_row_threshold;
1076 let encode_bytes_threshold = self.config.encode_bytes_threshold;
1077
1078 let merged_parts = collected
1080 .groups
1081 .into_par_iter()
1082 .map(|group| {
1083 Self::merge_parts_group(
1084 group,
1085 arrow_schema,
1086 metadata,
1087 dedup,
1088 merge_mode,
1089 encode_row_threshold,
1090 encode_bytes_threshold,
1091 )
1092 })
1093 .collect::<Result<Vec<Option<MergedPart>>>>()?;
1094
1095 let total_output_rows = {
1097 let mut parts = bulk_parts.write().unwrap();
1098 parts.install_merged_parts(merged_parts.into_iter().flatten(), &merged_file_ids)
1099 };
1100
1101 guard.mark_success();
1102
1103 common_telemetry::debug!(
1104 "BulkMemtable {} {} concurrent compact {} groups, {} parts, {} rows, cost: {:?}",
1105 self.region_id,
1106 self.memtable_id,
1107 num_groups,
1108 num_parts,
1109 total_output_rows,
1110 start.elapsed()
1111 );
1112
1113 Ok(())
1114 }
1115
1116 fn merge_parts_group(
1118 parts_to_merge: Vec<PartToMerge>,
1119 arrow_schema: &SchemaRef,
1120 metadata: &RegionMetadataRef,
1121 dedup: bool,
1122 merge_mode: MergeMode,
1123 encode_row_threshold: usize,
1124 encode_bytes_threshold: usize,
1125 ) -> Result<Option<MergedPart>> {
1126 if parts_to_merge.is_empty() {
1127 return Ok(None);
1128 }
1129
1130 let min_timestamp = parts_to_merge
1132 .iter()
1133 .map(|p| p.min_timestamp())
1134 .min()
1135 .unwrap_or(i64::MAX);
1136 let max_timestamp = parts_to_merge
1137 .iter()
1138 .map(|p| p.max_timestamp())
1139 .max()
1140 .unwrap_or(i64::MIN);
1141 let max_sequence = parts_to_merge
1142 .iter()
1143 .map(|p| p.max_sequence())
1144 .max()
1145 .unwrap_or(0);
1146
1147 let estimated_total_rows: usize = parts_to_merge.iter().map(|p| p.num_rows()).sum();
1149 let estimated_total_bytes: usize = parts_to_merge.iter().map(|p| p.estimated_size()).sum();
1150 let estimated_series_count = parts_to_merge
1151 .iter()
1152 .map(|p| p.series_count())
1153 .max()
1154 .unwrap_or(0);
1155
1156 let context = Arc::new(BulkIterContext::new(
1157 metadata.clone(),
1158 None, None, true,
1161 )?);
1162
1163 let iterators: Vec<BoxedRecordBatchIterator> = parts_to_merge
1165 .into_iter()
1166 .filter_map(|part| part.create_iterator(context.clone()).ok().flatten())
1167 .collect();
1168
1169 if iterators.is_empty() {
1170 return Ok(None);
1171 }
1172
1173 let merged_iter =
1174 FlatMergeIterator::new(arrow_schema.clone(), iterators, DEFAULT_READ_BATCH_SIZE)?;
1175
1176 let boxed_iter: BoxedRecordBatchIterator = if dedup {
1177 match merge_mode {
1179 MergeMode::LastRow => {
1180 let dedup_iter = FlatDedupIterator::new(merged_iter, FlatLastRow::new(false));
1181 Box::new(dedup_iter)
1182 }
1183 MergeMode::LastNonNull => {
1184 let field_column_start =
1185 field_column_start(metadata, arrow_schema.fields().len());
1186
1187 let dedup_iter = FlatDedupIterator::new(
1188 merged_iter,
1189 FlatLastNonNull::new(field_column_start, false),
1190 );
1191 Box::new(dedup_iter)
1192 }
1193 }
1194 } else {
1195 Box::new(merged_iter)
1196 };
1197
1198 if estimated_total_rows > encode_row_threshold
1200 || estimated_total_bytes > encode_bytes_threshold
1201 {
1202 let encoder = BulkPartEncoder::new(metadata.clone(), DEFAULT_ROW_GROUP_SIZE)?;
1203 let mut metrics = BulkPartEncodeMetrics::default();
1204 let encoded_part = encoder.encode_record_batch_iter(
1205 boxed_iter,
1206 arrow_schema.clone(),
1207 min_timestamp,
1208 max_timestamp,
1209 max_sequence,
1210 &mut metrics,
1211 )?;
1212
1213 common_telemetry::trace!("merge_parts_group metrics: {:?}", metrics);
1214
1215 Ok(encoded_part.map(MergedPart::Encoded))
1216 } else {
1217 let mut batches = Vec::new();
1219 let mut actual_total_rows = 0;
1220
1221 for batch_result in boxed_iter {
1222 let batch = batch_result?;
1223 actual_total_rows += batch.num_rows();
1224 batches.push(batch);
1225 }
1226
1227 if actual_total_rows == 0 {
1228 return Ok(None);
1229 }
1230
1231 let multi_part = MultiBulkPart::new(
1232 batches,
1233 min_timestamp,
1234 max_timestamp,
1235 max_sequence,
1236 estimated_series_count,
1237 );
1238
1239 common_telemetry::trace!(
1240 "merge_parts_group created MultiBulkPart: rows={}, batches={}",
1241 actual_total_rows,
1242 multi_part.num_batches()
1243 );
1244
1245 Ok(Some(MergedPart::Multi(multi_part)))
1246 }
1247 }
1248}
1249
1250struct MemCompactTask {
1252 metadata: RegionMetadataRef,
1253 parts: Arc<RwLock<BulkParts>>,
1254 config: BulkMemtableConfig,
1256 flat_arrow_schema: SchemaRef,
1258 compactor: Arc<Mutex<MemtableCompactor>>,
1260 append_mode: bool,
1262 merge_mode: MergeMode,
1264}
1265
1266impl MemCompactTask {
1267 fn compact(&self) -> Result<()> {
1268 let mut compactor = self.compactor.lock().unwrap();
1269
1270 let should_merge = self
1271 .parts
1272 .read()
1273 .unwrap()
1274 .should_merge_parts(self.config.merge_threshold);
1275 if should_merge {
1276 compactor.merge_parts(
1277 &self.flat_arrow_schema,
1278 &self.parts,
1279 &self.metadata,
1280 !self.append_mode,
1281 self.merge_mode,
1282 )?;
1283 }
1284
1285 Ok(())
1286 }
1287}
1288
1289#[derive(Debug)]
1291pub struct CompactDispatcher {
1292 semaphore: Arc<Semaphore>,
1293}
1294
1295impl CompactDispatcher {
1296 pub fn new(permits: usize) -> Self {
1298 Self {
1299 semaphore: Arc::new(Semaphore::new(permits)),
1300 }
1301 }
1302
1303 fn dispatch_compact(&self, task: MemCompactTask) {
1305 let semaphore = self.semaphore.clone();
1306 common_runtime::spawn_global(async move {
1307 let Ok(_permit) = semaphore.acquire().await else {
1308 return;
1309 };
1310
1311 common_runtime::spawn_blocking_global(move || {
1312 if let Err(e) = task.compact() {
1313 common_telemetry::error!(e; "Failed to compact memtable, region: {}", task.metadata.region_id);
1314 }
1315 });
1316 });
1317 }
1318}
1319
1320#[derive(Debug, Default)]
1322pub struct BulkMemtableBuilder {
1323 config: BulkMemtableConfig,
1325 write_buffer_manager: Option<WriteBufferManagerRef>,
1326 compact_dispatcher: Option<Arc<CompactDispatcher>>,
1327 append_mode: bool,
1328 merge_mode: MergeMode,
1329}
1330
1331impl BulkMemtableBuilder {
1332 pub fn new(
1334 write_buffer_manager: Option<WriteBufferManagerRef>,
1335 append_mode: bool,
1336 merge_mode: MergeMode,
1337 ) -> Self {
1338 Self {
1339 config: BulkMemtableConfig::default(),
1340 write_buffer_manager,
1341 compact_dispatcher: None,
1342 append_mode,
1343 merge_mode,
1344 }
1345 }
1346
1347 pub fn with_compact_dispatcher(mut self, compact_dispatcher: Arc<CompactDispatcher>) -> Self {
1349 self.compact_dispatcher = Some(compact_dispatcher);
1350 self
1351 }
1352}
1353
1354impl MemtableBuilder for BulkMemtableBuilder {
1355 fn build(&self, id: MemtableId, metadata: &RegionMetadataRef) -> MemtableRef {
1356 Arc::new(BulkMemtable::new(
1357 id,
1358 self.config.clone(),
1359 metadata.clone(),
1360 self.write_buffer_manager.clone(),
1361 self.compact_dispatcher.clone(),
1362 self.append_mode,
1363 self.merge_mode,
1364 ))
1365 }
1366
1367 fn use_bulk_insert(&self, _metadata: &RegionMetadataRef) -> bool {
1368 true
1369 }
1370}
1371
1372#[cfg(test)]
1373mod tests {
1374 use mito_codec::row_converter::build_primary_key_codec;
1375
1376 use super::*;
1377 use crate::memtable::bulk::part::BulkPartConverter;
1378 use crate::read::scan_region::PredicateGroup;
1379 use crate::sst::{FlatSchemaOptions, to_flat_sst_arrow_schema};
1380 use crate::test_util::memtable_util::{build_key_values_with_ts_seq_values, metadata_for_test};
1381
1382 fn create_bulk_part_with_converter(
1383 k0: &str,
1384 k1: u32,
1385 timestamps: Vec<i64>,
1386 values: Vec<Option<f64>>,
1387 sequence: u64,
1388 ) -> Result<BulkPart> {
1389 let metadata = metadata_for_test();
1390 let capacity = 100;
1391 let primary_key_codec = build_primary_key_codec(&metadata);
1392 let schema = to_flat_sst_arrow_schema(
1393 &metadata,
1394 &FlatSchemaOptions::from_encoding(metadata.primary_key_encoding),
1395 );
1396
1397 let mut converter =
1398 BulkPartConverter::new(&metadata, schema, capacity, primary_key_codec, true);
1399
1400 let key_values = build_key_values_with_ts_seq_values(
1401 &metadata,
1402 k0.to_string(),
1403 k1,
1404 timestamps.into_iter(),
1405 values.into_iter(),
1406 sequence,
1407 );
1408
1409 converter.append_key_values(&key_values)?;
1410 converter.convert()
1411 }
1412
/// Writes three bulk parts and checks the stats, the number of ranges and the
/// batches read back from every range.
#[test]
fn test_bulk_memtable_write_read() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        999,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );
    memtable.set_unordered_part_threshold(0);

    // (k0, k1, timestamps, field values, sequence) of each part to write.
    let inputs = [
        (
            "key_a",
            1u32,
            vec![1000i64, 2000i64],
            vec![Some(10.5), Some(20.5)],
            100u64,
        ),
        (
            "key_b",
            2u32,
            vec![1500i64, 2500i64],
            vec![Some(15.5), Some(25.5)],
            200u64,
        ),
        ("key_c", 3u32, vec![3000i64], vec![Some(30.5)], 300u64),
    ];

    for (k0, k1, timestamps, values, sequence) in inputs {
        let bulk_part =
            create_bulk_part_with_converter(k0, k1, timestamps, values, sequence).unwrap();
        memtable.write_bulk(bulk_part).unwrap();
    }

    let stats = memtable.stats();
    assert_eq!(5, stats.num_rows);
    assert_eq!(3, stats.num_ranges);
    assert_eq!(300, stats.max_sequence);

    let time_range = stats.time_range.unwrap();
    assert_eq!(1000, time_range.0.value());
    assert_eq!(3000, time_range.1.value());

    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(None, options).unwrap();

    assert_eq!(3, ranges.ranges.len());
    let rows_from_stats: usize = ranges.ranges.values().map(|r| r.stats().num_rows()).sum();
    assert_eq!(5, rows_from_stats);

    for range in ranges.ranges.values() {
        assert!(range.num_rows() > 0);
        assert!(range.is_record_batch());

        // Rows read from the iterator must add up to the range's reported row count.
        let mut rows_read = 0;
        for item in range.build_record_batch_iter(None, None).unwrap() {
            let batch = item.unwrap();
            rows_read += batch.num_rows();
            assert!(batch.num_rows() > 0);
            assert_eq!(8, batch.num_columns());
        }
        assert_eq!(rows_read, range.num_rows());
    }
}
1490
/// Reads a single three-row part back through a one-column projection and checks
/// the column and row counts of the returned batches.
#[test]
fn test_bulk_memtable_ranges_with_projection() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        111,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    let part = create_bulk_part_with_converter(
        "projection_test",
        5,
        vec![5000, 6000, 7000],
        vec![Some(50.0), Some(60.0), Some(70.0)],
        500,
    )
    .unwrap();
    memtable.write_bulk(part).unwrap();

    let projected_columns = vec![4u32];
    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(Some(&projected_columns), options).unwrap();

    assert_eq!(1, ranges.ranges.len());
    let only_range = ranges.ranges.get(&0).unwrap();
    assert!(only_range.is_record_batch());

    let mut rows_read = 0;
    for item in only_range.build_record_batch_iter(None, None).unwrap() {
        let batch = item.unwrap();
        assert!(batch.num_rows() > 0);
        assert_eq!(5, batch.num_columns());
        rows_read += batch.num_rows();
    }
    assert_eq!(3, rows_read);
}
1539
/// Checks that `write` and `write_one` both fail with a "not supported" error.
#[test]
fn test_bulk_memtable_unsupported_operations() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        111,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    let kvs = build_key_values_with_ts_seq_values(
        &metadata,
        "test".to_string(),
        1,
        vec![1000].into_iter(),
        vec![Some(1.0)].into_iter(),
        1,
    );

    // Writing a whole `KeyValues` is rejected.
    let write_err = memtable.write(&kvs).unwrap_err();
    assert!(write_err.to_string().contains("not supported"));

    // Writing a single key-value is rejected as well.
    let first_kv = kvs.iter().next().unwrap();
    let write_one_err = memtable.write_one(first_kv).unwrap_err();
    assert!(write_one_err.to_string().contains("not supported"));
}
1569
/// Checks that the row count reported by `stats()` is still 1 after `freeze()`.
#[test]
fn test_bulk_memtable_freeze() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        222,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    let part =
        create_bulk_part_with_converter("freeze_test", 10, vec![10000], vec![Some(100.0)], 1000)
            .unwrap();
    memtable.write_bulk(part).unwrap();

    memtable.freeze().unwrap();

    assert_eq!(1, memtable.stats().num_rows);
}
1598
/// Checks that a forked memtable gets the new id and is empty, while the original
/// memtable keeps its row.
#[test]
fn test_bulk_memtable_fork() {
    let metadata = metadata_for_test();
    let source = BulkMemtable::new(
        333,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    let part = create_bulk_part_with_converter(
        "fork_test",
        15,
        vec![15000],
        vec![Some(150.0)],
        1500,
    )
    .unwrap();
    source.write_bulk(part).unwrap();

    let forked = source.fork(444, &metadata);

    // The fork starts out empty under the requested id.
    assert_eq!(forked.id(), 444);
    assert!(forked.is_empty());
    assert_eq!(0, forked.stats().num_rows);

    // The source memtable is unaffected by forking.
    assert!(!source.is_empty());
    assert_eq!(1, source.stats().num_rows);
}
1627
/// Writes three parts with the unordered-part threshold set to 0 and expects three
/// record-batch ranges holding five rows in total.
#[test]
fn test_bulk_memtable_ranges_multiple_parts() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        777,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );
    memtable.set_unordered_part_threshold(0);

    // (k0, k1, timestamps, field values, sequence) of each part to write.
    let inputs = vec![
        (
            "part1",
            1u32,
            vec![1000i64, 1100i64],
            vec![Some(10.0), Some(11.0)],
            100u64,
        ),
        (
            "part2",
            2u32,
            vec![2000i64, 2100i64],
            vec![Some(20.0), Some(21.0)],
            200u64,
        ),
        ("part3", 3u32, vec![3000i64], vec![Some(30.0)], 300u64),
    ];
    for (k0, k1, timestamps, values, sequence) in inputs {
        let bulk_part =
            create_bulk_part_with_converter(k0, k1, timestamps, values, sequence).unwrap();
        memtable.write_bulk(bulk_part).unwrap();
    }

    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(None, options).unwrap();

    let range_count = ranges.ranges.len();
    assert_eq!(3, range_count);
    let rows_from_stats: usize = ranges.ranges.values().map(|r| r.stats().num_rows()).sum();
    assert_eq!(5, rows_from_stats);

    for (id, range) in &ranges.ranges {
        assert!(*id < 3);
        assert!(range.num_rows() > 0);
        assert!(range.is_record_batch());
    }
}
1685
/// Writes a part with sequence 500 and reads it with a `LtEq { max: 400 }` sequence
/// filter; the single returned range is expected to yield no batches.
#[test]
fn test_bulk_memtable_ranges_with_sequence_filter() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        888,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    let bulk_part = create_bulk_part_with_converter(
        "seq_test",
        1,
        vec![1000, 2000, 3000],
        vec![Some(10.0), Some(20.0), Some(30.0)],
        500,
    )
    .unwrap();
    memtable.write_bulk(bulk_part).unwrap();

    let options = RangesOptions::default()
        .with_predicate(PredicateGroup::new(&metadata, &[]).unwrap())
        .with_sequence(Some(SequenceRange::LtEq { max: 400 }));
    let ranges = memtable.ranges(None, options).unwrap();

    assert_eq!(1, ranges.ranges.len());
    let only_range = ranges.ranges.get(&0).unwrap();

    let mut batches = only_range.build_record_batch_iter(None, None).unwrap();
    assert!(batches.next().is_none());
}
1727
/// Writes ten single-row parts with a merge threshold of 8, compacts, and checks the
/// resulting ranges and the rows read back from them.
#[test]
fn test_bulk_memtable_ranges_with_encoded_parts() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        999,
        BulkMemtableConfig {
            merge_threshold: 8,
            ..Default::default()
        },
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );
    memtable.set_unordered_part_threshold(0);

    for idx in 0..10u32 {
        let key = format!("key_{}", idx);
        let bulk_part = create_bulk_part_with_converter(
            &key,
            idx,
            vec![1000 + idx as i64 * 100],
            vec![Some(idx as f64 * 10.0)],
            100 + idx as u64,
        )
        .unwrap();
        memtable.write_bulk(bulk_part).unwrap();
    }

    memtable.compact(false).unwrap();

    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(None, options).unwrap();

    assert_eq!(3, ranges.ranges.len());
    let rows_from_stats: usize = ranges.ranges.values().map(|r| r.stats().num_rows()).sum();
    assert_eq!(10, rows_from_stats);

    for range in ranges.ranges.values() {
        assert!(range.num_rows() > 0);
        assert!(range.is_record_batch());

        // Rows read from the iterator must add up to the range's reported row count.
        let mut rows_read = 0;
        for item in range.build_record_batch_iter(None, None).unwrap() {
            let batch = item.unwrap();
            rows_read += batch.num_rows();
            assert!(batch.num_rows() > 0);
        }
        assert_eq!(rows_read, range.num_rows());
    }
}
1789
/// Writes five two-row parts with unordered-part thresholds of 5 and 10 and checks
/// that all ten rows are reported by the stats and readable through the ranges.
#[test]
fn test_bulk_memtable_unordered_part() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        1001,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    memtable.set_unordered_part_threshold(5);
    memtable.set_unordered_part_compact_threshold(10);

    // Builds the two-row part used by both write phases below.
    let two_row_part = |i: u32| {
        create_bulk_part_with_converter(
            &format!("key_{}", i),
            i,
            vec![1000 + i as i64 * 100, 1100 + i as i64 * 100],
            vec![Some(i as f64 * 10.0), Some(i as f64 * 10.0 + 1.0)],
            100 + i as u64,
        )
        .unwrap()
    };

    // First phase: three parts, six rows.
    for i in 0..3 {
        let bulk_part = two_row_part(i);
        assert_eq!(2, bulk_part.num_rows());
        memtable.write_bulk(bulk_part).unwrap();
    }
    assert_eq!(6, memtable.stats().num_rows);

    // Second phase: two more parts, ten rows in total.
    for i in 3..5 {
        memtable.write_bulk(two_row_part(i)).unwrap();
    }
    assert_eq!(10, memtable.stats().num_rows);

    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(None, options).unwrap();

    assert!(!ranges.ranges.is_empty());
    let rows_from_stats: usize = ranges.ranges.values().map(|r| r.stats().num_rows()).sum();
    assert_eq!(10, rows_from_stats);

    let mut rows_read = 0;
    for range in ranges.ranges.values() {
        assert!(range.is_record_batch());
        for item in range.build_record_batch_iter(None, None).unwrap() {
            rows_read += item.unwrap().num_rows();
        }
    }
    assert_eq!(10, rows_read);
}
1872
/// Writes parts of 3, 3, 5 and 2 rows with unordered-part thresholds of 4 and 8 and
/// checks that all 13 rows are reported and readable.
#[test]
fn test_bulk_memtable_unordered_part_mixed_sizes() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        1002,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    memtable.set_unordered_part_threshold(4);
    memtable.set_unordered_part_compact_threshold(8);

    // Two parts of three rows each.
    for idx in 0..2 {
        let key = format!("small_{}", idx);
        let small_part = create_bulk_part_with_converter(
            &key,
            idx,
            vec![1000 + idx as i64, 2000 + idx as i64, 3000 + idx as i64],
            vec![
                Some(idx as f64),
                Some(idx as f64 + 1.0),
                Some(idx as f64 + 2.0),
            ],
            10 + idx as u64,
        )
        .unwrap();
        assert_eq!(3, small_part.num_rows());
        memtable.write_bulk(small_part).unwrap();
    }

    // One part of five rows.
    let five_row_part = create_bulk_part_with_converter(
        "large_key",
        100,
        vec![5000, 6000, 7000, 8000, 9000],
        vec![
            Some(100.0),
            Some(101.0),
            Some(102.0),
            Some(103.0),
            Some(104.0),
        ],
        50,
    )
    .unwrap();
    assert_eq!(5, five_row_part.num_rows());
    memtable.write_bulk(five_row_part).unwrap();

    // One more part of two rows.
    let two_row_part = create_bulk_part_with_converter(
        "small_2",
        2,
        vec![4000, 4100],
        vec![Some(20.0), Some(21.0)],
        30,
    )
    .unwrap();
    memtable.write_bulk(two_row_part).unwrap();

    assert_eq!(13, memtable.stats().num_rows);

    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(None, options).unwrap();

    let rows_from_stats: usize = ranges.ranges.values().map(|r| r.stats().num_rows()).sum();
    assert_eq!(13, rows_from_stats);

    let mut rows_read = 0;
    for range in ranges.ranges.values() {
        for item in range.build_record_batch_iter(None, None).unwrap() {
            rows_read += item.unwrap().num_rows();
        }
    }
    assert_eq!(13, rows_read);
}
1958
/// Writes three single-row parts with unordered-part thresholds of 3 and 100 and
/// expects a single range that yields all three rows.
#[test]
fn test_bulk_memtable_unordered_part_with_ranges() {
    let metadata = metadata_for_test();
    let memtable = BulkMemtable::new(
        1003,
        BulkMemtableConfig::default(),
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );

    memtable.set_unordered_part_threshold(3);
    memtable.set_unordered_part_compact_threshold(100);

    for idx in 0..3 {
        let key = format!("key_{}", idx);
        let bulk_part = create_bulk_part_with_converter(
            &key,
            idx,
            vec![1000 + idx as i64 * 100],
            vec![Some(idx as f64 * 10.0)],
            100 + idx as u64,
        )
        .unwrap();
        assert_eq!(1, bulk_part.num_rows());
        memtable.write_bulk(bulk_part).unwrap();
    }

    assert_eq!(3, memtable.stats().num_rows);

    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(None, options).unwrap();

    assert_eq!(1, ranges.ranges.len());
    let rows_from_stats: usize = ranges.ranges.values().map(|r| r.stats().num_rows()).sum();
    assert_eq!(3, rows_from_stats);

    let only_range = ranges.ranges.get(&0).unwrap();
    let mut rows_read = 0;
    for item in only_range.build_record_batch_iter(None, None).unwrap() {
        let batch = item.unwrap();
        rows_read += batch.num_rows();
        assert!(batch.num_rows() > 0);
    }
    assert_eq!(3, rows_read);
}
2020
2021 fn create_bulk_part_wrapper(part: BulkPart) -> BulkPartWrapper {
2023 BulkPartWrapper {
2024 part: PartToMerge::Bulk {
2025 part,
2026 file_id: FileId::random(),
2027 },
2028 merging: false,
2029 }
2030 }
2031
/// With one part fewer than `DEFAULT_MERGE_THRESHOLD`, no merge is expected.
#[test]
fn test_should_merge_parts_below_threshold() {
    let mut bulk_parts = BulkParts::default();

    // Builds a wrapped single-row part for index `i`.
    let single_row_wrapper = |i: usize| {
        let part = create_bulk_part_with_converter(
            &format!("key_{}", i),
            i as u32,
            vec![1000 + i as i64 * 100],
            vec![Some(i as f64 * 10.0)],
            100 + i as u64,
        )
        .unwrap();
        create_bulk_part_wrapper(part)
    };

    for i in 0..DEFAULT_MERGE_THRESHOLD - 1 {
        bulk_parts.parts.push(single_row_wrapper(i));
    }

    assert!(!bulk_parts.should_merge_parts(DEFAULT_MERGE_THRESHOLD));
}
2052
/// With exactly `merge_threshold` parts, a merge is expected.
#[test]
fn test_should_merge_parts_at_threshold() {
    let merge_threshold: usize = 8;
    let mut bulk_parts = BulkParts::default();

    // Builds a wrapped single-row part for index `i`.
    let single_row_wrapper = |i: usize| {
        let part = create_bulk_part_with_converter(
            &format!("key_{}", i),
            i as u32,
            vec![1000 + i as i64 * 100],
            vec![Some(i as f64 * 10.0)],
            100 + i as u64,
        )
        .unwrap();
        create_bulk_part_wrapper(part)
    };

    for i in 0..merge_threshold {
        bulk_parts.parts.push(single_row_wrapper(i));
    }

    assert!(bulk_parts.should_merge_parts(merge_threshold));
}
2074
/// Ten parts trigger a merge at threshold 8; once three of them are flagged as
/// merging, a merge is no longer expected.
#[test]
fn test_should_merge_parts_with_merging_flag() {
    let merge_threshold = 8;
    let mut bulk_parts = BulkParts::default();

    for idx in 0..10 {
        let key = format!("key_{}", idx);
        let bulk_part = create_bulk_part_with_converter(
            &key,
            idx as u32,
            vec![1000 + idx as i64 * 100],
            vec![Some(idx as f64 * 10.0)],
            100 + idx as u64,
        )
        .unwrap();
        bulk_parts.parts.push(create_bulk_part_wrapper(bulk_part));
    }

    assert!(bulk_parts.should_merge_parts(merge_threshold));

    // Flag the first three parts as already being merged.
    bulk_parts
        .parts
        .iter_mut()
        .take(3)
        .for_each(|wrapper| wrapper.merging = true);

    assert!(!bulk_parts.should_merge_parts(merge_threshold));
}
2104
/// Collects 16 parts of one to four rows each and checks that every returned group
/// is non-empty and that the groups together contain all 16 parts.
#[test]
fn test_collect_parts_to_merge_grouping() {
    let mut bulk_parts = BulkParts::default();

    for idx in 0..16 {
        // Row count cycles through 1, 2, 3, 4.
        let row_count = (idx % 4) + 1;
        let ts: Vec<i64> = (0..row_count)
            .map(|j| 1000 + idx as i64 * 100 + j as i64)
            .collect();
        let field_values: Vec<Option<f64>> = (0..row_count)
            .map(|j| Some((idx * 10 + j) as f64))
            .collect();

        let key = format!("key_{}", idx);
        let bulk_part = create_bulk_part_with_converter(
            &key,
            idx as u32,
            ts,
            field_values,
            100 + idx as u64,
        )
        .unwrap();
        bulk_parts.parts.push(create_bulk_part_wrapper(bulk_part));
    }

    assert!(bulk_parts.should_merge_parts(DEFAULT_MERGE_THRESHOLD));

    let collected =
        bulk_parts.collect_parts_to_merge(DEFAULT_MERGE_THRESHOLD, DEFAULT_MAX_MERGE_GROUPS);
    assert!(!collected.groups.is_empty());

    let mut total_parts = 0;
    for group in collected.groups.iter() {
        assert!(!group.is_empty());
        total_parts += group.len();
    }
    assert_eq!(16, total_parts);
}
2147
/// Writes `merge_threshold` two-row parts, compacts, and expects a single range that
/// reports and yields every written row.
#[test]
fn test_bulk_memtable_ranges_with_multi_bulk_part() {
    let metadata = metadata_for_test();
    let merge_threshold = 8;
    let memtable = BulkMemtable::new(
        2005,
        BulkMemtableConfig {
            merge_threshold,
            ..Default::default()
        },
        metadata.clone(),
        None,
        None,
        false,
        MergeMode::LastRow,
    );
    memtable.set_unordered_part_threshold(0);

    for idx in 0..merge_threshold {
        let key = format!("key_{}", idx);
        let bulk_part = create_bulk_part_with_converter(
            &key,
            idx as u32,
            vec![1000 + idx as i64 * 100, 2000 + idx as i64 * 100],
            vec![Some(idx as f64 * 10.0), Some(idx as f64 * 10.0 + 1.0)],
            100 + idx as u64,
        )
        .unwrap();
        memtable.write_bulk(bulk_part).unwrap();
    }

    memtable.compact(false).unwrap();

    let options =
        RangesOptions::default().with_predicate(PredicateGroup::new(&metadata, &[]).unwrap());
    let ranges = memtable.ranges(None, options).unwrap();

    assert_eq!(1, ranges.ranges.len());

    // Every part contributed two rows.
    let expected_rows = merge_threshold * 2;
    let rows_from_stats: usize = ranges.ranges.values().map(|r| r.stats().num_rows()).sum();
    assert_eq!(expected_rows, rows_from_stats);

    let mut rows_read = 0;
    for range in ranges.ranges.values() {
        assert!(range.is_record_batch());
        for item in range.build_record_batch_iter(None, None).unwrap() {
            rows_read += item.unwrap().num_rows();
        }
    }
    assert_eq!(expected_rows, rows_read);
}
2213}