1use std::num::NonZero;
16use std::sync::Arc;
17use std::time::Duration;
18
19use common_base::cancellation::{CancellableFuture, CancellationHandle};
20use common_meta::key::SchemaMetadataManagerRef;
21use common_telemetry::{debug, info, warn};
22use common_time::TimeToLive;
23use either::Either;
24use itertools::Itertools;
25use object_store::manager::ObjectStoreManagerRef;
26use partition::expr::PartitionExpr;
27use serde::{Deserialize, Serialize};
28use snafu::{OptionExt, ResultExt};
29use store_api::metadata::RegionMetadataRef;
30use store_api::region_request::PathType;
31use store_api::storage::RegionId;
32
33use crate::access_layer::{
34 AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
35};
36use crate::cache::{CacheManager, CacheManagerRef};
37use crate::compaction::picker::PickerOutput;
38use crate::compaction::{CompactionOutput, CompactionSstReaderBuilder, find_dynamic_options};
39use crate::config::MitoConfig;
40use crate::error;
41use crate::error::{
42 EmptyRegionDirSnafu, InvalidPartitionExprSnafu, ObjectStoreNotFoundSnafu, Result,
43};
44use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
45use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
46use crate::region::options::RegionOptions;
47use crate::region::version::VersionRef;
48use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState};
49use crate::schedule::scheduler::LocalScheduler;
50use crate::sst::FormatType;
51use crate::sst::file::FileMeta;
52use crate::sst::file_purger::LocalFilePurger;
53use crate::sst::index::intermediate::IntermediateManager;
54use crate::sst::index::puffin_manager::PuffinManagerFactory;
55use crate::sst::location::region_dir_from_table_dir;
56use crate::sst::parquet::WriteOptions;
57use crate::sst::parquet::metadata::extract_primary_key_range;
58use crate::sst::version::{SstVersion, SstVersionRef};
59
60#[derive(Clone)]
62pub struct CompactionVersion {
63 pub(crate) metadata: RegionMetadataRef,
68 pub(crate) options: RegionOptions,
70 pub(crate) ssts: SstVersionRef,
72 pub(crate) compaction_time_window: Option<Duration>,
74}
75
76impl From<VersionRef> for CompactionVersion {
77 fn from(value: VersionRef) -> Self {
78 Self {
79 metadata: value.metadata.clone(),
80 options: value.options.clone(),
81 ssts: value.ssts.clone(),
82 compaction_time_window: value.compaction_time_window,
83 }
84 }
85}
86
87#[derive(Clone)]
90pub struct CompactionRegion {
91 pub region_id: RegionId,
92 pub region_options: RegionOptions,
93
94 pub(crate) engine_config: Arc<MitoConfig>,
95 pub(crate) region_metadata: RegionMetadataRef,
96 pub(crate) cache_manager: CacheManagerRef,
97 pub access_layer: AccessLayerRef,
99 pub(crate) manifest_ctx: Arc<ManifestContext>,
100 pub(crate) current_version: CompactionVersion,
101 pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
102 pub(crate) ttl: Option<TimeToLive>,
103
104 pub max_parallelism: usize,
109}
110
111#[derive(Debug, Clone)]
113pub struct OpenCompactionRegionRequest {
114 pub region_id: RegionId,
115 pub table_dir: String,
116 pub path_type: PathType,
117 pub region_options: RegionOptions,
118 pub max_parallelism: usize,
119}
120
121pub async fn open_compaction_region(
124 req: &OpenCompactionRegionRequest,
125 mito_config: &MitoConfig,
126 object_store_manager: ObjectStoreManagerRef,
127 ttl_provider: Either<TimeToLive, SchemaMetadataManagerRef>,
128) -> Result<CompactionRegion> {
129 let object_store = {
130 let name = &req.region_options.storage;
131 if let Some(name) = name {
132 object_store_manager
133 .find(name)
134 .with_context(|| ObjectStoreNotFoundSnafu {
135 object_store: name.clone(),
136 })?
137 } else {
138 object_store_manager.default_object_store()
139 }
140 };
141
142 let access_layer = {
143 let puffin_manager_factory = PuffinManagerFactory::new(
144 &mito_config.index.aux_path,
145 mito_config.index.staging_size.as_bytes(),
146 Some(mito_config.index.write_buffer_size.as_bytes() as _),
147 mito_config.index.staging_ttl,
148 )
149 .await?;
150 let intermediate_manager =
151 IntermediateManager::init_fs(mito_config.index.aux_path.clone()).await?;
152
153 Arc::new(AccessLayer::new(
154 &req.table_dir,
155 req.path_type,
156 object_store.clone(),
157 puffin_manager_factory,
158 intermediate_manager,
159 ))
160 };
161
162 let manifest_manager = {
163 let region_dir = region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type);
164 let region_manifest_options =
165 RegionManifestOptions::new(mito_config, ®ion_dir, object_store);
166
167 RegionManifestManager::open(region_manifest_options, &Default::default())
168 .await?
169 .with_context(|| EmptyRegionDirSnafu {
170 region_id: req.region_id,
171 region_dir: region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type),
172 })?
173 };
174
175 let manifest = manifest_manager.manifest();
176 let region_metadata = manifest.metadata.clone();
177 let manifest_ctx = Arc::new(ManifestContext::new(
178 manifest_manager,
179 RegionRoleState::Leader(RegionLeaderState::Writable),
180 ));
181
182 let file_purger = {
183 let purge_scheduler = Arc::new(LocalScheduler::new(mito_config.max_background_purges));
184 Arc::new(LocalFilePurger::new(
185 purge_scheduler.clone(),
186 access_layer.clone(),
187 None,
188 ))
189 };
190
191 let current_version = {
192 let mut ssts = SstVersion::new();
193 ssts.add_files(file_purger.clone(), manifest.files.values().cloned());
194 CompactionVersion {
195 metadata: region_metadata.clone(),
196 options: req.region_options.clone(),
197 ssts: Arc::new(ssts),
198 compaction_time_window: manifest.compaction_time_window,
199 }
200 };
201
202 let ttl = match ttl_provider {
203 Either::Left(ttl) => ttl,
205 Either::Right(schema_metadata_manager) => {
207 let (_, ttl) =
208 find_dynamic_options(req.region_id, &req.region_options, &schema_metadata_manager)
209 .await
210 .unwrap_or_else(|e| {
211 warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
212 (
213 crate::region::options::CompactionOptions::default(),
214 TimeToLive::default(),
215 )
216 });
217 ttl
218 }
219 };
220
221 Ok(CompactionRegion {
222 region_id: req.region_id,
223 region_options: req.region_options.clone(),
224 engine_config: Arc::new(mito_config.clone()),
225 region_metadata: region_metadata.clone(),
226 cache_manager: Arc::new(CacheManager::default()),
227 access_layer,
228 manifest_ctx,
229 current_version,
230 file_purger: Some(file_purger),
231 ttl: Some(ttl),
232 max_parallelism: req.max_parallelism,
233 })
234}
235
236impl CompactionRegion {
237 pub fn file_purger(&self) -> Option<Arc<LocalFilePurger>> {
239 self.file_purger.clone()
240 }
241
242 pub async fn stop_purger_scheduler(&self) -> Result<()> {
244 if let Some(file_purger) = &self.file_purger {
245 file_purger.stop_scheduler().await
246 } else {
247 Ok(())
248 }
249 }
250}
251
252#[derive(Default, Clone, Debug, Serialize, Deserialize)]
254pub struct MergeOutput {
255 pub files_to_add: Vec<FileMeta>,
256 pub files_to_remove: Vec<FileMeta>,
257 pub compaction_time_window: Option<i64>,
258}
259
260impl MergeOutput {
261 pub fn is_empty(&self) -> bool {
262 self.files_to_add.is_empty() && self.files_to_remove.is_empty()
263 }
264
265 pub fn input_file_size(&self) -> u64 {
266 self.files_to_remove.iter().map(|f| f.file_size).sum()
267 }
268
269 pub fn output_file_size(&self) -> u64 {
270 self.files_to_add.iter().map(|f| f.file_size).sum()
271 }
272}
273
274#[async_trait::async_trait]
276pub trait Compactor: Send + Sync + 'static {
277 async fn merge_ssts(
279 &self,
280 compaction_region: &CompactionRegion,
281 picker_output: PickerOutput,
282 ) -> Result<MergeOutput>;
283
284 async fn update_manifest(
286 &self,
287 compaction_region: &CompactionRegion,
288 merge_output: MergeOutput,
289 ) -> Result<RegionEdit>;
290}
291
292#[async_trait::async_trait]
297pub trait SstMerger: Send + Sync + 'static {
298 async fn merge_single_output(
299 &self,
300 compaction_region: CompactionRegion,
301 output: CompactionOutput,
302 write_opts: WriteOptions,
303 ) -> Result<Vec<FileMeta>>;
304}
305
/// Stateless SST merger used by default.
///
/// It carries no data, so it also derives `Debug` and `Default` to make it
/// easy to log and to construct generically.
#[derive(Debug, Clone, Default)]
pub struct DefaultSstMerger;
309
310#[async_trait::async_trait]
311impl SstMerger for DefaultSstMerger {
312 async fn merge_single_output(
313 &self,
314 compaction_region: CompactionRegion,
315 output: CompactionOutput,
316 write_opts: WriteOptions,
317 ) -> Result<Vec<FileMeta>> {
318 let region_id = compaction_region.region_id;
319 let storage = compaction_region.region_options.storage.clone();
320 let index_options = compaction_region
321 .current_version
322 .options
323 .index_options
324 .clone();
325 let append_mode = compaction_region.current_version.options.append_mode;
326 let merge_mode = compaction_region.current_version.options.merge_mode();
327 let flat_format = compaction_region
328 .region_options
329 .sst_format
330 .map(|format| format == FormatType::Flat)
331 .unwrap_or(compaction_region.engine_config.default_flat_format);
332
333 let index_config = compaction_region.engine_config.index.clone();
334 let inverted_index_config = compaction_region.engine_config.inverted_index.clone();
335 let fulltext_index_config = compaction_region.engine_config.fulltext_index.clone();
336 let bloom_filter_index_config = compaction_region.engine_config.bloom_filter_index.clone();
337 #[cfg(feature = "vector_index")]
338 let vector_index_config = compaction_region.engine_config.vector_index.clone();
339
340 let input_file_names = output
341 .inputs
342 .iter()
343 .map(|f| f.file_id().to_string())
344 .join(",");
345 let max_sequence = output
346 .inputs
347 .iter()
348 .map(|f| f.meta_ref().sequence)
349 .max()
350 .flatten();
351 let builder = CompactionSstReaderBuilder {
352 metadata: compaction_region.region_metadata.clone(),
353 sst_layer: compaction_region.access_layer.clone(),
354 cache: compaction_region.cache_manager.clone(),
355 inputs: &output.inputs,
356 append_mode,
357 filter_deleted: output.filter_deleted,
358 time_range: output.output_time_range,
359 merge_mode,
360 };
361 let source = builder.build_flat_sst_reader().await?;
362 let mut metrics = Metrics::new(WriteType::Compaction);
363 let region_metadata = compaction_region.region_metadata.clone();
364 let sst_infos = compaction_region
365 .access_layer
366 .write_sst(
367 SstWriteRequest {
368 op_type: OperationType::Compact,
369 metadata: region_metadata.clone(),
370 source,
371 cache_manager: compaction_region.cache_manager.clone(),
372 storage,
373 max_sequence: max_sequence.map(NonZero::get),
374 sst_write_format: if flat_format {
375 FormatType::Flat
376 } else {
377 FormatType::PrimaryKey
378 },
379 index_options,
380 index_config,
381 inverted_index_config,
382 fulltext_index_config,
383 bloom_filter_index_config,
384 #[cfg(feature = "vector_index")]
385 vector_index_config,
386 },
387 &write_opts,
388 &mut metrics,
389 )
390 .await?;
391 let partition_expr = match ®ion_metadata.partition_expr {
393 None => None,
394 Some(json_str) if json_str.is_empty() => None,
395 Some(json_str) => PartitionExpr::from_json_str(json_str).with_context(|_| {
396 InvalidPartitionExprSnafu {
397 expr: json_str.clone(),
398 }
399 })?,
400 };
401
402 let output_files = sst_infos
403 .into_iter()
404 .map(|sst_info| {
405 let pk_range = sst_info
406 .file_metadata
407 .as_ref()
408 .and_then(|meta| extract_primary_key_range(meta, ®ion_metadata));
409 let (primary_key_min, primary_key_max) = match pk_range {
410 Some((min, max)) => (Some(min), Some(max)),
411 None => (None, None),
412 };
413
414 FileMeta {
415 region_id,
416 file_id: sst_info.file_id,
417 time_range: sst_info.time_range,
418 level: output.output_level,
419 file_size: sst_info.file_size,
420 max_row_group_uncompressed_size: sst_info.max_row_group_uncompressed_size,
421 available_indexes: sst_info.index_metadata.build_available_indexes(),
422 indexes: sst_info.index_metadata.build_indexes(),
423 index_file_size: sst_info.index_metadata.file_size,
424 index_version: 0,
425 num_rows: sst_info.num_rows as u64,
426 num_row_groups: sst_info.num_row_groups,
427 sequence: max_sequence,
428 partition_expr: partition_expr.clone(),
429 num_series: sst_info.num_series,
430 primary_key_min,
431 primary_key_max,
432 }
433 })
434 .collect::<Vec<_>>();
435 let output_file_names = output_files.iter().map(|f| f.file_id.to_string()).join(",");
436 info!(
437 "Region {} compaction inputs: [{}], outputs: [{}], flat_format: {}, metrics: {:?}",
438 region_id, input_file_names, output_file_names, flat_format, metrics
439 );
440 metrics.observe();
441 Ok(output_files)
442 }
443}
444
445pub struct DefaultCompactor<M = DefaultSstMerger> {
450 merger: M,
451 cancel_handle: Arc<CancellationHandle>,
452}
453
#[cfg(test)]
impl<M: SstMerger> DefaultCompactor<M> {
    /// Test-only constructor: wraps a custom merger together with a freshly
    /// created default cancellation handle.
    pub fn with_merger(merger: M) -> Self {
        let cancel_handle = Arc::new(CancellationHandle::default());
        Self {
            merger,
            cancel_handle,
        }
    }
}
463
464impl DefaultCompactor {
465 pub fn with_cancel_handle(cancel_handle: Arc<CancellationHandle>) -> Self {
466 Self {
467 merger: DefaultSstMerger,
468 cancel_handle,
469 }
470 }
471}
472
473#[async_trait::async_trait]
474impl<M: SstMerger> Compactor for DefaultCompactor<M>
475where
476 M: Clone,
477{
478 async fn merge_ssts(
479 &self,
480 compaction_region: &CompactionRegion,
481 mut picker_output: PickerOutput,
482 ) -> Result<MergeOutput> {
483 let internal_parallelism = compaction_region.max_parallelism.max(1);
484 let compaction_time_window = picker_output.time_window_size;
485 let region_id = compaction_region.region_id;
486
487 let mut tasks: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(picker_output.outputs.len());
490
491 for output in picker_output.outputs.drain(..) {
492 let inputs_to_remove: Vec<_> =
493 output.inputs.iter().map(|f| f.meta_ref().clone()).collect();
494 let write_opts = WriteOptions {
495 write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
496 max_file_size: picker_output.max_file_size,
497 ..Default::default()
498 };
499 let merger = self.merger.clone();
500 let compaction_region = compaction_region.clone();
501 let fut = async move {
502 merger
503 .merge_single_output(compaction_region, output, write_opts)
504 .await
505 };
506 tasks.push((inputs_to_remove, fut));
507 }
508
509 let mut output_files = Vec::with_capacity(tasks.len());
510 let mut compacted_inputs = Vec::with_capacity(
511 tasks.iter().map(|(inputs, _)| inputs.len()).sum::<usize>()
512 + picker_output.expired_ssts.len(),
513 );
514
515 while !tasks.is_empty() {
516 let mut chunk: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(internal_parallelism);
517 for _ in 0..internal_parallelism {
518 if let Some(task) = tasks.pop() {
519 chunk.push(task);
520 }
521 }
522 let mut spawned: Vec<_> = chunk
523 .into_iter()
524 .map(|(inputs, fut)| {
525 let handle = common_runtime::spawn_compact(fut);
526 (inputs, handle)
527 })
528 .collect();
529
530 while let Some((inputs, handle)) = spawned.pop() {
531 let abort_handle = handle.abort_handle();
532 match CancellableFuture::new(handle, self.cancel_handle.clone()).await {
533 Ok(Ok(Ok(files))) => {
534 output_files.extend(files);
535 compacted_inputs.extend(inputs);
536 }
537 Ok(Ok(Err(e))) => {
538 warn!(
539 e; "Failed to merge compaction output for region: {}, inputs: [{}]",
540 region_id,
541 inputs.iter().map(|f| f.file_id.to_string()).join(",")
542 );
543 }
544 Ok(Err(e)) => {
545 warn!(
546 "Region {} compaction task join error for inputs: [{}], skipping: {}",
547 region_id,
548 inputs.iter().map(|f| f.file_id.to_string()).join(","),
549 e
550 );
551 if self.cancel_handle.is_cancelled() {
554 abort_handle.abort();
555 for (_, handle) in spawned {
556 handle.abort();
557 }
558 }
559 return Err(e).context(error::JoinSnafu);
560 }
561 Err(_) => {
562 debug!(
563 "Compaction merge cancelled for region: {}, aborting remaining {} spawned tasks",
564 region_id,
565 spawned.len(),
566 );
567 abort_handle.abort();
568 for (_, handle) in spawned {
569 handle.abort();
570 }
571 break;
572 }
573 }
574 }
575
576 if self.cancel_handle.is_cancelled() {
577 info!("Compaction merge cancelled for region: {}", region_id);
578 break;
579 }
580 }
581
582 compacted_inputs.extend(
584 picker_output
585 .expired_ssts
586 .iter()
587 .map(|f| f.meta_ref().clone()),
588 );
589
590 Ok(MergeOutput {
591 files_to_add: output_files,
592 files_to_remove: compacted_inputs,
593 compaction_time_window: Some(compaction_time_window),
594 })
595 }
596
597 async fn update_manifest(
598 &self,
599 compaction_region: &CompactionRegion,
600 merge_output: MergeOutput,
601 ) -> Result<RegionEdit> {
602 let edit = RegionEdit {
604 files_to_add: merge_output.files_to_add,
605 files_to_remove: merge_output.files_to_remove,
606 timestamp_ms: Some(chrono::Utc::now().timestamp_millis()),
608 compaction_time_window: merge_output
609 .compaction_time_window
610 .map(|seconds| Duration::from_secs(seconds as u64)),
611 flushed_entry_id: None,
612 flushed_sequence: None,
613 committed_sequence: None,
614 };
615
616 let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
617 compaction_region
619 .manifest_ctx
620 .update_manifest_for_compaction(action_list)
621 .await?;
622
623 Ok(edit)
624 }
625}
626
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::{Arc, Mutex};
    use std::time::Duration;

    use store_api::storage::{FileId, RegionId};
    use tokio::time::sleep;

    use super::{DefaultCompactor, *};
    use crate::cache::CacheManager;
    use crate::compaction::picker::PickerOutput;
    use crate::error::Result;
    use crate::sst::file::FileHandle;
    use crate::sst::file_purger::NoopFilePurger;
    use crate::sst::version::SstVersion;
    use crate::test_util::memtable_util::metadata_for_test;
    use crate::test_util::scheduler_util::SchedulerEnv;

    /// Creates file metadata with a random file id in region `(1, 1)`.
    fn dummy_file_meta() -> FileMeta {
        FileMeta {
            region_id: RegionId::new(1, 1),
            file_id: FileId::random(),
            file_size: 100,
            ..Default::default()
        }
    }

    /// Wraps `meta` in a file handle whose purger does nothing.
    fn new_file_handle(meta: FileMeta) -> FileHandle {
        FileHandle::new(meta, Arc::new(NoopFilePurger))
    }

    /// Builds a level-1 compaction output with `input` as its only input file.
    fn single_input_output(input: FileMeta) -> CompactionOutput {
        CompactionOutput {
            output_level: 1,
            inputs: vec![new_file_handle(input)],
            filter_deleted: false,
            output_time_range: None,
        }
    }

    /// Builds a picker output with a one hour window and no file size limit.
    fn picker_output_of(
        outputs: Vec<CompactionOutput>,
        expired_ssts: Vec<FileHandle>,
    ) -> PickerOutput {
        PickerOutput {
            outputs,
            expired_ssts,
            time_window_size: 3600,
            max_file_size: None,
        }
    }

    /// Builds a compaction region on top of the scheduler test environment,
    /// without file purger or TTL and with a parallelism of one.
    async fn new_test_compaction_region() -> CompactionRegion {
        let env = SchedulerEnv::new().await;
        let metadata = metadata_for_test();
        let manifest_ctx = env.mock_manifest_context(metadata.clone()).await;
        let current_version = CompactionVersion {
            metadata: metadata.clone(),
            options: RegionOptions::default(),
            ssts: Arc::new(SstVersion::new()),
            compaction_time_window: None,
        };

        CompactionRegion {
            region_id: RegionId::new(1, 1),
            region_options: RegionOptions::default(),
            engine_config: Arc::new(MitoConfig::default()),
            region_metadata: metadata,
            cache_manager: Arc::new(CacheManager::default()),
            access_layer: env.access_layer.clone(),
            manifest_ctx,
            current_version,
            file_purger: None,
            ttl: None,
            max_parallelism: 1,
        }
    }

    /// Merger that replays pre-configured results, one per call, in call order.
    #[derive(Clone)]
    struct MockMerger {
        results: Arc<Mutex<Vec<Result<Vec<FileMeta>>>>>,
        call_idx: Arc<AtomicUsize>,
    }

    impl MockMerger {
        fn new(results: Vec<Result<Vec<FileMeta>>>) -> Self {
            let results = Arc::new(Mutex::new(results));
            let call_idx = Arc::new(AtomicUsize::new(0));
            Self { results, call_idx }
        }
    }

    #[async_trait::async_trait]
    impl SstMerger for MockMerger {
        /// Returns the result configured for this call index; a configured
        /// error is replaced by a freshly built `InvalidMeta` error.
        async fn merge_single_output(
            &self,
            _compaction_region: CompactionRegion,
            _output: CompactionOutput,
            _write_opts: WriteOptions,
        ) -> Result<Vec<FileMeta>> {
            let idx = self.call_idx.fetch_add(1, Ordering::SeqCst);
            match self.results.lock().unwrap().get(idx) {
                None => panic!("MockMerger: no result configured for call index {idx}"),
                Some(Err(_)) => error::InvalidMetaSnafu {
                    reason: format!("simulated failure at index {idx}"),
                }
                .fail(),
                Some(Ok(files)) => Ok(files.clone()),
            }
        }
    }

    #[tokio::test]
    async fn test_partial_merge_failure_collects_only_successful_outputs() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;

        let inputs = [dummy_file_meta(), dummy_file_meta(), dummy_file_meta()];
        let first_call_files = vec![dummy_file_meta()];
        let third_call_files = vec![dummy_file_meta(), dummy_file_meta()];

        // `merge_ssts` takes tasks from the back, so with a parallelism of one the
        // second merger call (the failing one) belongs to the middle output.
        let merger = MockMerger::new(vec![
            Ok(first_call_files),
            Err(error::InvalidMetaSnafu {
                reason: "boom".to_string(),
            }
            .build()),
            Ok(third_call_files),
        ]);
        let compactor = DefaultCompactor::with_merger(merger);

        let picker_output = picker_output_of(
            inputs.iter().cloned().map(single_input_output).collect(),
            vec![],
        );

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        assert_eq!(merge_output.files_to_add.len(), 3);
        assert_eq!(merge_output.files_to_remove.len(), 2);

        let removed_ids: Vec<_> = merge_output
            .files_to_remove
            .iter()
            .map(|f| f.file_id)
            .collect();
        assert!(removed_ids.contains(&inputs[0].file_id));
        assert!(removed_ids.contains(&inputs[2].file_id));
        assert!(!removed_ids.contains(&inputs[1].file_id));
    }

    #[tokio::test]
    async fn test_all_outputs_succeed() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;
        let input_meta = dummy_file_meta();
        let output_meta = vec![dummy_file_meta()];

        let compactor = DefaultCompactor::with_merger(MockMerger::new(vec![Ok(
            output_meta.clone()
        )]));
        let picker_output =
            picker_output_of(vec![single_input_output(input_meta.clone())], vec![]);

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        assert_eq!(merge_output.files_to_add.len(), 1);
        assert_eq!(merge_output.files_to_add[0].file_id, output_meta[0].file_id);
        assert_eq!(merge_output.files_to_remove.len(), 1);
        assert_eq!(merge_output.files_to_remove[0].file_id, input_meta.file_id);
    }

    #[tokio::test]
    async fn test_expired_ssts_always_removed() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;
        let input_meta = dummy_file_meta();
        let expired_meta = dummy_file_meta();

        // The only merge fails, so just the expired file may be removed.
        let merger = MockMerger::new(vec![Err(error::InvalidMetaSnafu {
            reason: "fail".to_string(),
        }
        .build())]);
        let compactor = DefaultCompactor::with_merger(merger);

        let picker_output = picker_output_of(
            vec![single_input_output(input_meta.clone())],
            vec![new_file_handle(expired_meta.clone())],
        );

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        assert!(merge_output.files_to_add.is_empty());
        assert_eq!(merge_output.files_to_remove.len(), 1);
        assert_eq!(
            merge_output.files_to_remove[0].file_id,
            expired_meta.file_id
        );
    }

    /// Merger that counts its calls and then never completes.
    #[derive(Clone)]
    struct BlockingMerger {
        call_idx: Arc<AtomicUsize>,
    }

    #[async_trait::async_trait]
    impl SstMerger for BlockingMerger {
        async fn merge_single_output(
            &self,
            _compaction_region: CompactionRegion,
            _output: CompactionOutput,
            _write_opts: WriteOptions,
        ) -> Result<Vec<FileMeta>> {
            self.call_idx.fetch_add(1, Ordering::SeqCst);
            std::future::pending().await
        }
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_merge_ssts_cancels_spawned_tasks() {
        common_telemetry::init_default_ut_logging();

        let mut compaction_region = new_test_compaction_region().await;
        compaction_region.max_parallelism = 2;

        let cancel_handle = Arc::new(CancellationHandle::default());
        let call_idx = Arc::new(AtomicUsize::new(0));
        let compactor = DefaultCompactor {
            merger: BlockingMerger {
                call_idx: call_idx.clone(),
            },
            cancel_handle: cancel_handle.clone(),
        };

        // Three outputs with a parallelism of two: only the first batch may start.
        let outputs = (0..3)
            .map(|_| single_input_output(dummy_file_meta()))
            .collect();
        let picker_output = picker_output_of(outputs, vec![]);

        let task = tokio::spawn(async move {
            compactor
                .merge_ssts(&compaction_region, picker_output)
                .await
        });

        // Give the first batch time to start before cancelling.
        sleep(Duration::from_millis(100)).await;
        cancel_handle.cancel();

        let merge_output = task
            .await
            .expect("merge_ssts should stop after cancellation")
            .unwrap();

        let started = call_idx.load(Ordering::SeqCst);

        assert!(merge_output.files_to_add.is_empty());
        assert!(merge_output.files_to_remove.is_empty());
        assert_eq!(started, 2);
    }
}