1use std::num::NonZero;
16use std::sync::Arc;
17use std::time::Duration;
18
19use common_base::cancellation::{CancellableFuture, CancellationHandle};
20use common_meta::key::SchemaMetadataManagerRef;
21use common_telemetry::{debug, info, warn};
22use common_time::TimeToLive;
23use either::Either;
24use itertools::Itertools;
25use object_store::manager::ObjectStoreManagerRef;
26use partition::expr::PartitionExpr;
27use serde::{Deserialize, Serialize};
28use snafu::{OptionExt, ResultExt};
29use store_api::metadata::RegionMetadataRef;
30use store_api::region_request::PathType;
31use store_api::storage::RegionId;
32
33use crate::access_layer::{
34 AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
35};
36use crate::cache::{CacheManager, CacheManagerRef};
37use crate::compaction::picker::PickerOutput;
38use crate::compaction::{CompactionOutput, CompactionSstReaderBuilder, find_dynamic_options};
39use crate::config::MitoConfig;
40use crate::error;
41use crate::error::{
42 EmptyRegionDirSnafu, InvalidPartitionExprSnafu, ObjectStoreNotFoundSnafu, Result,
43};
44use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
45use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
46use crate::read::FlatSource;
47use crate::region::options::RegionOptions;
48use crate::region::version::VersionRef;
49use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState};
50use crate::schedule::scheduler::LocalScheduler;
51use crate::sst::FormatType;
52use crate::sst::file::FileMeta;
53use crate::sst::file_purger::LocalFilePurger;
54use crate::sst::index::intermediate::IntermediateManager;
55use crate::sst::index::puffin_manager::PuffinManagerFactory;
56use crate::sst::location::region_dir_from_table_dir;
57use crate::sst::parquet::WriteOptions;
58use crate::sst::parquet::metadata::extract_primary_key_range;
59use crate::sst::version::{SstVersion, SstVersionRef};
60
/// Region state that a compaction works against.
///
/// It can be built from a [`VersionRef`] via the `From` impl, or assembled
/// field by field as `open_compaction_region` does from a region manifest.
#[derive(Clone)]
pub struct CompactionVersion {
    /// Metadata of the region.
    pub(crate) metadata: RegionMetadataRef,
    /// Options of the region.
    pub(crate) options: RegionOptions,
    /// SST files that belong to this version.
    pub(crate) ssts: SstVersionRef,
    /// Compaction time window of the region, if one is recorded.
    pub(crate) compaction_time_window: Option<Duration>,
}
76
77impl From<VersionRef> for CompactionVersion {
78 fn from(value: VersionRef) -> Self {
79 Self {
80 metadata: value.metadata.clone(),
81 options: value.options.clone(),
82 ssts: value.ssts.clone(),
83 compaction_time_window: value.compaction_time_window,
84 }
85 }
86}
87
/// Everything a compactor needs to compact one region: identifiers, options,
/// configuration and the handles used to read/write SSTs and the manifest.
#[derive(Clone)]
pub struct CompactionRegion {
    /// Id of the region being compacted.
    pub region_id: RegionId,
    /// Options of the region.
    pub region_options: RegionOptions,

    /// Engine configuration shared with the compaction tasks.
    pub(crate) engine_config: Arc<MitoConfig>,
    /// Metadata of the region.
    pub(crate) region_metadata: RegionMetadataRef,
    /// Cache manager handed to the SST reader and writer.
    pub(crate) cache_manager: CacheManagerRef,
    /// Access layer used to read input SSTs and write output SSTs.
    pub access_layer: AccessLayerRef,
    /// Manifest context used to persist the compaction result.
    pub(crate) manifest_ctx: Arc<ManifestContext>,
    /// Version (metadata, options, SSTs) the compaction runs against.
    pub(crate) current_version: CompactionVersion,
    /// File purger of the region; `None` when the region has no local purger.
    pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
    /// TTL of the region, if resolved.
    pub(crate) ttl: Option<TimeToLive>,

    /// Upper bound on the number of merge tasks spawned at the same time.
    /// `DefaultCompactor::merge_ssts` treats `0` as `1`.
    pub max_parallelism: usize,
}
111
/// Request consumed by `open_compaction_region` to open a region for compaction.
#[derive(Debug, Clone)]
pub struct OpenCompactionRegionRequest {
    /// Id of the region to open.
    pub region_id: RegionId,
    /// Table directory; combined with `region_id` and `path_type` to locate
    /// the region directory.
    pub table_dir: String,
    /// Path type used when deriving paths from `table_dir`.
    pub path_type: PathType,
    /// Options of the region; `region_options.storage` selects the object store.
    pub region_options: RegionOptions,
    /// Copied into `CompactionRegion::max_parallelism`.
    pub max_parallelism: usize,
}
121
122pub async fn open_compaction_region(
125 req: &OpenCompactionRegionRequest,
126 mito_config: &MitoConfig,
127 object_store_manager: ObjectStoreManagerRef,
128 ttl_provider: Either<TimeToLive, SchemaMetadataManagerRef>,
129) -> Result<CompactionRegion> {
130 let object_store = {
131 let name = &req.region_options.storage;
132 if let Some(name) = name {
133 object_store_manager
134 .find(name)
135 .with_context(|| ObjectStoreNotFoundSnafu {
136 object_store: name.clone(),
137 })?
138 } else {
139 object_store_manager.default_object_store()
140 }
141 };
142
143 let access_layer = {
144 let puffin_manager_factory = PuffinManagerFactory::new(
145 &mito_config.index.aux_path,
146 mito_config.index.staging_size.as_bytes(),
147 Some(mito_config.index.write_buffer_size.as_bytes() as _),
148 mito_config.index.staging_ttl,
149 )
150 .await?;
151 let intermediate_manager =
152 IntermediateManager::init_fs(mito_config.index.aux_path.clone()).await?;
153
154 Arc::new(AccessLayer::new(
155 &req.table_dir,
156 req.path_type,
157 object_store.clone(),
158 puffin_manager_factory,
159 intermediate_manager,
160 ))
161 };
162
163 let manifest_manager = {
164 let region_dir = region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type);
165 let region_manifest_options =
166 RegionManifestOptions::new(mito_config, ®ion_dir, object_store);
167
168 RegionManifestManager::open(region_manifest_options, &Default::default())
169 .await?
170 .with_context(|| EmptyRegionDirSnafu {
171 region_id: req.region_id,
172 region_dir: region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type),
173 })?
174 };
175
176 let manifest = manifest_manager.manifest();
177 let region_metadata = manifest.metadata.clone();
178 let manifest_ctx = Arc::new(ManifestContext::new(
179 manifest_manager,
180 RegionRoleState::Leader(RegionLeaderState::Writable),
181 ));
182
183 let file_purger = {
184 let purge_scheduler = Arc::new(LocalScheduler::new(mito_config.max_background_purges));
185 Arc::new(LocalFilePurger::new(
186 purge_scheduler.clone(),
187 access_layer.clone(),
188 None,
189 ))
190 };
191
192 let current_version = {
193 let mut ssts = SstVersion::new();
194 ssts.add_files(file_purger.clone(), manifest.files.values().cloned());
195 CompactionVersion {
196 metadata: region_metadata.clone(),
197 options: req.region_options.clone(),
198 ssts: Arc::new(ssts),
199 compaction_time_window: manifest.compaction_time_window,
200 }
201 };
202
203 let ttl = match ttl_provider {
204 Either::Left(ttl) => ttl,
206 Either::Right(schema_metadata_manager) => {
208 let (_, ttl) = find_dynamic_options(
209 req.region_id.table_id(),
210 &req.region_options,
211 &schema_metadata_manager,
212 )
213 .await
214 .unwrap_or_else(|e| {
215 warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
216 (
217 crate::region::options::CompactionOptions::default(),
218 TimeToLive::default(),
219 )
220 });
221 ttl
222 }
223 };
224
225 Ok(CompactionRegion {
226 region_id: req.region_id,
227 region_options: req.region_options.clone(),
228 engine_config: Arc::new(mito_config.clone()),
229 region_metadata: region_metadata.clone(),
230 cache_manager: Arc::new(CacheManager::default()),
231 access_layer,
232 manifest_ctx,
233 current_version,
234 file_purger: Some(file_purger),
235 ttl: Some(ttl),
236 max_parallelism: req.max_parallelism,
237 })
238}
239
240impl CompactionRegion {
241 pub fn file_purger(&self) -> Option<Arc<LocalFilePurger>> {
243 self.file_purger.clone()
244 }
245
246 pub async fn stop_purger_scheduler(&self) -> Result<()> {
248 if let Some(file_purger) = &self.file_purger {
249 file_purger.stop_scheduler().await
250 } else {
251 Ok(())
252 }
253 }
254}
255
/// Result of merging SSTs: the files produced and the files they replace.
#[derive(Default, Clone, Debug, Serialize, Deserialize)]
pub struct MergeOutput {
    /// Files written by the merge that should be added to the region.
    pub files_to_add: Vec<FileMeta>,
    /// Files that should be removed from the region.
    pub files_to_remove: Vec<FileMeta>,
    /// Compaction time window; `DefaultCompactor::update_manifest` interprets
    /// the value as a number of seconds.
    pub compaction_time_window: Option<i64>,
}
263
264impl MergeOutput {
265 pub fn is_empty(&self) -> bool {
266 self.files_to_add.is_empty() && self.files_to_remove.is_empty()
267 }
268
269 pub fn input_file_size(&self) -> u64 {
270 self.files_to_remove.iter().map(|f| f.file_size).sum()
271 }
272
273 pub fn output_file_size(&self) -> u64 {
274 self.files_to_add.iter().map(|f| f.file_size).sum()
275 }
276}
277
/// Two-step compaction interface: merge the picked SSTs, then record the
/// result in the region manifest.
#[async_trait::async_trait]
pub trait Compactor: Send + Sync + 'static {
    /// Merges the SSTs described by `picker_output` for `compaction_region`
    /// and returns the files to add and to remove.
    async fn merge_ssts(
        &self,
        compaction_region: &CompactionRegion,
        picker_output: PickerOutput,
    ) -> Result<MergeOutput>;

    /// Applies `merge_output` to the manifest of `compaction_region` and
    /// returns the resulting [`RegionEdit`].
    async fn update_manifest(
        &self,
        compaction_region: &CompactionRegion,
        merge_output: MergeOutput,
    ) -> Result<RegionEdit>;
}
295
/// Merges the inputs of a single [`CompactionOutput`].
///
/// `DefaultCompactor` is generic over this trait, so the merge step can be
/// replaced (the tests in this file plug in mock mergers).
#[async_trait::async_trait]
pub trait SstMerger: Send + Sync + 'static {
    /// Merges the input files of `output` using `write_opts` and returns the
    /// metadata of the files that were produced.
    ///
    /// `compaction_region` is taken by value; `DefaultCompactor` passes a
    /// clone into each spawned task.
    async fn merge_single_output(
        &self,
        compaction_region: CompactionRegion,
        output: CompactionOutput,
        write_opts: WriteOptions,
    ) -> Result<Vec<FileMeta>>;
}
309
/// Stateless [`SstMerger`] that reads the input SSTs and writes the merged
/// output through the region's access layer.
#[derive(Clone)]
pub struct DefaultSstMerger;
313
314#[async_trait::async_trait]
315impl SstMerger for DefaultSstMerger {
316 async fn merge_single_output(
317 &self,
318 compaction_region: CompactionRegion,
319 output: CompactionOutput,
320 write_opts: WriteOptions,
321 ) -> Result<Vec<FileMeta>> {
322 let region_id = compaction_region.region_id;
323 let storage = compaction_region.region_options.storage.clone();
324 let index_options = compaction_region
325 .current_version
326 .options
327 .index_options
328 .clone();
329 let append_mode = compaction_region.current_version.options.append_mode;
330 let merge_mode = compaction_region.current_version.options.merge_mode();
331 let flat_format = compaction_region
332 .region_options
333 .sst_format
334 .map(|format| format == FormatType::Flat)
335 .unwrap_or(compaction_region.engine_config.default_flat_format);
336
337 let index_config = compaction_region.engine_config.index.clone();
338 let inverted_index_config = compaction_region.engine_config.inverted_index.clone();
339 let fulltext_index_config = compaction_region.engine_config.fulltext_index.clone();
340 let bloom_filter_index_config = compaction_region.engine_config.bloom_filter_index.clone();
341 #[cfg(feature = "vector_index")]
342 let vector_index_config = compaction_region.engine_config.vector_index.clone();
343
344 let input_file_names = output
345 .inputs
346 .iter()
347 .map(|f| f.file_id().to_string())
348 .join(",");
349 let max_sequence = output
350 .inputs
351 .iter()
352 .map(|f| f.meta_ref().sequence)
353 .max()
354 .flatten();
355 let builder = CompactionSstReaderBuilder {
356 metadata: compaction_region.region_metadata.clone(),
357 sst_layer: compaction_region.access_layer.clone(),
358 cache: compaction_region.cache_manager.clone(),
359 inputs: &output.inputs,
360 append_mode,
361 filter_deleted: output.filter_deleted,
362 time_range: output.output_time_range,
363 merge_mode,
364 };
365 let reader = builder.build_flat_sst_reader().await?;
366 let source = FlatSource::Stream(reader);
367 let mut metrics = Metrics::new(WriteType::Compaction);
368 let region_metadata = compaction_region.region_metadata.clone();
369 let sst_infos = compaction_region
370 .access_layer
371 .write_sst(
372 SstWriteRequest {
373 op_type: OperationType::Compact,
374 metadata: region_metadata.clone(),
375 source,
376 cache_manager: compaction_region.cache_manager.clone(),
377 storage,
378 max_sequence: max_sequence.map(NonZero::get),
379 sst_write_format: if flat_format {
380 FormatType::Flat
381 } else {
382 FormatType::PrimaryKey
383 },
384 index_options,
385 index_config,
386 inverted_index_config,
387 fulltext_index_config,
388 bloom_filter_index_config,
389 #[cfg(feature = "vector_index")]
390 vector_index_config,
391 },
392 &write_opts,
393 &mut metrics,
394 )
395 .await?;
396 let partition_expr = match ®ion_metadata.partition_expr {
398 None => None,
399 Some(json_str) if json_str.is_empty() => None,
400 Some(json_str) => PartitionExpr::from_json_str(json_str).with_context(|_| {
401 InvalidPartitionExprSnafu {
402 expr: json_str.clone(),
403 }
404 })?,
405 };
406
407 let output_files = sst_infos
408 .into_iter()
409 .map(|sst_info| {
410 let pk_range = sst_info
411 .file_metadata
412 .as_ref()
413 .and_then(|meta| extract_primary_key_range(meta, ®ion_metadata));
414 let (primary_key_min, primary_key_max) = match pk_range {
415 Some((min, max)) => (Some(min), Some(max)),
416 None => (None, None),
417 };
418
419 FileMeta {
420 region_id,
421 file_id: sst_info.file_id,
422 time_range: sst_info.time_range,
423 level: output.output_level,
424 file_size: sst_info.file_size,
425 max_row_group_uncompressed_size: sst_info.max_row_group_uncompressed_size,
426 available_indexes: sst_info.index_metadata.build_available_indexes(),
427 indexes: sst_info.index_metadata.build_indexes(),
428 index_file_size: sst_info.index_metadata.file_size,
429 index_version: 0,
430 num_rows: sst_info.num_rows as u64,
431 num_row_groups: sst_info.num_row_groups,
432 sequence: max_sequence,
433 partition_expr: partition_expr.clone(),
434 num_series: sst_info.num_series,
435 primary_key_min,
436 primary_key_max,
437 }
438 })
439 .collect::<Vec<_>>();
440 let output_file_names = output_files.iter().map(|f| f.file_id.to_string()).join(",");
441 info!(
442 "Region {} compaction inputs: [{}], outputs: [{}], flat_format: {}, metrics: {:?}",
443 region_id, input_file_names, output_file_names, flat_format, metrics
444 );
445 metrics.observe();
446 Ok(output_files)
447 }
448}
449
/// [`Compactor`] implementation that runs merges as spawned tasks and can be
/// cancelled through a shared [`CancellationHandle`].
///
/// It is generic over the [`SstMerger`] that performs each merge; the default
/// is [`DefaultSstMerger`].
pub struct DefaultCompactor<M = DefaultSstMerger> {
    /// Performs the merge of each compaction output.
    merger: M,
    /// Checked while waiting for merge tasks so the compaction can be stopped.
    cancel_handle: Arc<CancellationHandle>,
}
458
#[cfg(test)]
impl<M: SstMerger> DefaultCompactor<M> {
    /// Test-only constructor: uses `merger` for the merges and a fresh,
    /// default cancellation handle.
    pub fn with_merger(merger: M) -> Self {
        let cancel_handle = Arc::new(CancellationHandle::default());
        Self {
            merger,
            cancel_handle,
        }
    }
}
468
469impl DefaultCompactor {
470 pub fn with_cancel_handle(cancel_handle: Arc<CancellationHandle>) -> Self {
471 Self {
472 merger: DefaultSstMerger,
473 cancel_handle,
474 }
475 }
476}
477
478#[async_trait::async_trait]
479impl<M: SstMerger> Compactor for DefaultCompactor<M>
480where
481 M: Clone,
482{
483 async fn merge_ssts(
484 &self,
485 compaction_region: &CompactionRegion,
486 mut picker_output: PickerOutput,
487 ) -> Result<MergeOutput> {
488 let internal_parallelism = compaction_region.max_parallelism.max(1);
489 let compaction_time_window = picker_output.time_window_size;
490 let region_id = compaction_region.region_id;
491
492 let mut tasks: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(picker_output.outputs.len());
495
496 for output in picker_output.outputs.drain(..) {
497 let inputs_to_remove: Vec<_> =
498 output.inputs.iter().map(|f| f.meta_ref().clone()).collect();
499 let write_opts = WriteOptions {
500 write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
501 max_file_size: picker_output.max_file_size,
502 ..Default::default()
503 };
504 let merger = self.merger.clone();
505 let compaction_region = compaction_region.clone();
506 let fut = async move {
507 merger
508 .merge_single_output(compaction_region, output, write_opts)
509 .await
510 };
511 tasks.push((inputs_to_remove, fut));
512 }
513
514 let mut output_files = Vec::with_capacity(tasks.len());
515 let mut compacted_inputs = Vec::with_capacity(
516 tasks.iter().map(|(inputs, _)| inputs.len()).sum::<usize>()
517 + picker_output.expired_ssts.len(),
518 );
519
520 while !tasks.is_empty() {
521 let mut chunk: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(internal_parallelism);
522 for _ in 0..internal_parallelism {
523 if let Some(task) = tasks.pop() {
524 chunk.push(task);
525 }
526 }
527 let mut spawned: Vec<_> = chunk
528 .into_iter()
529 .map(|(inputs, fut)| {
530 let handle = common_runtime::spawn_compact(fut);
531 (inputs, handle)
532 })
533 .collect();
534
535 while let Some((inputs, handle)) = spawned.pop() {
536 let abort_handle = handle.abort_handle();
537 match CancellableFuture::new(handle, self.cancel_handle.clone()).await {
538 Ok(Ok(Ok(files))) => {
539 output_files.extend(files);
540 compacted_inputs.extend(inputs);
541 }
542 Ok(Ok(Err(e))) => {
543 warn!(
544 e; "Failed to merge compaction output for region: {}, inputs: [{}]",
545 region_id,
546 inputs.iter().map(|f| f.file_id.to_string()).join(",")
547 );
548 }
549 Ok(Err(e)) => {
550 warn!(
551 "Region {} compaction task join error for inputs: [{}], skipping: {}",
552 region_id,
553 inputs.iter().map(|f| f.file_id.to_string()).join(","),
554 e
555 );
556 if self.cancel_handle.is_cancelled() {
559 abort_handle.abort();
560 for (_, handle) in spawned {
561 handle.abort();
562 }
563 }
564 return Err(e).context(error::JoinSnafu);
565 }
566 Err(_) => {
567 debug!(
568 "Compaction merge cancelled for region: {}, aborting remaining {} spawned tasks",
569 region_id,
570 spawned.len(),
571 );
572 abort_handle.abort();
573 for (_, handle) in spawned {
574 handle.abort();
575 }
576 break;
577 }
578 }
579 }
580
581 if self.cancel_handle.is_cancelled() {
582 info!("Compaction merge cancelled for region: {}", region_id);
583 break;
584 }
585 }
586
587 compacted_inputs.extend(
589 picker_output
590 .expired_ssts
591 .iter()
592 .map(|f| f.meta_ref().clone()),
593 );
594
595 Ok(MergeOutput {
596 files_to_add: output_files,
597 files_to_remove: compacted_inputs,
598 compaction_time_window: Some(compaction_time_window),
599 })
600 }
601
602 async fn update_manifest(
603 &self,
604 compaction_region: &CompactionRegion,
605 merge_output: MergeOutput,
606 ) -> Result<RegionEdit> {
607 let edit = RegionEdit {
609 files_to_add: merge_output.files_to_add,
610 files_to_remove: merge_output.files_to_remove,
611 timestamp_ms: Some(chrono::Utc::now().timestamp_millis()),
613 compaction_time_window: merge_output
614 .compaction_time_window
615 .map(|seconds| Duration::from_secs(seconds as u64)),
616 flushed_entry_id: None,
617 flushed_sequence: None,
618 committed_sequence: None,
619 };
620
621 let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
622 compaction_region
624 .manifest_ctx
625 .update_manifest(RegionLeaderState::Writable, action_list, false)
626 .await?;
627
628 Ok(edit)
629 }
630}
631
#[cfg(test)]
mod tests {
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::{Arc, Mutex};
    use std::time::Duration;

    use store_api::storage::{FileId, RegionId};
    use tokio::time::sleep;

    use super::{DefaultCompactor, *};
    use crate::cache::CacheManager;
    use crate::compaction::picker::PickerOutput;
    use crate::error::Result;
    use crate::sst::file::FileHandle;
    use crate::sst::file_purger::NoopFilePurger;
    use crate::sst::version::SstVersion;
    use crate::test_util::memtable_util::metadata_for_test;
    use crate::test_util::scheduler_util::SchedulerEnv;

    /// Creates a 100-byte file meta with a random file id in region `(1, 1)`.
    fn dummy_file_meta() -> FileMeta {
        FileMeta {
            region_id: RegionId::new(1, 1),
            file_id: FileId::random(),
            file_size: 100,
            ..Default::default()
        }
    }

    /// Wraps `meta` in a file handle backed by a no-op purger.
    fn new_file_handle(meta: FileMeta) -> FileHandle {
        FileHandle::new(meta, Arc::new(NoopFilePurger))
    }

    /// Builds a level-1 compaction output whose only input is `meta`.
    fn single_input_output(meta: FileMeta) -> CompactionOutput {
        CompactionOutput {
            output_level: 1,
            inputs: vec![new_file_handle(meta)],
            filter_deleted: false,
            output_time_range: None,
        }
    }

    /// Builds a picker output with a 3600 time window and no max file size.
    fn new_picker_output(
        outputs: Vec<CompactionOutput>,
        expired_ssts: Vec<FileHandle>,
    ) -> PickerOutput {
        PickerOutput {
            outputs,
            expired_ssts,
            time_window_size: 3600,
            max_file_size: None,
        }
    }

    /// Builds a compaction region on top of the scheduler test environment,
    /// with default options, no purger, no TTL and a parallelism of 1.
    async fn new_test_compaction_region() -> CompactionRegion {
        let env = SchedulerEnv::new().await;
        let metadata = metadata_for_test();
        let manifest_ctx = env.mock_manifest_context(metadata.clone()).await;
        let current_version = CompactionVersion {
            metadata: metadata.clone(),
            options: RegionOptions::default(),
            ssts: Arc::new(SstVersion::new()),
            compaction_time_window: None,
        };

        CompactionRegion {
            region_id: RegionId::new(1, 1),
            region_options: RegionOptions::default(),
            engine_config: Arc::new(MitoConfig::default()),
            region_metadata: metadata,
            cache_manager: Arc::new(CacheManager::default()),
            access_layer: env.access_layer.clone(),
            manifest_ctx,
            current_version,
            file_purger: None,
            ttl: None,
            max_parallelism: 1,
        }
    }

    /// Merger that replays a fixed list of results, one per call, in call
    /// order. The call counter is shared between clones.
    #[derive(Clone)]
    struct MockMerger {
        results: Arc<Mutex<Vec<Result<Vec<FileMeta>>>>>,
        call_idx: Arc<AtomicUsize>,
    }

    impl MockMerger {
        fn new(results: Vec<Result<Vec<FileMeta>>>) -> Self {
            let results = Arc::new(Mutex::new(results));
            let call_idx = Arc::new(AtomicUsize::new(0));
            Self { results, call_idx }
        }
    }

    #[async_trait::async_trait]
    impl SstMerger for MockMerger {
        async fn merge_single_output(
            &self,
            _compaction_region: CompactionRegion,
            _output: CompactionOutput,
            _write_opts: WriteOptions,
        ) -> Result<Vec<FileMeta>> {
            let idx = self.call_idx.fetch_add(1, Ordering::SeqCst);
            let results = self.results.lock().unwrap();
            match results.get(idx) {
                None => panic!("MockMerger: no result configured for call index {idx}"),
                Some(Ok(files)) => Ok(files.clone()),
                // The stored error is not cloned; a fresh one is produced.
                Some(Err(_)) => error::InvalidMetaSnafu {
                    reason: format!("simulated failure at index {idx}"),
                }
                .fail(),
            }
        }
    }

    #[tokio::test]
    async fn test_partial_merge_failure_collects_only_successful_outputs() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;

        let inputs: Vec<FileMeta> = (0..3).map(|_| dummy_file_meta()).collect();

        // With a parallelism of 1 the compactor runs the outputs one at a
        // time starting from the last one, so the failing second call always
        // lands on the middle output.
        let merger = MockMerger::new(vec![
            Ok(vec![dummy_file_meta()]),
            Err(error::InvalidMetaSnafu {
                reason: "boom".to_string(),
            }
            .build()),
            Ok(vec![dummy_file_meta(), dummy_file_meta()]),
        ]);
        let compactor = DefaultCompactor::with_merger(merger);

        let outputs = inputs.iter().cloned().map(single_input_output).collect();
        let picker_output = new_picker_output(outputs, vec![]);

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        // 1 + 2 files from the two successful merges.
        assert_eq!(merge_output.files_to_add.len(), 3);
        // Only the inputs of the successful merges are removed.
        assert_eq!(merge_output.files_to_remove.len(), 2);

        let removed_ids: Vec<_> = merge_output
            .files_to_remove
            .iter()
            .map(|f| f.file_id)
            .collect();
        assert!(removed_ids.contains(&inputs[0].file_id));
        assert!(removed_ids.contains(&inputs[2].file_id));
        assert!(!removed_ids.contains(&inputs[1].file_id));
    }

    #[tokio::test]
    async fn test_all_outputs_succeed() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;
        let input_meta = dummy_file_meta();
        let output_meta = vec![dummy_file_meta()];

        let compactor = DefaultCompactor::with_merger(MockMerger::new(vec![Ok(
            output_meta.clone()
        )]));

        let picker_output =
            new_picker_output(vec![single_input_output(input_meta.clone())], vec![]);

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        assert_eq!(merge_output.files_to_add.len(), 1);
        assert_eq!(merge_output.files_to_add[0].file_id, output_meta[0].file_id);
        assert_eq!(merge_output.files_to_remove.len(), 1);
        assert_eq!(merge_output.files_to_remove[0].file_id, input_meta.file_id);
    }

    #[tokio::test]
    async fn test_expired_ssts_always_removed() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;
        let input_meta = dummy_file_meta();
        let expired_meta = dummy_file_meta();

        // The only merge fails, so nothing but the expired SST may be removed.
        let merger = MockMerger::new(vec![Err(error::InvalidMetaSnafu {
            reason: "fail".to_string(),
        }
        .build())]);
        let compactor = DefaultCompactor::with_merger(merger);

        let picker_output = new_picker_output(
            vec![single_input_output(input_meta.clone())],
            vec![new_file_handle(expired_meta.clone())],
        );

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        assert!(merge_output.files_to_add.is_empty());
        assert_eq!(merge_output.files_to_remove.len(), 1);
        assert_eq!(
            merge_output.files_to_remove[0].file_id,
            expired_meta.file_id
        );
    }

    /// Merger that counts its calls and then never completes.
    #[derive(Clone)]
    struct BlockingMerger {
        call_idx: Arc<AtomicUsize>,
    }

    #[async_trait::async_trait]
    impl SstMerger for BlockingMerger {
        async fn merge_single_output(
            &self,
            _compaction_region: CompactionRegion,
            _output: CompactionOutput,
            _write_opts: WriteOptions,
        ) -> Result<Vec<FileMeta>> {
            self.call_idx.fetch_add(1, Ordering::SeqCst);
            std::future::pending().await
        }
    }

    #[tokio::test(flavor = "multi_thread")]
    async fn test_merge_ssts_cancels_spawned_tasks() {
        common_telemetry::init_default_ut_logging();

        let mut compaction_region = new_test_compaction_region().await;
        compaction_region.max_parallelism = 2;

        let cancel_handle = Arc::new(CancellationHandle::default());
        let call_idx = Arc::new(AtomicUsize::new(0));
        let compactor = DefaultCompactor {
            merger: BlockingMerger {
                call_idx: call_idx.clone(),
            },
            cancel_handle: cancel_handle.clone(),
        };

        // Three outputs but a parallelism of 2: the third must never start.
        let outputs = (0..3)
            .map(|_| single_input_output(dummy_file_meta()))
            .collect();
        let picker_output = new_picker_output(outputs, vec![]);

        let task = tokio::spawn(async move {
            compactor
                .merge_ssts(&compaction_region, picker_output)
                .await
        });

        // Give the first batch time to start before cancelling.
        sleep(Duration::from_millis(100)).await;
        cancel_handle.cancel();

        let merge_output = task
            .await
            .expect("merge_ssts should stop after cancellation")
            .unwrap();

        let started = call_idx.load(Ordering::SeqCst);

        assert!(merge_output.files_to_add.is_empty());
        assert!(merge_output.files_to_remove.is_empty());
        assert_eq!(started, 2);
    }
}