Skip to main content

mito2/compaction/
compactor.rs

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15use std::num::NonZero;
16use std::sync::Arc;
17use std::time::Duration;
18
19use common_base::cancellation::{CancellableFuture, CancellationHandle};
20use common_meta::key::SchemaMetadataManagerRef;
21use common_telemetry::{debug, info, warn};
22use common_time::TimeToLive;
23use either::Either;
24use itertools::Itertools;
25use object_store::manager::ObjectStoreManagerRef;
26use partition::expr::PartitionExpr;
27use serde::{Deserialize, Serialize};
28use snafu::{OptionExt, ResultExt};
29use store_api::metadata::RegionMetadataRef;
30use store_api::region_request::PathType;
31use store_api::storage::RegionId;
32
33use crate::access_layer::{
34    AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
35};
36use crate::cache::{CacheManager, CacheManagerRef};
37use crate::compaction::picker::PickerOutput;
38use crate::compaction::{CompactionOutput, CompactionSstReaderBuilder, find_dynamic_options};
39use crate::config::MitoConfig;
40use crate::error;
41use crate::error::{
42    EmptyRegionDirSnafu, InvalidPartitionExprSnafu, ObjectStoreNotFoundSnafu, Result,
43};
44use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
45use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
46use crate::region::options::RegionOptions;
47use crate::region::version::VersionRef;
48use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState};
49use crate::schedule::scheduler::LocalScheduler;
50use crate::sst::FormatType;
51use crate::sst::file::FileMeta;
52use crate::sst::file_purger::LocalFilePurger;
53use crate::sst::index::intermediate::IntermediateManager;
54use crate::sst::index::puffin_manager::PuffinManagerFactory;
55use crate::sst::location::region_dir_from_table_dir;
56use crate::sst::parquet::WriteOptions;
57use crate::sst::parquet::metadata::extract_primary_key_range;
58use crate::sst::version::{SstVersion, SstVersionRef};
59
/// Region version for compaction that does not hold memtables.
///
/// It is built either from a full region version (see the `From<VersionRef>`
/// implementation) or from the manifest in [`open_compaction_region`].
#[derive(Clone)]
pub struct CompactionVersion {
    /// Metadata of the region.
    ///
    /// Altering metadata isn't frequent, storing metadata in Arc to allow sharing
    /// metadata and reuse metadata when creating a new `Version`.
    pub(crate) metadata: RegionMetadataRef,
    /// Options of the region.
    ///
    /// The SST merger reads the index options, append mode and merge mode
    /// from here.
    pub(crate) options: RegionOptions,
    /// SSTs of the region.
    pub(crate) ssts: SstVersionRef,
    /// Inferred compaction time window.
    pub(crate) compaction_time_window: Option<Duration>,
}
75
76impl From<VersionRef> for CompactionVersion {
77    fn from(value: VersionRef) -> Self {
78        Self {
79            metadata: value.metadata.clone(),
80            options: value.options.clone(),
81            ssts: value.ssts.clone(),
82            compaction_time_window: value.compaction_time_window,
83        }
84    }
85}
86
/// CompactionRegion represents a region that needs to be compacted.
/// It's the subset of MitoRegion.
#[derive(Clone)]
pub struct CompactionRegion {
    /// Id of the region being compacted.
    pub region_id: RegionId,
    /// Options of the region.
    ///
    /// The SST merger takes the storage name and the SST format from here.
    pub region_options: RegionOptions,

    /// Engine configuration.
    ///
    /// Supplies the index configurations, the default SST format and the SST
    /// write buffer size used while merging.
    pub(crate) engine_config: Arc<MitoConfig>,
    /// Metadata of the region.
    pub(crate) region_metadata: RegionMetadataRef,
    /// Cache manager handed to the SST reader builder and the SST writer.
    pub(crate) cache_manager: CacheManagerRef,
    /// Access layer to get the table path and path type.
    pub access_layer: AccessLayerRef,
    /// Manifest context through which compaction edits are written.
    pub(crate) manifest_ctx: Arc<ManifestContext>,
    /// Memtable-free version of the region used by the compaction.
    pub(crate) current_version: CompactionVersion,
    /// File purger of the region, if it has one.
    pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
    /// TTL of the region, if known.
    pub(crate) ttl: Option<TimeToLive>,

    /// Controls the parallelism of this compaction task. Default is 1.
    ///
    /// The parallel is inside this compaction task, not across different compaction tasks.
    /// It can be different windows of the same compaction task or something like this.
    /// A value of 0 is treated as 1 when merging.
    pub max_parallelism: usize,
}
110
/// OpenCompactionRegionRequest represents the request to open a compaction region.
#[derive(Debug, Clone)]
pub struct OpenCompactionRegionRequest {
    /// Id of the region to open.
    pub region_id: RegionId,
    /// Table directory; combined with `region_id` and `path_type` to locate
    /// the region directory.
    pub table_dir: String,
    /// Path type used together with `table_dir` to derive the region directory.
    pub path_type: PathType,
    /// Options of the region. Its `storage` entry selects the object store;
    /// the default object store is used when it is `None`.
    pub region_options: RegionOptions,
    /// Parallelism copied into the resulting [`CompactionRegion`].
    pub max_parallelism: usize,
}
120
121/// Open a compaction region from a compaction request.
122/// It's simple version of RegionOpener::open().
123pub async fn open_compaction_region(
124    req: &OpenCompactionRegionRequest,
125    mito_config: &MitoConfig,
126    object_store_manager: ObjectStoreManagerRef,
127    ttl_provider: Either<TimeToLive, SchemaMetadataManagerRef>,
128) -> Result<CompactionRegion> {
129    let object_store = {
130        let name = &req.region_options.storage;
131        if let Some(name) = name {
132            object_store_manager
133                .find(name)
134                .with_context(|| ObjectStoreNotFoundSnafu {
135                    object_store: name.clone(),
136                })?
137        } else {
138            object_store_manager.default_object_store()
139        }
140    };
141
142    let access_layer = {
143        let puffin_manager_factory = PuffinManagerFactory::new(
144            &mito_config.index.aux_path,
145            mito_config.index.staging_size.as_bytes(),
146            Some(mito_config.index.write_buffer_size.as_bytes() as _),
147            mito_config.index.staging_ttl,
148        )
149        .await?;
150        let intermediate_manager =
151            IntermediateManager::init_fs(mito_config.index.aux_path.clone()).await?;
152
153        Arc::new(AccessLayer::new(
154            &req.table_dir,
155            req.path_type,
156            object_store.clone(),
157            puffin_manager_factory,
158            intermediate_manager,
159        ))
160    };
161
162    let manifest_manager = {
163        let region_dir = region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type);
164        let region_manifest_options =
165            RegionManifestOptions::new(mito_config, &region_dir, object_store);
166
167        RegionManifestManager::open(region_manifest_options, &Default::default())
168            .await?
169            .with_context(|| EmptyRegionDirSnafu {
170                region_id: req.region_id,
171                region_dir: region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type),
172            })?
173    };
174
175    let manifest = manifest_manager.manifest();
176    let region_metadata = manifest.metadata.clone();
177    let manifest_ctx = Arc::new(ManifestContext::new(
178        manifest_manager,
179        RegionRoleState::Leader(RegionLeaderState::Writable),
180    ));
181
182    let file_purger = {
183        let purge_scheduler = Arc::new(LocalScheduler::new(mito_config.max_background_purges));
184        Arc::new(LocalFilePurger::new(
185            purge_scheduler.clone(),
186            access_layer.clone(),
187            None,
188        ))
189    };
190
191    let current_version = {
192        let mut ssts = SstVersion::new();
193        ssts.add_files(file_purger.clone(), manifest.files.values().cloned());
194        CompactionVersion {
195            metadata: region_metadata.clone(),
196            options: req.region_options.clone(),
197            ssts: Arc::new(ssts),
198            compaction_time_window: manifest.compaction_time_window,
199        }
200    };
201
202    let ttl = match ttl_provider {
203        // Use the specified ttl.
204        Either::Left(ttl) => ttl,
205        // Get the ttl from the schema metadata manager.
206        Either::Right(schema_metadata_manager) => {
207            let (_, ttl) =
208                find_dynamic_options(req.region_id, &req.region_options, &schema_metadata_manager)
209                    .await
210                    .unwrap_or_else(|e| {
211                        warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
212                        (
213                            crate::region::options::CompactionOptions::default(),
214                            TimeToLive::default(),
215                        )
216                    });
217            ttl
218        }
219    };
220
221    Ok(CompactionRegion {
222        region_id: req.region_id,
223        region_options: req.region_options.clone(),
224        engine_config: Arc::new(mito_config.clone()),
225        region_metadata: region_metadata.clone(),
226        cache_manager: Arc::new(CacheManager::default()),
227        access_layer,
228        manifest_ctx,
229        current_version,
230        file_purger: Some(file_purger),
231        ttl: Some(ttl),
232        max_parallelism: req.max_parallelism,
233    })
234}
235
236impl CompactionRegion {
237    /// Get the file purger of the compaction region.
238    pub fn file_purger(&self) -> Option<Arc<LocalFilePurger>> {
239        self.file_purger.clone()
240    }
241
242    /// Stop the file purger scheduler of the compaction region.
243    pub async fn stop_purger_scheduler(&self) -> Result<()> {
244        if let Some(file_purger) = &self.file_purger {
245            file_purger.stop_scheduler().await
246        } else {
247            Ok(())
248        }
249    }
250}
251
/// [`MergeOutput`] represents the output of merging SST files.
#[derive(Default, Clone, Debug, Serialize, Deserialize)]
pub struct MergeOutput {
    /// Files produced by the merge that should be added to the region.
    pub files_to_add: Vec<FileMeta>,
    /// Files that should be removed from the region.
    pub files_to_remove: Vec<FileMeta>,
    /// Compaction time window in seconds, if any.
    pub compaction_time_window: Option<i64>,
}
259
260impl MergeOutput {
261    pub fn is_empty(&self) -> bool {
262        self.files_to_add.is_empty() && self.files_to_remove.is_empty()
263    }
264
265    pub fn input_file_size(&self) -> u64 {
266        self.files_to_remove.iter().map(|f| f.file_size).sum()
267    }
268
269    pub fn output_file_size(&self) -> u64 {
270        self.files_to_add.iter().map(|f| f.file_size).sum()
271    }
272}
273
/// Compactor is the trait that defines the compaction logic.
///
/// A compaction has two steps: [`Compactor::merge_ssts`] produces a
/// [`MergeOutput`], and [`Compactor::update_manifest`] records that output in
/// the region manifest.
#[async_trait::async_trait]
pub trait Compactor: Send + Sync + 'static {
    /// Merge SST files for a region.
    ///
    /// `picker_output` describes the outputs to build and the expired SSTs.
    /// The returned [`MergeOutput`] lists the files to add and the files to
    /// remove.
    async fn merge_ssts(
        &self,
        compaction_region: &CompactionRegion,
        picker_output: PickerOutput,
    ) -> Result<MergeOutput>;

    /// Update the manifest after merging SST files.
    ///
    /// Returns the [`RegionEdit`] built from `merge_output`.
    async fn update_manifest(
        &self,
        compaction_region: &CompactionRegion,
        merge_output: MergeOutput,
    ) -> Result<RegionEdit>;
}
291
/// Trait for merging a single compaction output into SST files.
///
/// This is extracted from `DefaultCompactor` to allow injecting mock
/// implementations in tests.
#[async_trait::async_trait]
pub trait SstMerger: Send + Sync + 'static {
    /// Merges the inputs of `output` and returns the metadata of the files
    /// that were written.
    ///
    /// `compaction_region` and `output` are taken by value so that the
    /// returned future can be moved into a spawned task.
    async fn merge_single_output(
        &self,
        compaction_region: CompactionRegion,
        output: CompactionOutput,
        write_opts: WriteOptions,
    ) -> Result<Vec<FileMeta>>;
}
305
/// The production [`SstMerger`] that reads, merges, and writes SST files.
///
/// It is a stateless unit struct; everything it needs comes from the
/// arguments of `merge_single_output`.
#[derive(Clone)]
pub struct DefaultSstMerger;
309
310#[async_trait::async_trait]
311impl SstMerger for DefaultSstMerger {
312    async fn merge_single_output(
313        &self,
314        compaction_region: CompactionRegion,
315        output: CompactionOutput,
316        write_opts: WriteOptions,
317    ) -> Result<Vec<FileMeta>> {
318        let region_id = compaction_region.region_id;
319        let storage = compaction_region.region_options.storage.clone();
320        let index_options = compaction_region
321            .current_version
322            .options
323            .index_options
324            .clone();
325        let append_mode = compaction_region.current_version.options.append_mode;
326        let merge_mode = compaction_region.current_version.options.merge_mode();
327        let flat_format = compaction_region
328            .region_options
329            .sst_format
330            .map(|format| format == FormatType::Flat)
331            .unwrap_or(compaction_region.engine_config.default_flat_format);
332
333        let index_config = compaction_region.engine_config.index.clone();
334        let inverted_index_config = compaction_region.engine_config.inverted_index.clone();
335        let fulltext_index_config = compaction_region.engine_config.fulltext_index.clone();
336        let bloom_filter_index_config = compaction_region.engine_config.bloom_filter_index.clone();
337        #[cfg(feature = "vector_index")]
338        let vector_index_config = compaction_region.engine_config.vector_index.clone();
339
340        let input_file_names = output
341            .inputs
342            .iter()
343            .map(|f| f.file_id().to_string())
344            .join(",");
345        let max_sequence = output
346            .inputs
347            .iter()
348            .map(|f| f.meta_ref().sequence)
349            .max()
350            .flatten();
351        let builder = CompactionSstReaderBuilder {
352            metadata: compaction_region.region_metadata.clone(),
353            sst_layer: compaction_region.access_layer.clone(),
354            cache: compaction_region.cache_manager.clone(),
355            inputs: &output.inputs,
356            append_mode,
357            filter_deleted: output.filter_deleted,
358            time_range: output.output_time_range,
359            merge_mode,
360        };
361        let source = builder.build_flat_sst_reader().await?;
362        let mut metrics = Metrics::new(WriteType::Compaction);
363        let region_metadata = compaction_region.region_metadata.clone();
364        let sst_infos = compaction_region
365            .access_layer
366            .write_sst(
367                SstWriteRequest {
368                    op_type: OperationType::Compact,
369                    metadata: region_metadata.clone(),
370                    source,
371                    cache_manager: compaction_region.cache_manager.clone(),
372                    storage,
373                    max_sequence: max_sequence.map(NonZero::get),
374                    sst_write_format: if flat_format {
375                        FormatType::Flat
376                    } else {
377                        FormatType::PrimaryKey
378                    },
379                    index_options,
380                    index_config,
381                    inverted_index_config,
382                    fulltext_index_config,
383                    bloom_filter_index_config,
384                    #[cfg(feature = "vector_index")]
385                    vector_index_config,
386                },
387                &write_opts,
388                &mut metrics,
389            )
390            .await?;
391        // Convert partition expression once outside the map
392        let partition_expr = match &region_metadata.partition_expr {
393            None => None,
394            Some(json_str) if json_str.is_empty() => None,
395            Some(json_str) => PartitionExpr::from_json_str(json_str).with_context(|_| {
396                InvalidPartitionExprSnafu {
397                    expr: json_str.clone(),
398                }
399            })?,
400        };
401
402        let output_files = sst_infos
403            .into_iter()
404            .map(|sst_info| {
405                let pk_range = sst_info
406                    .file_metadata
407                    .as_ref()
408                    .and_then(|meta| extract_primary_key_range(meta, &region_metadata));
409                let (primary_key_min, primary_key_max) = match pk_range {
410                    Some((min, max)) => (Some(min), Some(max)),
411                    None => (None, None),
412                };
413
414                FileMeta {
415                    region_id,
416                    file_id: sst_info.file_id,
417                    time_range: sst_info.time_range,
418                    level: output.output_level,
419                    file_size: sst_info.file_size,
420                    max_row_group_uncompressed_size: sst_info.max_row_group_uncompressed_size,
421                    available_indexes: sst_info.index_metadata.build_available_indexes(),
422                    indexes: sst_info.index_metadata.build_indexes(),
423                    index_file_size: sst_info.index_metadata.file_size,
424                    index_version: 0,
425                    num_rows: sst_info.num_rows as u64,
426                    num_row_groups: sst_info.num_row_groups,
427                    sequence: max_sequence,
428                    partition_expr: partition_expr.clone(),
429                    num_series: sst_info.num_series,
430                    primary_key_min,
431                    primary_key_max,
432                }
433            })
434            .collect::<Vec<_>>();
435        let output_file_names = output_files.iter().map(|f| f.file_id.to_string()).join(",");
436        info!(
437            "Region {} compaction inputs: [{}], outputs: [{}], flat_format: {}, metrics: {:?}",
438            region_id, input_file_names, output_file_names, flat_format, metrics
439        );
440        metrics.observe();
441        Ok(output_files)
442    }
443}
444
/// DefaultCompactor is the default implementation of Compactor.
///
/// It is parameterized by an [`SstMerger`] to allow injecting mock
/// implementations in tests.
pub struct DefaultCompactor<M = DefaultSstMerger> {
    /// Merger invoked once per compaction output.
    merger: M,
    /// Handle checked while merging; once cancelled, outstanding merge tasks
    /// are aborted and no further tasks are started.
    cancel_handle: Arc<CancellationHandle>,
}
453
#[cfg(test)]
impl<M: SstMerger> DefaultCompactor<M> {
    /// Creates a compactor that delegates to `merger` and owns a fresh,
    /// default cancellation handle. Only available in tests.
    pub fn with_merger(merger: M) -> Self {
        let cancel_handle = Arc::new(CancellationHandle::default());
        Self {
            merger,
            cancel_handle,
        }
    }
}
463
464impl DefaultCompactor {
465    pub fn with_cancel_handle(cancel_handle: Arc<CancellationHandle>) -> Self {
466        Self {
467            merger: DefaultSstMerger,
468            cancel_handle,
469        }
470    }
471}
472
473#[async_trait::async_trait]
474impl<M: SstMerger> Compactor for DefaultCompactor<M>
475where
476    M: Clone,
477{
478    async fn merge_ssts(
479        &self,
480        compaction_region: &CompactionRegion,
481        mut picker_output: PickerOutput,
482    ) -> Result<MergeOutput> {
483        let internal_parallelism = compaction_region.max_parallelism.max(1);
484        let compaction_time_window = picker_output.time_window_size;
485        let region_id = compaction_region.region_id;
486
487        // Build tasks along with their input file metas so we can track which
488        // inputs correspond to each task.
489        let mut tasks: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(picker_output.outputs.len());
490
491        for output in picker_output.outputs.drain(..) {
492            let inputs_to_remove: Vec<_> =
493                output.inputs.iter().map(|f| f.meta_ref().clone()).collect();
494            let write_opts = WriteOptions {
495                write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
496                max_file_size: picker_output.max_file_size,
497                ..Default::default()
498            };
499            let merger = self.merger.clone();
500            let compaction_region = compaction_region.clone();
501            let fut = async move {
502                merger
503                    .merge_single_output(compaction_region, output, write_opts)
504                    .await
505            };
506            tasks.push((inputs_to_remove, fut));
507        }
508
509        let mut output_files = Vec::with_capacity(tasks.len());
510        let mut compacted_inputs = Vec::with_capacity(
511            tasks.iter().map(|(inputs, _)| inputs.len()).sum::<usize>()
512                + picker_output.expired_ssts.len(),
513        );
514
515        while !tasks.is_empty() {
516            let mut chunk: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(internal_parallelism);
517            for _ in 0..internal_parallelism {
518                if let Some(task) = tasks.pop() {
519                    chunk.push(task);
520                }
521            }
522            let mut spawned: Vec<_> = chunk
523                .into_iter()
524                .map(|(inputs, fut)| {
525                    let handle = common_runtime::spawn_compact(fut);
526                    (inputs, handle)
527                })
528                .collect();
529
530            while let Some((inputs, handle)) = spawned.pop() {
531                let abort_handle = handle.abort_handle();
532                match CancellableFuture::new(handle, self.cancel_handle.clone()).await {
533                    Ok(Ok(Ok(files))) => {
534                        output_files.extend(files);
535                        compacted_inputs.extend(inputs);
536                    }
537                    Ok(Ok(Err(e))) => {
538                        warn!(
539                            e; "Failed to merge compaction output for region: {}, inputs: [{}]",
540                            region_id,
541                            inputs.iter().map(|f| f.file_id.to_string()).join(",")
542                        );
543                    }
544                    Ok(Err(e)) => {
545                        warn!(
546                            "Region {} compaction task join error for inputs: [{}], skipping: {}",
547                            region_id,
548                            inputs.iter().map(|f| f.file_id.to_string()).join(","),
549                            e
550                        );
551                        // If the cancel handle is cancelled,
552                        // cancel the remaining tasks before returns the error.
553                        if self.cancel_handle.is_cancelled() {
554                            abort_handle.abort();
555                            for (_, handle) in spawned {
556                                handle.abort();
557                            }
558                        }
559                        return Err(e).context(error::JoinSnafu);
560                    }
561                    Err(_) => {
562                        debug!(
563                            "Compaction merge cancelled for region: {}, aborting remaining {} spawned tasks",
564                            region_id,
565                            spawned.len(),
566                        );
567                        abort_handle.abort();
568                        for (_, handle) in spawned {
569                            handle.abort();
570                        }
571                        break;
572                    }
573                }
574            }
575
576            if self.cancel_handle.is_cancelled() {
577                info!("Compaction merge cancelled for region: {}", region_id);
578                break;
579            }
580        }
581
582        // Include expired SSTs in removals — these don't depend on merge success.
583        compacted_inputs.extend(
584            picker_output
585                .expired_ssts
586                .iter()
587                .map(|f| f.meta_ref().clone()),
588        );
589
590        Ok(MergeOutput {
591            files_to_add: output_files,
592            files_to_remove: compacted_inputs,
593            compaction_time_window: Some(compaction_time_window),
594        })
595    }
596
597    async fn update_manifest(
598        &self,
599        compaction_region: &CompactionRegion,
600        merge_output: MergeOutput,
601    ) -> Result<RegionEdit> {
602        // Write region edit to manifest.
603        let edit = RegionEdit {
604            files_to_add: merge_output.files_to_add,
605            files_to_remove: merge_output.files_to_remove,
606            // Use current timestamp as the edit timestamp.
607            timestamp_ms: Some(chrono::Utc::now().timestamp_millis()),
608            compaction_time_window: merge_output
609                .compaction_time_window
610                .map(|seconds| Duration::from_secs(seconds as u64)),
611            flushed_entry_id: None,
612            flushed_sequence: None,
613            committed_sequence: None,
614        };
615
616        let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
617        // TODO: We might leak files if we fail to update manifest. We can add a cleanup task to remove them later.
618        compaction_region
619            .manifest_ctx
620            .update_manifest_for_compaction(action_list)
621            .await?;
622
623        Ok(edit)
624    }
625}
626
627#[cfg(test)]
628mod tests {
629    use std::sync::atomic::{AtomicUsize, Ordering};
630    use std::sync::{Arc, Mutex};
631    use std::time::Duration;
632
633    use store_api::storage::{FileId, RegionId};
634    use tokio::time::sleep;
635
636    use super::{DefaultCompactor, *};
637    use crate::cache::CacheManager;
638    use crate::compaction::picker::PickerOutput;
639    use crate::error::Result;
640    use crate::sst::file::FileHandle;
641    use crate::sst::file_purger::NoopFilePurger;
642    use crate::sst::version::SstVersion;
643    use crate::test_util::memtable_util::metadata_for_test;
644    use crate::test_util::scheduler_util::SchedulerEnv;
645
    /// Returns a [`FileMeta`] for region `(1, 1)` with a random file id and a
    /// size of 100; every other field keeps its default value.
    fn dummy_file_meta() -> FileMeta {
        FileMeta {
            region_id: RegionId::new(1, 1),
            file_id: FileId::random(),
            file_size: 100,
            ..Default::default()
        }
    }
654
    /// Wraps `meta` in a [`FileHandle`] backed by a [`NoopFilePurger`].
    fn new_file_handle(meta: FileMeta) -> FileHandle {
        FileHandle::new(meta, Arc::new(NoopFilePurger))
    }
658
    /// Build a minimal [`CompactionRegion`] suitable for tests where the
    /// [`SstMerger`] is mocked and never touches the access layer.
    ///
    /// The region uses default options and configuration, an empty SST
    /// version, no file purger, no TTL, and a parallelism of 1.
    async fn new_test_compaction_region() -> CompactionRegion {
        let env = SchedulerEnv::new().await;
        let metadata = metadata_for_test();
        let manifest_ctx = env.mock_manifest_context(metadata.clone()).await;
        CompactionRegion {
            region_id: RegionId::new(1, 1),
            region_options: RegionOptions::default(),
            engine_config: Arc::new(MitoConfig::default()),
            region_metadata: metadata.clone(),
            cache_manager: Arc::new(CacheManager::default()),
            access_layer: env.access_layer.clone(),
            manifest_ctx,
            current_version: CompactionVersion {
                metadata,
                options: RegionOptions::default(),
                ssts: Arc::new(SstVersion::new()),
                compaction_time_window: None,
            },
            file_purger: None,
            ttl: None,
            // With a parallelism of 1 the merge tasks run one at a time.
            max_parallelism: 1,
        }
    }
684
    /// An [`SstMerger`] that returns pre-configured results per call index.
    ///
    /// Call 0 gets `results[0]`, call 1 gets `results[1]`, etc. The index
    /// counts calls to the merger, not positions in the picker output.
    #[derive(Clone)]
    struct MockMerger {
        /// Configured results, shared between clones of the merger.
        results: Arc<Mutex<Vec<Result<Vec<FileMeta>>>>>,
        /// Number of calls served so far, shared between clones of the merger.
        call_idx: Arc<AtomicUsize>,
    }
693
    impl MockMerger {
        /// Creates a merger that serves `results` in call order, starting at
        /// call index 0.
        fn new(results: Vec<Result<Vec<FileMeta>>>) -> Self {
            Self {
                results: Arc::new(Mutex::new(results)),
                call_idx: Arc::new(AtomicUsize::new(0)),
            }
        }
    }
702
703    #[async_trait::async_trait]
704    impl SstMerger for MockMerger {
705        async fn merge_single_output(
706            &self,
707            _compaction_region: CompactionRegion,
708            _output: CompactionOutput,
709            _write_opts: WriteOptions,
710        ) -> Result<Vec<FileMeta>> {
711            let idx = self.call_idx.fetch_add(1, Ordering::SeqCst);
712            match self.results.lock().unwrap().get(idx) {
713                Some(Ok(files)) => Ok(files.clone()),
714                Some(Err(_)) => error::InvalidMetaSnafu {
715                    reason: format!("simulated failure at index {idx}"),
716                }
717                .fail(),
718                None => panic!("MockMerger: no result configured for call index {idx}"),
719            }
720        }
721    }
722
    /// A failed output must not contribute files to add, and its inputs must
    /// not be scheduled for removal; the other outputs are unaffected.
    #[tokio::test]
    async fn test_partial_merge_failure_collects_only_successful_outputs() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;

        // Prepare 3 compaction outputs: output 0 and 2 succeed, output 1 fails.
        let input_meta_0 = dummy_file_meta();
        let input_meta_1 = dummy_file_meta();
        let input_meta_2 = dummy_file_meta();

        let output_meta_0 = vec![dummy_file_meta()];
        let output_meta_2 = vec![dummy_file_meta(), dummy_file_meta()];

        // The mock serves results by call index. `merge_ssts` takes tasks from
        // the back of its list, so with a parallelism of 1 call 0 belongs to
        // the last output. The error sits in the middle and therefore still
        // hits output 1.
        let merger = MockMerger::new(vec![
            Ok(output_meta_0.clone()),
            Err(error::InvalidMetaSnafu {
                reason: "boom".to_string(),
            }
            .build()),
            Ok(output_meta_2.clone()),
        ]);
        let compactor = DefaultCompactor::with_merger(merger);

        let picker_output = PickerOutput {
            outputs: vec![
                CompactionOutput {
                    output_level: 1,
                    inputs: vec![new_file_handle(input_meta_0.clone())],
                    filter_deleted: false,
                    output_time_range: None,
                },
                CompactionOutput {
                    output_level: 1,
                    inputs: vec![new_file_handle(input_meta_1.clone())],
                    filter_deleted: false,
                    output_time_range: None,
                },
                CompactionOutput {
                    output_level: 1,
                    inputs: vec![new_file_handle(input_meta_2.clone())],
                    filter_deleted: false,
                    output_time_range: None,
                },
            ],
            expired_ssts: vec![],
            time_window_size: 3600,
            max_file_size: None,
        };

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        // Outputs 0 and 2 succeeded (1 + 2 = 3 files added).
        assert_eq!(merge_output.files_to_add.len(), 3);
        // Only inputs from successful merges should be removed.
        assert_eq!(merge_output.files_to_remove.len(), 2);

        let removed_ids: Vec<_> = merge_output
            .files_to_remove
            .iter()
            .map(|f| f.file_id)
            .collect();
        assert!(removed_ids.contains(&input_meta_0.file_id));
        assert!(removed_ids.contains(&input_meta_2.file_id));
        // The failed output's input must NOT be removed.
        assert!(!removed_ids.contains(&input_meta_1.file_id));
    }
793
    /// A single successful output yields exactly its merged file as an
    /// addition and exactly its input as a removal.
    #[tokio::test]
    async fn test_all_outputs_succeed() {
        common_telemetry::init_default_ut_logging();

        let compaction_region = new_test_compaction_region().await;
        let input_meta = dummy_file_meta();
        let output_meta = vec![dummy_file_meta()];

        // One configured result for the one merger call.
        let merger = MockMerger::new(vec![Ok(output_meta.clone())]);
        let compactor = DefaultCompactor::with_merger(merger);

        let picker_output = PickerOutput {
            outputs: vec![CompactionOutput {
                output_level: 1,
                inputs: vec![new_file_handle(input_meta.clone())],
                filter_deleted: false,
                output_time_range: None,
            }],
            expired_ssts: vec![],
            time_window_size: 3600,
            max_file_size: None,
        };

        let merge_output = compactor
            .merge_ssts(&compaction_region, picker_output)
            .await
            .unwrap();

        assert_eq!(merge_output.files_to_add.len(), 1);
        assert_eq!(merge_output.files_to_add[0].file_id, output_meta[0].file_id);
        assert_eq!(merge_output.files_to_remove.len(), 1);
        assert_eq!(merge_output.files_to_remove[0].file_id, input_meta.file_id);
    }
827
828    #[tokio::test]
829    async fn test_expired_ssts_always_removed() {
830        common_telemetry::init_default_ut_logging();
831
832        let compaction_region = new_test_compaction_region().await;
833        let input_meta = dummy_file_meta();
834        let expired_meta = dummy_file_meta();
835
836        // The single merge output fails, but expired SSTs should still be removed.
837        let merger = MockMerger::new(vec![Err(error::InvalidMetaSnafu {
838            reason: "fail".to_string(),
839        }
840        .build())]);
841        let compactor = DefaultCompactor::with_merger(merger);
842
843        let picker_output = PickerOutput {
844            outputs: vec![CompactionOutput {
845                output_level: 1,
846                inputs: vec![new_file_handle(input_meta.clone())],
847                filter_deleted: false,
848                output_time_range: None,
849            }],
850            expired_ssts: vec![new_file_handle(expired_meta.clone())],
851            time_window_size: 3600,
852            max_file_size: None,
853        };
854
855        let merge_output = compactor
856            .merge_ssts(&compaction_region, picker_output)
857            .await
858            .unwrap();
859
860        // No files added (merge failed).
861        assert!(merge_output.files_to_add.is_empty());
862        // Only the expired SST should be in files_to_remove (not the failed merge's input).
863        assert_eq!(merge_output.files_to_remove.len(), 1);
864        assert_eq!(
865            merge_output.files_to_remove[0].file_id,
866            expired_meta.file_id
867        );
868    }
869
870    #[derive(Clone)]
871    struct BlockingMerger {
872        call_idx: Arc<AtomicUsize>,
873    }
874
875    #[async_trait::async_trait]
876    impl SstMerger for BlockingMerger {
877        async fn merge_single_output(
878            &self,
879            _compaction_region: CompactionRegion,
880            _output: CompactionOutput,
881            _write_opts: WriteOptions,
882        ) -> Result<Vec<FileMeta>> {
883            self.call_idx.fetch_add(1, Ordering::SeqCst);
884            std::future::pending().await
885        }
886    }
887
888    #[tokio::test(flavor = "multi_thread")]
889    async fn test_merge_ssts_cancels_spawned_tasks() {
890        common_telemetry::init_default_ut_logging();
891
892        let mut compaction_region = new_test_compaction_region().await;
893        compaction_region.max_parallelism = 2;
894
895        let cancel_handle = Arc::new(CancellationHandle::default());
896        let call_idx = Arc::new(AtomicUsize::new(0));
897        let compactor = DefaultCompactor {
898            merger: BlockingMerger {
899                call_idx: call_idx.clone(),
900            },
901            cancel_handle: cancel_handle.clone(),
902        };
903
904        let picker_output = PickerOutput {
905            outputs: vec![
906                CompactionOutput {
907                    output_level: 1,
908                    inputs: vec![new_file_handle(dummy_file_meta())],
909                    filter_deleted: false,
910                    output_time_range: None,
911                },
912                CompactionOutput {
913                    output_level: 1,
914                    inputs: vec![new_file_handle(dummy_file_meta())],
915                    filter_deleted: false,
916                    output_time_range: None,
917                },
918                CompactionOutput {
919                    output_level: 1,
920                    inputs: vec![new_file_handle(dummy_file_meta())],
921                    filter_deleted: false,
922                    output_time_range: None,
923                },
924            ],
925            expired_ssts: vec![],
926            time_window_size: 3600,
927            max_file_size: None,
928        };
929
930        let task = tokio::spawn(async move {
931            compactor
932                .merge_ssts(&compaction_region, picker_output)
933                .await
934        });
935
936        sleep(Duration::from_millis(100)).await;
937        cancel_handle.cancel();
938
939        let merge_output = task
940            .await
941            .expect("merge_ssts should stop after cancellation")
942            .unwrap();
943
944        let started = call_idx.load(Ordering::SeqCst);
945
946        assert!(merge_output.files_to_add.is_empty());
947        assert!(merge_output.files_to_remove.is_empty());
948        assert_eq!(started, 2);
949    }
950}