Skip to main content

mito2/compaction/
compactor.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::num::NonZero;
16use std::sync::Arc;
17use std::time::Duration;
18
19use common_base::cancellation::{CancellableFuture, CancellationHandle};
20use common_meta::key::SchemaMetadataManagerRef;
21use common_telemetry::{debug, info, warn};
22use common_time::TimeToLive;
23use either::Either;
24use itertools::Itertools;
25use object_store::manager::ObjectStoreManagerRef;
26use partition::expr::PartitionExpr;
27use serde::{Deserialize, Serialize};
28use snafu::{OptionExt, ResultExt};
29use store_api::metadata::RegionMetadataRef;
30use store_api::region_request::PathType;
31use store_api::storage::RegionId;
32
33use crate::access_layer::{
34    AccessLayer, AccessLayerRef, Metrics, OperationType, SstWriteRequest, WriteType,
35};
36use crate::cache::{CacheManager, CacheManagerRef};
37use crate::compaction::picker::PickerOutput;
38use crate::compaction::{CompactionOutput, CompactionSstReaderBuilder, find_dynamic_options};
39use crate::config::MitoConfig;
40use crate::error;
41use crate::error::{
42    EmptyRegionDirSnafu, InvalidPartitionExprSnafu, ObjectStoreNotFoundSnafu, Result,
43};
44use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
45use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
46use crate::read::FlatSource;
47use crate::region::options::RegionOptions;
48use crate::region::version::VersionRef;
49use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState};
50use crate::schedule::scheduler::LocalScheduler;
51use crate::sst::FormatType;
52use crate::sst::file::FileMeta;
53use crate::sst::file_purger::LocalFilePurger;
54use crate::sst::index::intermediate::IntermediateManager;
55use crate::sst::index::puffin_manager::PuffinManagerFactory;
56use crate::sst::location::region_dir_from_table_dir;
57use crate::sst::parquet::WriteOptions;
58use crate::sst::parquet::metadata::extract_primary_key_range;
59use crate::sst::version::{SstVersion, SstVersionRef};
60
61/// Region version for compaction that does not hold memtables.
62#[derive(Clone)]
63pub struct CompactionVersion {
64    /// Metadata of the region.
65    ///
66    /// Altering metadata isn't frequent, storing metadata in Arc to allow sharing
67    /// metadata and reuse metadata when creating a new `Version`.
68    pub(crate) metadata: RegionMetadataRef,
69    /// Options of the region.
70    pub(crate) options: RegionOptions,
71    /// SSTs of the region.
72    pub(crate) ssts: SstVersionRef,
73    /// Inferred compaction time window.
74    pub(crate) compaction_time_window: Option<Duration>,
75}
76
77impl From<VersionRef> for CompactionVersion {
78    fn from(value: VersionRef) -> Self {
79        Self {
80            metadata: value.metadata.clone(),
81            options: value.options.clone(),
82            ssts: value.ssts.clone(),
83            compaction_time_window: value.compaction_time_window,
84        }
85    }
86}
87
88/// CompactionRegion represents a region that needs to be compacted.
89/// It's the subset of MitoRegion.
90#[derive(Clone)]
91pub struct CompactionRegion {
92    pub region_id: RegionId,
93    pub region_options: RegionOptions,
94
95    pub(crate) engine_config: Arc<MitoConfig>,
96    pub(crate) region_metadata: RegionMetadataRef,
97    pub(crate) cache_manager: CacheManagerRef,
98    /// Access layer to get the table path and path type.
99    pub access_layer: AccessLayerRef,
100    pub(crate) manifest_ctx: Arc<ManifestContext>,
101    pub(crate) current_version: CompactionVersion,
102    pub(crate) file_purger: Option<Arc<LocalFilePurger>>,
103    pub(crate) ttl: Option<TimeToLive>,
104
105    /// Controls the parallelism of this compaction task. Default is 1.
106    ///
107    /// The parallel is inside this compaction task, not across different compaction tasks.
108    /// It can be different windows of the same compaction task or something like this.
109    pub max_parallelism: usize,
110}
111
112/// OpenCompactionRegionRequest represents the request to open a compaction region.
113#[derive(Debug, Clone)]
114pub struct OpenCompactionRegionRequest {
115    pub region_id: RegionId,
116    pub table_dir: String,
117    pub path_type: PathType,
118    pub region_options: RegionOptions,
119    pub max_parallelism: usize,
120}
121
122/// Open a compaction region from a compaction request.
123/// It's simple version of RegionOpener::open().
124pub async fn open_compaction_region(
125    req: &OpenCompactionRegionRequest,
126    mito_config: &MitoConfig,
127    object_store_manager: ObjectStoreManagerRef,
128    ttl_provider: Either<TimeToLive, SchemaMetadataManagerRef>,
129) -> Result<CompactionRegion> {
130    let object_store = {
131        let name = &req.region_options.storage;
132        if let Some(name) = name {
133            object_store_manager
134                .find(name)
135                .with_context(|| ObjectStoreNotFoundSnafu {
136                    object_store: name.clone(),
137                })?
138        } else {
139            object_store_manager.default_object_store()
140        }
141    };
142
143    let access_layer = {
144        let puffin_manager_factory = PuffinManagerFactory::new(
145            &mito_config.index.aux_path,
146            mito_config.index.staging_size.as_bytes(),
147            Some(mito_config.index.write_buffer_size.as_bytes() as _),
148            mito_config.index.staging_ttl,
149        )
150        .await?;
151        let intermediate_manager =
152            IntermediateManager::init_fs(mito_config.index.aux_path.clone()).await?;
153
154        Arc::new(AccessLayer::new(
155            &req.table_dir,
156            req.path_type,
157            object_store.clone(),
158            puffin_manager_factory,
159            intermediate_manager,
160        ))
161    };
162
163    let manifest_manager = {
164        let region_dir = region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type);
165        let region_manifest_options =
166            RegionManifestOptions::new(mito_config, &region_dir, object_store);
167
168        RegionManifestManager::open(region_manifest_options, &Default::default())
169            .await?
170            .with_context(|| EmptyRegionDirSnafu {
171                region_id: req.region_id,
172                region_dir: region_dir_from_table_dir(&req.table_dir, req.region_id, req.path_type),
173            })?
174    };
175
176    let manifest = manifest_manager.manifest();
177    let region_metadata = manifest.metadata.clone();
178    let manifest_ctx = Arc::new(ManifestContext::new(
179        manifest_manager,
180        RegionRoleState::Leader(RegionLeaderState::Writable),
181    ));
182
183    let file_purger = {
184        let purge_scheduler = Arc::new(LocalScheduler::new(mito_config.max_background_purges));
185        Arc::new(LocalFilePurger::new(
186            purge_scheduler.clone(),
187            access_layer.clone(),
188            None,
189        ))
190    };
191
192    let current_version = {
193        let mut ssts = SstVersion::new();
194        ssts.add_files(file_purger.clone(), manifest.files.values().cloned());
195        CompactionVersion {
196            metadata: region_metadata.clone(),
197            options: req.region_options.clone(),
198            ssts: Arc::new(ssts),
199            compaction_time_window: manifest.compaction_time_window,
200        }
201    };
202
203    let ttl = match ttl_provider {
204        // Use the specified ttl.
205        Either::Left(ttl) => ttl,
206        // Get the ttl from the schema metadata manager.
207        Either::Right(schema_metadata_manager) => {
208            let (_, ttl) = find_dynamic_options(
209                req.region_id.table_id(),
210                &req.region_options,
211                &schema_metadata_manager,
212            )
213            .await
214            .unwrap_or_else(|e| {
215                warn!(e; "Failed to get ttl for region: {}", region_metadata.region_id);
216                (
217                    crate::region::options::CompactionOptions::default(),
218                    TimeToLive::default(),
219                )
220            });
221            ttl
222        }
223    };
224
225    Ok(CompactionRegion {
226        region_id: req.region_id,
227        region_options: req.region_options.clone(),
228        engine_config: Arc::new(mito_config.clone()),
229        region_metadata: region_metadata.clone(),
230        cache_manager: Arc::new(CacheManager::default()),
231        access_layer,
232        manifest_ctx,
233        current_version,
234        file_purger: Some(file_purger),
235        ttl: Some(ttl),
236        max_parallelism: req.max_parallelism,
237    })
238}
239
240impl CompactionRegion {
241    /// Get the file purger of the compaction region.
242    pub fn file_purger(&self) -> Option<Arc<LocalFilePurger>> {
243        self.file_purger.clone()
244    }
245
246    /// Stop the file purger scheduler of the compaction region.
247    pub async fn stop_purger_scheduler(&self) -> Result<()> {
248        if let Some(file_purger) = &self.file_purger {
249            file_purger.stop_scheduler().await
250        } else {
251            Ok(())
252        }
253    }
254}
255
256/// `[MergeOutput]` represents the output of merging SST files.
257#[derive(Default, Clone, Debug, Serialize, Deserialize)]
258pub struct MergeOutput {
259    pub files_to_add: Vec<FileMeta>,
260    pub files_to_remove: Vec<FileMeta>,
261    pub compaction_time_window: Option<i64>,
262}
263
264impl MergeOutput {
265    pub fn is_empty(&self) -> bool {
266        self.files_to_add.is_empty() && self.files_to_remove.is_empty()
267    }
268
269    pub fn input_file_size(&self) -> u64 {
270        self.files_to_remove.iter().map(|f| f.file_size).sum()
271    }
272
273    pub fn output_file_size(&self) -> u64 {
274        self.files_to_add.iter().map(|f| f.file_size).sum()
275    }
276}
277
278/// Compactor is the trait that defines the compaction logic.
279#[async_trait::async_trait]
280pub trait Compactor: Send + Sync + 'static {
281    /// Merge SST files for a region.
282    async fn merge_ssts(
283        &self,
284        compaction_region: &CompactionRegion,
285        picker_output: PickerOutput,
286    ) -> Result<MergeOutput>;
287
288    /// Update the manifest after merging SST files.
289    async fn update_manifest(
290        &self,
291        compaction_region: &CompactionRegion,
292        merge_output: MergeOutput,
293    ) -> Result<RegionEdit>;
294}
295
296/// Trait for merging a single compaction output into SST files.
297///
298/// This is extracted from `DefaultCompactor` to allow injecting mock
299/// implementations in tests.
300#[async_trait::async_trait]
301pub trait SstMerger: Send + Sync + 'static {
302    async fn merge_single_output(
303        &self,
304        compaction_region: CompactionRegion,
305        output: CompactionOutput,
306        write_opts: WriteOptions,
307    ) -> Result<Vec<FileMeta>>;
308}
309
/// Production implementation of [`SstMerger`]: reads the input SSTs, merges
/// them, and writes the resulting SST files.
#[derive(Clone)]
pub struct DefaultSstMerger;
313
314#[async_trait::async_trait]
315impl SstMerger for DefaultSstMerger {
316    async fn merge_single_output(
317        &self,
318        compaction_region: CompactionRegion,
319        output: CompactionOutput,
320        write_opts: WriteOptions,
321    ) -> Result<Vec<FileMeta>> {
322        let region_id = compaction_region.region_id;
323        let storage = compaction_region.region_options.storage.clone();
324        let index_options = compaction_region
325            .current_version
326            .options
327            .index_options
328            .clone();
329        let append_mode = compaction_region.current_version.options.append_mode;
330        let merge_mode = compaction_region.current_version.options.merge_mode();
331        let flat_format = compaction_region
332            .region_options
333            .sst_format
334            .map(|format| format == FormatType::Flat)
335            .unwrap_or(compaction_region.engine_config.default_flat_format);
336
337        let index_config = compaction_region.engine_config.index.clone();
338        let inverted_index_config = compaction_region.engine_config.inverted_index.clone();
339        let fulltext_index_config = compaction_region.engine_config.fulltext_index.clone();
340        let bloom_filter_index_config = compaction_region.engine_config.bloom_filter_index.clone();
341        #[cfg(feature = "vector_index")]
342        let vector_index_config = compaction_region.engine_config.vector_index.clone();
343
344        let input_file_names = output
345            .inputs
346            .iter()
347            .map(|f| f.file_id().to_string())
348            .join(",");
349        let max_sequence = output
350            .inputs
351            .iter()
352            .map(|f| f.meta_ref().sequence)
353            .max()
354            .flatten();
355        let builder = CompactionSstReaderBuilder {
356            metadata: compaction_region.region_metadata.clone(),
357            sst_layer: compaction_region.access_layer.clone(),
358            cache: compaction_region.cache_manager.clone(),
359            inputs: &output.inputs,
360            append_mode,
361            filter_deleted: output.filter_deleted,
362            time_range: output.output_time_range,
363            merge_mode,
364        };
365        let reader = builder.build_flat_sst_reader().await?;
366        let source = FlatSource::Stream(reader);
367        let mut metrics = Metrics::new(WriteType::Compaction);
368        let region_metadata = compaction_region.region_metadata.clone();
369        let sst_infos = compaction_region
370            .access_layer
371            .write_sst(
372                SstWriteRequest {
373                    op_type: OperationType::Compact,
374                    metadata: region_metadata.clone(),
375                    source,
376                    cache_manager: compaction_region.cache_manager.clone(),
377                    storage,
378                    max_sequence: max_sequence.map(NonZero::get),
379                    sst_write_format: if flat_format {
380                        FormatType::Flat
381                    } else {
382                        FormatType::PrimaryKey
383                    },
384                    index_options,
385                    index_config,
386                    inverted_index_config,
387                    fulltext_index_config,
388                    bloom_filter_index_config,
389                    #[cfg(feature = "vector_index")]
390                    vector_index_config,
391                },
392                &write_opts,
393                &mut metrics,
394            )
395            .await?;
396        // Convert partition expression once outside the map
397        let partition_expr = match &region_metadata.partition_expr {
398            None => None,
399            Some(json_str) if json_str.is_empty() => None,
400            Some(json_str) => PartitionExpr::from_json_str(json_str).with_context(|_| {
401                InvalidPartitionExprSnafu {
402                    expr: json_str.clone(),
403                }
404            })?,
405        };
406
407        let output_files = sst_infos
408            .into_iter()
409            .map(|sst_info| {
410                let pk_range = sst_info
411                    .file_metadata
412                    .as_ref()
413                    .and_then(|meta| extract_primary_key_range(meta, &region_metadata));
414                let (primary_key_min, primary_key_max) = match pk_range {
415                    Some((min, max)) => (Some(min), Some(max)),
416                    None => (None, None),
417                };
418
419                FileMeta {
420                    region_id,
421                    file_id: sst_info.file_id,
422                    time_range: sst_info.time_range,
423                    level: output.output_level,
424                    file_size: sst_info.file_size,
425                    max_row_group_uncompressed_size: sst_info.max_row_group_uncompressed_size,
426                    available_indexes: sst_info.index_metadata.build_available_indexes(),
427                    indexes: sst_info.index_metadata.build_indexes(),
428                    index_file_size: sst_info.index_metadata.file_size,
429                    index_version: 0,
430                    num_rows: sst_info.num_rows as u64,
431                    num_row_groups: sst_info.num_row_groups,
432                    sequence: max_sequence,
433                    partition_expr: partition_expr.clone(),
434                    num_series: sst_info.num_series,
435                    primary_key_min,
436                    primary_key_max,
437                }
438            })
439            .collect::<Vec<_>>();
440        let output_file_names = output_files.iter().map(|f| f.file_id.to_string()).join(",");
441        info!(
442            "Region {} compaction inputs: [{}], outputs: [{}], flat_format: {}, metrics: {:?}",
443            region_id, input_file_names, output_file_names, flat_format, metrics
444        );
445        metrics.observe();
446        Ok(output_files)
447    }
448}
449
450/// DefaultCompactor is the default implementation of Compactor.
451///
452/// It is parameterized by an [`SstMerger`] to allow injecting mock
453/// implementations in tests.
454pub struct DefaultCompactor<M = DefaultSstMerger> {
455    merger: M,
456    cancel_handle: Arc<CancellationHandle>,
457}
458
#[cfg(test)]
impl<M: SstMerger> DefaultCompactor<M> {
    /// Test-only constructor: uses the given merger and a fresh, default
    /// cancellation handle.
    pub fn with_merger(merger: M) -> Self {
        let cancel_handle = Arc::new(CancellationHandle::default());
        Self {
            merger,
            cancel_handle,
        }
    }
}
468
469impl DefaultCompactor {
470    pub fn with_cancel_handle(cancel_handle: Arc<CancellationHandle>) -> Self {
471        Self {
472            merger: DefaultSstMerger,
473            cancel_handle,
474        }
475    }
476}
477
478#[async_trait::async_trait]
479impl<M: SstMerger> Compactor for DefaultCompactor<M>
480where
481    M: Clone,
482{
483    async fn merge_ssts(
484        &self,
485        compaction_region: &CompactionRegion,
486        mut picker_output: PickerOutput,
487    ) -> Result<MergeOutput> {
488        let internal_parallelism = compaction_region.max_parallelism.max(1);
489        let compaction_time_window = picker_output.time_window_size;
490        let region_id = compaction_region.region_id;
491
492        // Build tasks along with their input file metas so we can track which
493        // inputs correspond to each task.
494        let mut tasks: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(picker_output.outputs.len());
495
496        for output in picker_output.outputs.drain(..) {
497            let inputs_to_remove: Vec<_> =
498                output.inputs.iter().map(|f| f.meta_ref().clone()).collect();
499            let write_opts = WriteOptions {
500                write_buffer_size: compaction_region.engine_config.sst_write_buffer_size,
501                max_file_size: picker_output.max_file_size,
502                ..Default::default()
503            };
504            let merger = self.merger.clone();
505            let compaction_region = compaction_region.clone();
506            let fut = async move {
507                merger
508                    .merge_single_output(compaction_region, output, write_opts)
509                    .await
510            };
511            tasks.push((inputs_to_remove, fut));
512        }
513
514        let mut output_files = Vec::with_capacity(tasks.len());
515        let mut compacted_inputs = Vec::with_capacity(
516            tasks.iter().map(|(inputs, _)| inputs.len()).sum::<usize>()
517                + picker_output.expired_ssts.len(),
518        );
519
520        while !tasks.is_empty() {
521            let mut chunk: Vec<(Vec<FileMeta>, _)> = Vec::with_capacity(internal_parallelism);
522            for _ in 0..internal_parallelism {
523                if let Some(task) = tasks.pop() {
524                    chunk.push(task);
525                }
526            }
527            let mut spawned: Vec<_> = chunk
528                .into_iter()
529                .map(|(inputs, fut)| {
530                    let handle = common_runtime::spawn_compact(fut);
531                    (inputs, handle)
532                })
533                .collect();
534
535            while let Some((inputs, handle)) = spawned.pop() {
536                let abort_handle = handle.abort_handle();
537                match CancellableFuture::new(handle, self.cancel_handle.clone()).await {
538                    Ok(Ok(Ok(files))) => {
539                        output_files.extend(files);
540                        compacted_inputs.extend(inputs);
541                    }
542                    Ok(Ok(Err(e))) => {
543                        warn!(
544                            e; "Failed to merge compaction output for region: {}, inputs: [{}]",
545                            region_id,
546                            inputs.iter().map(|f| f.file_id.to_string()).join(",")
547                        );
548                    }
549                    Ok(Err(e)) => {
550                        warn!(
551                            "Region {} compaction task join error for inputs: [{}], skipping: {}",
552                            region_id,
553                            inputs.iter().map(|f| f.file_id.to_string()).join(","),
554                            e
555                        );
556                        // If the cancel handle is cancelled,
557                        // cancel the remaining tasks before returns the error.
558                        if self.cancel_handle.is_cancelled() {
559                            abort_handle.abort();
560                            for (_, handle) in spawned {
561                                handle.abort();
562                            }
563                        }
564                        return Err(e).context(error::JoinSnafu);
565                    }
566                    Err(_) => {
567                        debug!(
568                            "Compaction merge cancelled for region: {}, aborting remaining {} spawned tasks",
569                            region_id,
570                            spawned.len(),
571                        );
572                        abort_handle.abort();
573                        for (_, handle) in spawned {
574                            handle.abort();
575                        }
576                        break;
577                    }
578                }
579            }
580
581            if self.cancel_handle.is_cancelled() {
582                info!("Compaction merge cancelled for region: {}", region_id);
583                break;
584            }
585        }
586
587        // Include expired SSTs in removals — these don't depend on merge success.
588        compacted_inputs.extend(
589            picker_output
590                .expired_ssts
591                .iter()
592                .map(|f| f.meta_ref().clone()),
593        );
594
595        Ok(MergeOutput {
596            files_to_add: output_files,
597            files_to_remove: compacted_inputs,
598            compaction_time_window: Some(compaction_time_window),
599        })
600    }
601
602    async fn update_manifest(
603        &self,
604        compaction_region: &CompactionRegion,
605        merge_output: MergeOutput,
606    ) -> Result<RegionEdit> {
607        // Write region edit to manifest.
608        let edit = RegionEdit {
609            files_to_add: merge_output.files_to_add,
610            files_to_remove: merge_output.files_to_remove,
611            // Use current timestamp as the edit timestamp.
612            timestamp_ms: Some(chrono::Utc::now().timestamp_millis()),
613            compaction_time_window: merge_output
614                .compaction_time_window
615                .map(|seconds| Duration::from_secs(seconds as u64)),
616            flushed_entry_id: None,
617            flushed_sequence: None,
618            committed_sequence: None,
619        };
620
621        let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
622        // TODO: We might leak files if we fail to update manifest. We can add a cleanup task to remove them later.
623        compaction_region
624            .manifest_ctx
625            .update_manifest(RegionLeaderState::Writable, action_list, false)
626            .await?;
627
628        Ok(edit)
629    }
630}
631
632#[cfg(test)]
633mod tests {
634    use std::sync::atomic::{AtomicUsize, Ordering};
635    use std::sync::{Arc, Mutex};
636    use std::time::Duration;
637
638    use store_api::storage::{FileId, RegionId};
639    use tokio::time::sleep;
640
641    use super::{DefaultCompactor, *};
642    use crate::cache::CacheManager;
643    use crate::compaction::picker::PickerOutput;
644    use crate::error::Result;
645    use crate::sst::file::FileHandle;
646    use crate::sst::file_purger::NoopFilePurger;
647    use crate::sst::version::SstVersion;
648    use crate::test_util::memtable_util::metadata_for_test;
649    use crate::test_util::scheduler_util::SchedulerEnv;
650
651    fn dummy_file_meta() -> FileMeta {
652        FileMeta {
653            region_id: RegionId::new(1, 1),
654            file_id: FileId::random(),
655            file_size: 100,
656            ..Default::default()
657        }
658    }
659
660    fn new_file_handle(meta: FileMeta) -> FileHandle {
661        FileHandle::new(meta, Arc::new(NoopFilePurger))
662    }
663
664    /// Build a minimal [`CompactionRegion`] suitable for tests where the
665    /// [`SstMerger`] is mocked and never touches the access layer.
666    async fn new_test_compaction_region() -> CompactionRegion {
667        let env = SchedulerEnv::new().await;
668        let metadata = metadata_for_test();
669        let manifest_ctx = env.mock_manifest_context(metadata.clone()).await;
670        CompactionRegion {
671            region_id: RegionId::new(1, 1),
672            region_options: RegionOptions::default(),
673            engine_config: Arc::new(MitoConfig::default()),
674            region_metadata: metadata.clone(),
675            cache_manager: Arc::new(CacheManager::default()),
676            access_layer: env.access_layer.clone(),
677            manifest_ctx,
678            current_version: CompactionVersion {
679                metadata,
680                options: RegionOptions::default(),
681                ssts: Arc::new(SstVersion::new()),
682                compaction_time_window: None,
683            },
684            file_purger: None,
685            ttl: None,
686            max_parallelism: 1,
687        }
688    }
689
690    /// An [`SstMerger`] that returns pre-configured results per call index.
691    ///
692    /// Call 0 gets `results[0]`, call 1 gets `results[1]`, etc.
693    #[derive(Clone)]
694    struct MockMerger {
695        results: Arc<Mutex<Vec<Result<Vec<FileMeta>>>>>,
696        call_idx: Arc<AtomicUsize>,
697    }
698
699    impl MockMerger {
700        fn new(results: Vec<Result<Vec<FileMeta>>>) -> Self {
701            Self {
702                results: Arc::new(Mutex::new(results)),
703                call_idx: Arc::new(AtomicUsize::new(0)),
704            }
705        }
706    }
707
708    #[async_trait::async_trait]
709    impl SstMerger for MockMerger {
710        async fn merge_single_output(
711            &self,
712            _compaction_region: CompactionRegion,
713            _output: CompactionOutput,
714            _write_opts: WriteOptions,
715        ) -> Result<Vec<FileMeta>> {
716            let idx = self.call_idx.fetch_add(1, Ordering::SeqCst);
717            match self.results.lock().unwrap().get(idx) {
718                Some(Ok(files)) => Ok(files.clone()),
719                Some(Err(_)) => error::InvalidMetaSnafu {
720                    reason: format!("simulated failure at index {idx}"),
721                }
722                .fail(),
723                None => panic!("MockMerger: no result configured for call index {idx}"),
724            }
725        }
726    }
727
728    #[tokio::test]
729    async fn test_partial_merge_failure_collects_only_successful_outputs() {
730        common_telemetry::init_default_ut_logging();
731
732        let compaction_region = new_test_compaction_region().await;
733
734        // Prepare 3 compaction outputs: output 0 and 2 succeed, output 1 fails.
735        let input_meta_0 = dummy_file_meta();
736        let input_meta_1 = dummy_file_meta();
737        let input_meta_2 = dummy_file_meta();
738
739        let output_meta_0 = vec![dummy_file_meta()];
740        let output_meta_2 = vec![dummy_file_meta(), dummy_file_meta()];
741
742        let merger = MockMerger::new(vec![
743            Ok(output_meta_0.clone()),
744            Err(error::InvalidMetaSnafu {
745                reason: "boom".to_string(),
746            }
747            .build()),
748            Ok(output_meta_2.clone()),
749        ]);
750        let compactor = DefaultCompactor::with_merger(merger);
751
752        let picker_output = PickerOutput {
753            outputs: vec![
754                CompactionOutput {
755                    output_level: 1,
756                    inputs: vec![new_file_handle(input_meta_0.clone())],
757                    filter_deleted: false,
758                    output_time_range: None,
759                },
760                CompactionOutput {
761                    output_level: 1,
762                    inputs: vec![new_file_handle(input_meta_1.clone())],
763                    filter_deleted: false,
764                    output_time_range: None,
765                },
766                CompactionOutput {
767                    output_level: 1,
768                    inputs: vec![new_file_handle(input_meta_2.clone())],
769                    filter_deleted: false,
770                    output_time_range: None,
771                },
772            ],
773            expired_ssts: vec![],
774            time_window_size: 3600,
775            max_file_size: None,
776        };
777
778        let merge_output = compactor
779            .merge_ssts(&compaction_region, picker_output)
780            .await
781            .unwrap();
782
783        // Outputs 0 and 2 succeeded (1 + 2 = 3 files added).
784        assert_eq!(merge_output.files_to_add.len(), 3);
785        // Only inputs from successful merges should be removed.
786        assert_eq!(merge_output.files_to_remove.len(), 2);
787
788        let removed_ids: Vec<_> = merge_output
789            .files_to_remove
790            .iter()
791            .map(|f| f.file_id)
792            .collect();
793        assert!(removed_ids.contains(&input_meta_0.file_id));
794        assert!(removed_ids.contains(&input_meta_2.file_id));
795        // The failed output's input must NOT be removed.
796        assert!(!removed_ids.contains(&input_meta_1.file_id));
797    }
798
799    #[tokio::test]
800    async fn test_all_outputs_succeed() {
801        common_telemetry::init_default_ut_logging();
802
803        let compaction_region = new_test_compaction_region().await;
804        let input_meta = dummy_file_meta();
805        let output_meta = vec![dummy_file_meta()];
806
807        let merger = MockMerger::new(vec![Ok(output_meta.clone())]);
808        let compactor = DefaultCompactor::with_merger(merger);
809
810        let picker_output = PickerOutput {
811            outputs: vec![CompactionOutput {
812                output_level: 1,
813                inputs: vec![new_file_handle(input_meta.clone())],
814                filter_deleted: false,
815                output_time_range: None,
816            }],
817            expired_ssts: vec![],
818            time_window_size: 3600,
819            max_file_size: None,
820        };
821
822        let merge_output = compactor
823            .merge_ssts(&compaction_region, picker_output)
824            .await
825            .unwrap();
826
827        assert_eq!(merge_output.files_to_add.len(), 1);
828        assert_eq!(merge_output.files_to_add[0].file_id, output_meta[0].file_id);
829        assert_eq!(merge_output.files_to_remove.len(), 1);
830        assert_eq!(merge_output.files_to_remove[0].file_id, input_meta.file_id);
831    }
832
833    #[tokio::test]
834    async fn test_expired_ssts_always_removed() {
835        common_telemetry::init_default_ut_logging();
836
837        let compaction_region = new_test_compaction_region().await;
838        let input_meta = dummy_file_meta();
839        let expired_meta = dummy_file_meta();
840
841        // The single merge output fails, but expired SSTs should still be removed.
842        let merger = MockMerger::new(vec![Err(error::InvalidMetaSnafu {
843            reason: "fail".to_string(),
844        }
845        .build())]);
846        let compactor = DefaultCompactor::with_merger(merger);
847
848        let picker_output = PickerOutput {
849            outputs: vec![CompactionOutput {
850                output_level: 1,
851                inputs: vec![new_file_handle(input_meta.clone())],
852                filter_deleted: false,
853                output_time_range: None,
854            }],
855            expired_ssts: vec![new_file_handle(expired_meta.clone())],
856            time_window_size: 3600,
857            max_file_size: None,
858        };
859
860        let merge_output = compactor
861            .merge_ssts(&compaction_region, picker_output)
862            .await
863            .unwrap();
864
865        // No files added (merge failed).
866        assert!(merge_output.files_to_add.is_empty());
867        // Only the expired SST should be in files_to_remove (not the failed merge's input).
868        assert_eq!(merge_output.files_to_remove.len(), 1);
869        assert_eq!(
870            merge_output.files_to_remove[0].file_id,
871            expired_meta.file_id
872        );
873    }
874
875    #[derive(Clone)]
876    struct BlockingMerger {
877        call_idx: Arc<AtomicUsize>,
878    }
879
880    #[async_trait::async_trait]
881    impl SstMerger for BlockingMerger {
882        async fn merge_single_output(
883            &self,
884            _compaction_region: CompactionRegion,
885            _output: CompactionOutput,
886            _write_opts: WriteOptions,
887        ) -> Result<Vec<FileMeta>> {
888            self.call_idx.fetch_add(1, Ordering::SeqCst);
889            std::future::pending().await
890        }
891    }
892
893    #[tokio::test(flavor = "multi_thread")]
894    async fn test_merge_ssts_cancels_spawned_tasks() {
895        common_telemetry::init_default_ut_logging();
896
897        let mut compaction_region = new_test_compaction_region().await;
898        compaction_region.max_parallelism = 2;
899
900        let cancel_handle = Arc::new(CancellationHandle::default());
901        let call_idx = Arc::new(AtomicUsize::new(0));
902        let compactor = DefaultCompactor {
903            merger: BlockingMerger {
904                call_idx: call_idx.clone(),
905            },
906            cancel_handle: cancel_handle.clone(),
907        };
908
909        let picker_output = PickerOutput {
910            outputs: vec![
911                CompactionOutput {
912                    output_level: 1,
913                    inputs: vec![new_file_handle(dummy_file_meta())],
914                    filter_deleted: false,
915                    output_time_range: None,
916                },
917                CompactionOutput {
918                    output_level: 1,
919                    inputs: vec![new_file_handle(dummy_file_meta())],
920                    filter_deleted: false,
921                    output_time_range: None,
922                },
923                CompactionOutput {
924                    output_level: 1,
925                    inputs: vec![new_file_handle(dummy_file_meta())],
926                    filter_deleted: false,
927                    output_time_range: None,
928                },
929            ],
930            expired_ssts: vec![],
931            time_window_size: 3600,
932            max_file_size: None,
933        };
934
935        let task = tokio::spawn(async move {
936            compactor
937                .merge_ssts(&compaction_region, picker_output)
938                .await
939        });
940
941        sleep(Duration::from_millis(100)).await;
942        cancel_handle.cancel();
943
944        let merge_output = task
945            .await
946            .expect("merge_ssts should stop after cancellation")
947            .unwrap();
948
949        let started = call_idx.load(Ordering::SeqCst);
950
951        assert!(merge_output.files_to_add.is_empty());
952        assert!(merge_output.files_to_remove.is_empty());
953        assert_eq!(started, 2);
954    }
955}