mito2/
engine.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Mito region engine.
16
17#[cfg(test)]
18mod alter_test;
19#[cfg(test)]
20mod append_mode_test;
21#[cfg(test)]
22mod basic_test;
23#[cfg(test)]
24mod batch_catchup_test;
25#[cfg(test)]
26mod batch_open_test;
27#[cfg(test)]
28mod bump_committed_sequence_test;
29#[cfg(test)]
30mod catchup_test;
31#[cfg(test)]
32mod close_test;
33#[cfg(test)]
34pub(crate) mod compaction_test;
35#[cfg(test)]
36mod create_test;
37#[cfg(test)]
38mod drop_test;
39#[cfg(test)]
40mod edit_region_test;
41#[cfg(test)]
42mod filter_deleted_test;
43#[cfg(test)]
44mod flush_test;
45#[cfg(test)]
46mod index_build_test;
47#[cfg(any(test, feature = "test"))]
48pub mod listener;
49#[cfg(test)]
50mod merge_mode_test;
51#[cfg(test)]
52mod open_test;
53#[cfg(test)]
54mod parallel_test;
55#[cfg(test)]
56mod projection_test;
57#[cfg(test)]
58mod prune_test;
59#[cfg(test)]
60mod row_selector_test;
61#[cfg(test)]
62mod scan_corrupt;
63#[cfg(test)]
64mod scan_test;
65#[cfg(test)]
66mod set_role_state_test;
67#[cfg(test)]
68mod skip_wal_test;
69#[cfg(test)]
70mod staging_test;
71#[cfg(test)]
72mod sync_test;
73#[cfg(test)]
74mod truncate_test;
75
76#[cfg(test)]
77mod copy_region_from_test;
78#[cfg(test)]
79mod remap_manifests_test;
80
81#[cfg(test)]
82mod apply_staging_manifest_test;
83#[cfg(test)]
84mod partition_filter_test;
85mod puffin_index;
86
87use std::any::Any;
88use std::collections::{HashMap, HashSet};
89use std::sync::Arc;
90use std::time::Instant;
91
92use api::region::RegionResponse;
93use async_trait::async_trait;
94use common_base::Plugins;
95use common_error::ext::BoxedError;
96use common_meta::error::UnexpectedSnafu;
97use common_meta::key::SchemaMetadataManagerRef;
98use common_recordbatch::{QueryMemoryTracker, SendableRecordBatchStream};
99use common_stat::get_total_memory_bytes;
100use common_telemetry::{info, tracing, warn};
101use common_wal::options::WalOptions;
102use futures::future::{join_all, try_join_all};
103use futures::stream::{self, Stream, StreamExt};
104use object_store::manager::ObjectStoreManagerRef;
105use snafu::{OptionExt, ResultExt, ensure};
106use store_api::ManifestVersion;
107use store_api::codec::PrimaryKeyEncoding;
108use store_api::logstore::LogStore;
109use store_api::logstore::provider::{KafkaProvider, Provider};
110use store_api::metadata::{ColumnMetadata, RegionMetadataRef};
111use store_api::metric_engine_consts::{
112    MANIFEST_INFO_EXTENSION_KEY, TABLE_COLUMN_METADATA_EXTENSION_KEY,
113};
114use store_api::region_engine::{
115    BatchResponses, MitoCopyRegionFromRequest, MitoCopyRegionFromResponse, RegionEngine,
116    RegionManifestInfo, RegionRole, RegionScannerRef, RegionStatistic, RemapManifestsRequest,
117    RemapManifestsResponse, SetRegionRoleStateResponse, SettableRegionRoleState,
118    SyncRegionFromRequest, SyncRegionFromResponse,
119};
120use store_api::region_request::{
121    AffectedRows, RegionCatchupRequest, RegionOpenRequest, RegionRequest,
122};
123use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
124use store_api::storage::{FileId, FileRefsManifest, RegionId, ScanRequest, SequenceNumber};
125use tokio::sync::{Semaphore, oneshot};
126
127use crate::access_layer::RegionFilePathFactory;
128use crate::cache::{CacheManagerRef, CacheStrategy};
129use crate::config::MitoConfig;
130use crate::engine::puffin_index::{IndexEntryContext, collect_index_entries_from_puffin};
131use crate::error::{
132    InvalidRequestSnafu, JoinSnafu, MitoManifestInfoSnafu, RecvSnafu, RegionNotFoundSnafu, Result,
133    SerdeJsonSnafu, SerializeColumnMetadataSnafu,
134};
135#[cfg(feature = "enterprise")]
136use crate::extension::BoxedExtensionRangeProviderFactory;
137use crate::gc::GcLimiterRef;
138use crate::manifest::action::RegionEdit;
139use crate::memtable::MemtableStats;
140use crate::metrics::{
141    HANDLE_REQUEST_ELAPSED, SCAN_MEMORY_USAGE_BYTES, SCAN_REQUESTS_REJECTED_TOTAL,
142};
143use crate::read::scan_region::{ScanRegion, Scanner};
144use crate::read::stream::ScanBatchStream;
145use crate::region::MitoRegionRef;
146use crate::region::opener::PartitionExprFetcherRef;
147use crate::region::options::parse_wal_options;
148use crate::request::{RegionEditRequest, WorkerRequest};
149use crate::sst::file::{FileMeta, RegionFileId, RegionIndexId};
150use crate::sst::file_ref::FileReferenceManagerRef;
151use crate::sst::index::intermediate::IntermediateManager;
152use crate::sst::index::puffin_manager::PuffinManagerFactory;
153use crate::wal::entry_distributor::{
154    DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE, build_wal_entry_distributor_and_receivers,
155};
156use crate::wal::raw_entry_reader::{LogStoreRawEntryReader, RawEntryReader};
157use crate::worker::WorkerGroup;
158
/// Name of the mito engine.
pub const MITO_ENGINE_NAME: &str = "mito";
160
/// Builder that collects the components needed to construct a [MitoEngine].
///
/// Nothing is started until [MitoEngineBuilder::try_build] is called.
pub struct MitoEngineBuilder<'a, S: LogStore> {
    /// Data home; passed to `MitoConfig::sanitize` when building.
    data_home: &'a str,
    /// Engine config. It is sanitized in `try_build` before being shared.
    config: MitoConfig,
    /// Log store handed to the worker group and wrapped by the WAL raw entry reader.
    log_store: Arc<S>,
    /// Object store manager handed to the worker group.
    object_store_manager: ObjectStoreManagerRef,
    /// Schema metadata manager handed to the worker group.
    schema_metadata_manager: SchemaMetadataManagerRef,
    /// File reference manager handed to the worker group.
    file_ref_manager: FileReferenceManagerRef,
    /// Partition expression fetcher handed to the worker group.
    partition_expr_fetcher: PartitionExprFetcherRef,
    /// Plugins handed to the worker group.
    plugins: Plugins,
    /// Optional extension range provider factory; only present with the `enterprise` feature.
    #[cfg(feature = "enterprise")]
    extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
}
173
174impl<'a, S: LogStore> MitoEngineBuilder<'a, S> {
175    #[allow(clippy::too_many_arguments)]
176    pub fn new(
177        data_home: &'a str,
178        config: MitoConfig,
179        log_store: Arc<S>,
180        object_store_manager: ObjectStoreManagerRef,
181        schema_metadata_manager: SchemaMetadataManagerRef,
182        file_ref_manager: FileReferenceManagerRef,
183        partition_expr_fetcher: PartitionExprFetcherRef,
184        plugins: Plugins,
185    ) -> Self {
186        Self {
187            data_home,
188            config,
189            log_store,
190            object_store_manager,
191            schema_metadata_manager,
192            file_ref_manager,
193            plugins,
194            partition_expr_fetcher,
195            #[cfg(feature = "enterprise")]
196            extension_range_provider_factory: None,
197        }
198    }
199
200    #[cfg(feature = "enterprise")]
201    #[must_use]
202    pub fn with_extension_range_provider_factory(
203        self,
204        extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
205    ) -> Self {
206        Self {
207            extension_range_provider_factory,
208            ..self
209        }
210    }
211
212    pub async fn try_build(mut self) -> Result<MitoEngine> {
213        self.config.sanitize(self.data_home)?;
214
215        let config = Arc::new(self.config);
216        let workers = WorkerGroup::start(
217            config.clone(),
218            self.log_store.clone(),
219            self.object_store_manager,
220            self.schema_metadata_manager,
221            self.file_ref_manager,
222            self.partition_expr_fetcher.clone(),
223            self.plugins,
224        )
225        .await?;
226        let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(self.log_store));
227        let total_memory = get_total_memory_bytes().max(0) as u64;
228        let scan_memory_limit = config.scan_memory_limit.resolve(total_memory) as usize;
229        let scan_memory_tracker =
230            QueryMemoryTracker::builder(scan_memory_limit, config.scan_memory_on_exhausted)
231                .on_update(|usage| {
232                    SCAN_MEMORY_USAGE_BYTES.set(usage as i64);
233                })
234                .on_reject(|| {
235                    SCAN_REQUESTS_REJECTED_TOTAL.inc();
236                })
237                .build();
238
239        let inner = EngineInner {
240            workers,
241            config,
242            wal_raw_entry_reader,
243            scan_memory_tracker,
244            #[cfg(feature = "enterprise")]
245            extension_range_provider_factory: None,
246        };
247
248        #[cfg(feature = "enterprise")]
249        let inner =
250            inner.with_extension_range_provider_factory(self.extension_range_provider_factory);
251
252        Ok(MitoEngine {
253            inner: Arc::new(inner),
254        })
255    }
256}
257
/// Region engine implementation for timeseries data.
///
/// Clones share the same inner state through an [Arc].
#[derive(Clone)]
pub struct MitoEngine {
    /// Shared inner state of the engine.
    inner: Arc<EngineInner>,
}
263
264impl MitoEngine {
265    /// Returns a new [MitoEngine] with specific `config`, `log_store` and `object_store`.
266    #[allow(clippy::too_many_arguments)]
267    pub async fn new<S: LogStore>(
268        data_home: &str,
269        config: MitoConfig,
270        log_store: Arc<S>,
271        object_store_manager: ObjectStoreManagerRef,
272        schema_metadata_manager: SchemaMetadataManagerRef,
273        file_ref_manager: FileReferenceManagerRef,
274        partition_expr_fetcher: PartitionExprFetcherRef,
275        plugins: Plugins,
276    ) -> Result<MitoEngine> {
277        let builder = MitoEngineBuilder::new(
278            data_home,
279            config,
280            log_store,
281            object_store_manager,
282            schema_metadata_manager,
283            file_ref_manager,
284            partition_expr_fetcher,
285            plugins,
286        );
287        builder.try_build().await
288    }
289
    /// Returns the config of the engine.
    pub fn mito_config(&self) -> &MitoConfig {
        &self.inner.config
    }
293
    /// Returns the cache manager of the worker group.
    pub fn cache_manager(&self) -> CacheManagerRef {
        self.inner.workers.cache_manager()
    }
297
    /// Returns the file reference manager of the worker group.
    pub fn file_ref_manager(&self) -> FileReferenceManagerRef {
        self.inner.workers.file_ref_manager()
    }
301
    /// Returns the GC limiter of the worker group.
    pub fn gc_limiter(&self) -> GcLimiterRef {
        self.inner.workers.gc_limiter()
    }
305
    /// Returns the object store manager of the worker group.
    pub fn object_store_manager(&self) -> &ObjectStoreManagerRef {
        self.inner.workers.object_store_manager()
    }
309
    /// Returns the puffin manager factory of the worker group.
    pub fn puffin_manager_factory(&self) -> &PuffinManagerFactory {
        self.inner.workers.puffin_manager_factory()
    }
313
    /// Returns the intermediate manager of the worker group.
    pub fn intermediate_manager(&self) -> &IntermediateManager {
        self.inner.workers.intermediate_manager()
    }
317
    /// Returns the schema metadata manager of the worker group.
    pub fn schema_metadata_manager(&self) -> &SchemaMetadataManagerRef {
        self.inner.workers.schema_metadata_manager()
    }
321
322    /// Get all tmp ref files for given region ids, excluding files that's already in manifest.
323    pub async fn get_snapshot_of_file_refs(
324        &self,
325        file_handle_regions: impl IntoIterator<Item = RegionId>,
326        related_regions: HashMap<RegionId, HashSet<RegionId>>,
327    ) -> Result<FileRefsManifest> {
328        let file_ref_mgr = self.file_ref_manager();
329
330        let file_handle_regions = file_handle_regions.into_iter().collect::<Vec<_>>();
331        // Convert region IDs to MitoRegionRef objects, ignore regions that do not exist on current datanode
332        // as regions on other datanodes are not managed by this engine.
333        let query_regions: Vec<MitoRegionRef> = file_handle_regions
334            .into_iter()
335            .filter_map(|region_id| self.find_region(region_id))
336            .collect();
337
338        let dst_region_to_src_regions: Vec<(MitoRegionRef, HashSet<RegionId>)> = {
339            let dst2src = related_regions
340                .into_iter()
341                .flat_map(|(src, dsts)| dsts.into_iter().map(move |dst| (dst, src)))
342                .fold(
343                    HashMap::<RegionId, HashSet<RegionId>>::new(),
344                    |mut acc, (k, v)| {
345                        let entry = acc.entry(k).or_default();
346                        entry.insert(v);
347                        acc
348                    },
349                );
350            let mut dst_region_to_src_regions = Vec::with_capacity(dst2src.len());
351            for (dst_region, srcs) in dst2src {
352                let Some(dst_region) = self.find_region(dst_region) else {
353                    continue;
354                };
355                dst_region_to_src_regions.push((dst_region, srcs));
356            }
357            dst_region_to_src_regions
358        };
359
360        file_ref_mgr
361            .get_snapshot_of_file_refs(query_regions, dst_region_to_src_regions)
362            .await
363    }
364
    /// Returns true if the specific region exists.
    ///
    /// The check is delegated to the worker group.
    pub fn is_region_exists(&self, region_id: RegionId) -> bool {
        self.inner.workers.is_region_exists(region_id)
    }
369
    /// Returns true if the specific region is opening.
    ///
    /// The check is delegated to the worker group.
    pub fn is_region_opening(&self, region_id: RegionId) -> bool {
        self.inner.workers.is_region_opening(region_id)
    }
374
    /// Returns true if the specific region is catching up.
    ///
    /// The check is delegated to the worker group.
    pub fn is_region_catching_up(&self, region_id: RegionId) -> bool {
        self.inner.workers.is_region_catching_up(region_id)
    }
379
380    /// Returns the region disk/memory statistic.
381    pub fn get_region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
382        self.find_region(region_id)
383            .map(|region| region.region_statistic())
384    }
385
386    /// Returns primary key encoding of the region.
387    pub fn get_primary_key_encoding(&self, region_id: RegionId) -> Option<PrimaryKeyEncoding> {
388        self.find_region(region_id)
389            .map(|r| r.primary_key_encoding())
390    }
391
392    /// Handle substrait query and return a stream of record batches
393    ///
394    /// Notice that the output stream's ordering is not guranateed. If order
395    /// matter, please use [`scanner`] to build a [`Scanner`] to consume.
396    #[tracing::instrument(skip_all)]
397    pub async fn scan_to_stream(
398        &self,
399        region_id: RegionId,
400        request: ScanRequest,
401    ) -> Result<SendableRecordBatchStream, BoxedError> {
402        self.scanner(region_id, request)
403            .await
404            .map_err(BoxedError::new)?
405            .scan()
406            .await
407    }
408
409    /// Scan [`Batch`]es by [`ScanRequest`].
410    pub async fn scan_batch(
411        &self,
412        region_id: RegionId,
413        request: ScanRequest,
414        filter_deleted: bool,
415    ) -> Result<ScanBatchStream> {
416        let mut scan_region = self.scan_region(region_id, request)?;
417        scan_region.set_filter_deleted(filter_deleted);
418        scan_region.scanner().await?.scan_batch()
419    }
420
421    /// Returns a scanner to scan for `request`.
422    pub(crate) async fn scanner(
423        &self,
424        region_id: RegionId,
425        request: ScanRequest,
426    ) -> Result<Scanner> {
427        self.scan_region(region_id, request)?.scanner().await
428    }
429
    /// Scans a region.
    ///
    /// Delegates to the inner engine and returns the resulting [ScanRegion].
    #[tracing::instrument(skip_all, fields(region_id = %region_id))]
    fn scan_region(&self, region_id: RegionId, request: ScanRequest) -> Result<ScanRegion> {
        self.inner.scan_region(region_id, request)
    }
435
436    /// Edit region's metadata by [RegionEdit] directly. Use with care.
437    /// Now we only allow adding files or removing files from region (the [RegionEdit] struct can only contain a non-empty "files_to_add" or "files_to_remove" field).
438    /// Other region editing intention will result in an "invalid request" error.
439    /// Also note that if a region is to be edited directly, we MUST not write data to it thereafter.
440    pub async fn edit_region(&self, region_id: RegionId, edit: RegionEdit) -> Result<()> {
441        let _timer = HANDLE_REQUEST_ELAPSED
442            .with_label_values(&["edit_region"])
443            .start_timer();
444
445        ensure!(
446            is_valid_region_edit(&edit),
447            InvalidRequestSnafu {
448                region_id,
449                reason: "invalid region edit"
450            }
451        );
452
453        let (tx, rx) = oneshot::channel();
454        let request = WorkerRequest::EditRegion(RegionEditRequest {
455            region_id,
456            edit,
457            tx,
458        });
459        self.inner
460            .workers
461            .submit_to_worker(region_id, request)
462            .await?;
463        rx.await.context(RecvSnafu)?
464    }
465
    /// Handles copy region from request.
    ///
    /// This method is only supported for internal use and is not exposed in the trait implementation.
    /// The request is forwarded to the inner engine as is.
    pub async fn copy_region_from(
        &self,
        region_id: RegionId,
        request: MitoCopyRegionFromRequest,
    ) -> Result<MitoCopyRegionFromResponse> {
        self.inner.copy_region_from(region_id, request).await
    }
476
    /// Test-only alias of `find_region`.
    #[cfg(test)]
    pub(crate) fn get_region(&self, id: RegionId) -> Option<crate::region::MitoRegionRef> {
        self.find_region(id)
    }
481
    /// Returns the region with the given id, or `None` if the worker group does not have it.
    pub fn find_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
        self.inner.workers.get_region(region_id)
    }
485
    /// Returns all regions.
    ///
    /// The regions are collected from the worker group into a new `Vec`.
    pub fn regions(&self) -> Vec<MitoRegionRef> {
        self.inner.workers.all_regions().collect()
    }
490
491    fn encode_manifest_info_to_extensions(
492        region_id: &RegionId,
493        manifest_info: RegionManifestInfo,
494        extensions: &mut HashMap<String, Vec<u8>>,
495    ) -> Result<()> {
496        let region_manifest_info = vec![(*region_id, manifest_info)];
497
498        extensions.insert(
499            MANIFEST_INFO_EXTENSION_KEY.to_string(),
500            RegionManifestInfo::encode_list(&region_manifest_info).context(SerdeJsonSnafu)?,
501        );
502        info!(
503            "Added manifest info: {:?} to extensions, region_id: {:?}",
504            region_manifest_info, region_id
505        );
506        Ok(())
507    }
508
509    fn encode_column_metadatas_to_extensions(
510        region_id: &RegionId,
511        column_metadatas: Vec<ColumnMetadata>,
512        extensions: &mut HashMap<String, Vec<u8>>,
513    ) -> Result<()> {
514        extensions.insert(
515            TABLE_COLUMN_METADATA_EXTENSION_KEY.to_string(),
516            ColumnMetadata::encode_list(&column_metadatas).context(SerializeColumnMetadataSnafu)?,
517        );
518        info!(
519            "Added column metadatas: {:?} to extensions, region_id: {:?}",
520            column_metadatas, region_id
521        );
522        Ok(())
523    }
524
525    /// Find the current version's memtables and SSTs stats by region_id.
526    /// The stats must be collected in one place one time to ensure data consistency.
527    pub fn find_memtable_and_sst_stats(
528        &self,
529        region_id: RegionId,
530    ) -> Result<(Vec<MemtableStats>, Vec<FileMeta>)> {
531        let region = self
532            .find_region(region_id)
533            .context(RegionNotFoundSnafu { region_id })?;
534
535        let version = region.version();
536        let memtable_stats = version
537            .memtables
538            .list_memtables()
539            .iter()
540            .map(|x| x.stats())
541            .collect::<Vec<_>>();
542
543        let sst_stats = version
544            .ssts
545            .levels()
546            .iter()
547            .flat_map(|level| level.files().map(|x| x.meta_ref()))
548            .cloned()
549            .collect::<Vec<_>>();
550        Ok((memtable_stats, sst_stats))
551    }
552
553    /// Lists all SSTs from the manifest of all regions in the engine.
554    pub async fn all_ssts_from_manifest(&self) -> Vec<ManifestSstEntry> {
555        let node_id = self.inner.workers.file_ref_manager().node_id();
556        let regions = self.inner.workers.all_regions();
557
558        let mut results = Vec::new();
559        for region in regions {
560            let mut entries = region.manifest_sst_entries().await;
561            for e in &mut entries {
562                e.node_id = node_id;
563            }
564            results.extend(entries);
565        }
566
567        results
568    }
569
    /// Lists metadata about all puffin index targets stored in the engine.
    ///
    /// Regions are processed one after another. Within a region, the manifest SST
    /// entries are processed concurrently (at most 8 at a time) and their results
    /// are appended in completion order, so the order of the output is not stable.
    ///
    /// Entries without an index file, or whose file id fails to parse, contribute
    /// nothing to the result.
    pub async fn all_index_metas(&self) -> Vec<PuffinIndexMetaEntry> {
        let node_id = self.inner.workers.file_ref_manager().node_id();
        let cache_manager = self.inner.workers.cache_manager();
        // Caches are fetched once and cloned for every entry below.
        let puffin_metadata_cache = cache_manager.puffin_metadata_cache().cloned();
        let bloom_filter_cache = cache_manager.bloom_filter_index_cache().cloned();
        let inverted_index_cache = cache_manager.inverted_index_cache().cloned();

        let mut results = Vec::new();

        for region in self.inner.workers.all_regions() {
            let manifest_entries = region.manifest_sst_entries().await;
            let access_layer = region.access_layer.clone();
            let table_dir = access_layer.table_dir().to_string();
            let path_type = access_layer.path_type();
            let object_store = access_layer.object_store().clone();
            let puffin_factory = access_layer.puffin_manager_factory().clone();
            let path_factory = RegionFilePathFactory::new(table_dir, path_type);

            // Builds one lazy future per manifest entry; nothing runs until the stream polls it.
            let entry_futures = manifest_entries.into_iter().map(|entry| {
                // Each future owns its own clones of the shared handles.
                let object_store = object_store.clone();
                let path_factory = path_factory.clone();
                let puffin_factory = puffin_factory.clone();
                let puffin_metadata_cache = puffin_metadata_cache.clone();
                let bloom_filter_cache = bloom_filter_cache.clone();
                let inverted_index_cache = inverted_index_cache.clone();

                async move {
                    // An entry without an index file has no index metadata to report.
                    let Some(index_file_path) = entry.index_file_path.as_ref() else {
                        return Vec::new();
                    };

                    let index_version = entry.index_version;
                    // A file id that fails to parse is logged and the entry is skipped.
                    let file_id = match FileId::parse_str(&entry.file_id) {
                        Ok(file_id) => file_id,
                        Err(err) => {
                            warn!(
                                err;
                                "Failed to parse puffin index file id, table_dir: {}, file_id: {}",
                                entry.table_dir,
                                entry.file_id
                            );
                            return Vec::new();
                        }
                    };
                    let region_index_id = RegionIndexId::new(
                        RegionFileId::new(entry.region_id, file_id),
                        index_version,
                    );
                    // The context borrows from `entry`, which lives until the future completes.
                    let context = IndexEntryContext {
                        table_dir: &entry.table_dir,
                        index_file_path: index_file_path.as_str(),
                        region_id: entry.region_id,
                        table_id: entry.table_id,
                        region_number: entry.region_number,
                        region_group: entry.region_group,
                        region_sequence: entry.region_sequence,
                        file_id: &entry.file_id,
                        index_file_size: entry.index_file_size,
                        node_id,
                    };

                    let manager = puffin_factory
                        .build(object_store, path_factory)
                        .with_puffin_metadata_cache(puffin_metadata_cache);

                    collect_index_entries_from_puffin(
                        manager,
                        region_index_id,
                        context,
                        bloom_filter_cache,
                        inverted_index_cache,
                    )
                    .await
                }
            });

            let mut meta_stream = stream::iter(entry_futures).buffer_unordered(8); // Parallelism is 8.
            while let Some(mut metas) = meta_stream.next().await {
                results.append(&mut metas);
            }
        }

        results
    }
655
656    /// Lists all SSTs from the storage layer of all regions in the engine.
657    pub fn all_ssts_from_storage(&self) -> impl Stream<Item = Result<StorageSstEntry>> {
658        let node_id = self.inner.workers.file_ref_manager().node_id();
659        let regions = self.inner.workers.all_regions();
660
661        let mut layers_distinct_table_dirs = HashMap::new();
662        for region in regions {
663            let table_dir = region.access_layer.table_dir();
664            if !layers_distinct_table_dirs.contains_key(table_dir) {
665                layers_distinct_table_dirs
666                    .insert(table_dir.to_string(), region.access_layer.clone());
667            }
668        }
669
670        stream::iter(layers_distinct_table_dirs)
671            .map(|(_, access_layer)| access_layer.storage_sst_entries())
672            .flatten()
673            .map(move |entry| {
674                entry.map(move |mut entry| {
675                    entry.node_id = node_id;
676                    entry
677                })
678            })
679    }
680}
681
682/// Check whether the region edit is valid.
683///
684/// Only adding or removing files to region is considered valid now.
685fn is_valid_region_edit(edit: &RegionEdit) -> bool {
686    (!edit.files_to_add.is_empty() || !edit.files_to_remove.is_empty())
687        && matches!(
688            edit,
689            RegionEdit {
690                files_to_add: _,
691                files_to_remove: _,
692                timestamp_ms: _,
693                compaction_time_window: None,
694                flushed_entry_id: None,
695                flushed_sequence: None,
696                ..
697            }
698        )
699}
700
/// Inner struct of [MitoEngine].
///
/// It is shared between clones of the engine through an `Arc`.
struct EngineInner {
    /// Region workers group.
    workers: WorkerGroup,
    /// Config of the engine.
    config: Arc<MitoConfig>,
    /// The Wal raw entry reader.
    wal_raw_entry_reader: Arc<dyn RawEntryReader>,
    /// Memory tracker for table scans.
    scan_memory_tracker: QueryMemoryTracker,
    /// Optional extension range provider factory; only present with the `enterprise` feature.
    #[cfg(feature = "enterprise")]
    extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
}
714
/// Region open requests grouped by the Kafka topic of their WAL options.
type TopicGroupedRegionOpenRequests = HashMap<String, Vec<(RegionId, RegionOpenRequest)>>;
716
717/// Returns requests([TopicGroupedRegionOpenRequests]) grouped by topic and remaining requests.
718fn prepare_batch_open_requests(
719    requests: Vec<(RegionId, RegionOpenRequest)>,
720) -> Result<(
721    TopicGroupedRegionOpenRequests,
722    Vec<(RegionId, RegionOpenRequest)>,
723)> {
724    let mut topic_to_regions: HashMap<String, Vec<(RegionId, RegionOpenRequest)>> = HashMap::new();
725    let mut remaining_regions: Vec<(RegionId, RegionOpenRequest)> = Vec::new();
726    for (region_id, request) in requests {
727        match parse_wal_options(&request.options).context(SerdeJsonSnafu)? {
728            WalOptions::Kafka(options) => {
729                topic_to_regions
730                    .entry(options.topic)
731                    .or_default()
732                    .push((region_id, request));
733            }
734            WalOptions::RaftEngine | WalOptions::Noop => {
735                remaining_regions.push((region_id, request));
736            }
737        }
738    }
739
740    Ok((topic_to_regions, remaining_regions))
741}
742
743impl EngineInner {
744    #[cfg(feature = "enterprise")]
745    #[must_use]
746    fn with_extension_range_provider_factory(
747        self,
748        extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
749    ) -> Self {
750        Self {
751            extension_range_provider_factory,
752            ..self
753        }
754    }
755
    /// Stop the inner engine.
    ///
    /// This stops the worker group and returns its result.
    async fn stop(&self) -> Result<()> {
        self.workers.stop().await
    }
760
    /// Finds the region by id.
    ///
    /// Returns a region-not-found error if the worker group does not have the region.
    fn find_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
        self.workers
            .get_region(region_id)
            .context(RegionNotFoundSnafu { region_id })
    }
766
767    /// Get metadata of a region.
768    ///
769    /// Returns error if the region doesn't exist.
770    fn get_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef> {
771        // Reading a region doesn't need to go through the region worker thread.
772        let region = self.find_region(region_id)?;
773        Ok(region.metadata())
774    }
775
    /// Opens all regions that share the Kafka `topic`.
    ///
    /// One WAL entry distributor is built for the topic together with one entry
    /// receiver per region. Each region's open request is submitted to its worker
    /// with its receiver, then the distributor runs while the workers open the regions.
    ///
    /// Returns the per-region results paired with their region ids, in the order
    /// of `region_requests`. The whole call fails if submitting a request fails or
    /// if the distribution task fails.
    async fn open_topic_regions(
        &self,
        topic: String,
        region_requests: Vec<(RegionId, RegionOpenRequest)>,
    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
        let now = Instant::now();
        let region_ids = region_requests
            .iter()
            .map(|(region_id, _)| *region_id)
            .collect::<Vec<_>>();
        let provider = Provider::kafka_provider(topic);
        let (distributor, entry_receivers) = build_wal_entry_distributor_and_receivers(
            provider.clone(),
            self.wal_raw_entry_reader.clone(),
            &region_ids,
            DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
        );

        // Submits every open request first; the response futures are awaited later.
        let mut responses = Vec::with_capacity(region_requests.len());
        for ((region_id, request), entry_receiver) in
            region_requests.into_iter().zip(entry_receivers)
        {
            let (request, receiver) =
                WorkerRequest::new_open_region_request(region_id, request, Some(entry_receiver));
            self.workers.submit_to_worker(region_id, request).await?;
            responses.push(async move { receiver.await.context(RecvSnafu)? });
        }

        // Spawns the entries distribution as a separate task so that it runs while
        // the worker responses are awaited below.
        let distribution =
            common_runtime::spawn_global(async move { distributor.distribute().await });
        // Waits for worker returns.
        let responses = join_all(responses).await;
        // Waits for the distribution task; both a join error and a distribution error are propagated.
        distribution.await.context(JoinSnafu)??;

        let num_failure = responses.iter().filter(|r| r.is_err()).count();
        info!(
            "Opened {} regions for topic '{}', failures: {}, elapsed: {:?}",
            region_ids.len() - num_failure,
            // Safety: provider is kafka provider.
            provider.as_kafka_provider().unwrap(),
            num_failure,
            now.elapsed(),
        );
        Ok(region_ids.into_iter().zip(responses).collect())
    }
822
823    async fn handle_batch_open_requests(
824        &self,
825        parallelism: usize,
826        requests: Vec<(RegionId, RegionOpenRequest)>,
827    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
828        let semaphore = Arc::new(Semaphore::new(parallelism));
829        let (topic_to_region_requests, remaining_region_requests) =
830            prepare_batch_open_requests(requests)?;
831        let mut responses =
832            Vec::with_capacity(topic_to_region_requests.len() + remaining_region_requests.len());
833
834        if !topic_to_region_requests.is_empty() {
835            let mut tasks = Vec::with_capacity(topic_to_region_requests.len());
836            for (topic, region_requests) in topic_to_region_requests {
837                let semaphore_moved = semaphore.clone();
838                tasks.push(async move {
839                    // Safety: semaphore must exist
840                    let _permit = semaphore_moved.acquire().await.unwrap();
841                    self.open_topic_regions(topic, region_requests).await
842                })
843            }
844            let r = try_join_all(tasks).await?;
845            responses.extend(r.into_iter().flatten());
846        }
847
848        if !remaining_region_requests.is_empty() {
849            let mut tasks = Vec::with_capacity(remaining_region_requests.len());
850            let mut region_ids = Vec::with_capacity(remaining_region_requests.len());
851            for (region_id, request) in remaining_region_requests {
852                let semaphore_moved = semaphore.clone();
853                region_ids.push(region_id);
854                tasks.push(async move {
855                    // Safety: semaphore must exist
856                    let _permit = semaphore_moved.acquire().await.unwrap();
857                    let (request, receiver) =
858                        WorkerRequest::new_open_region_request(region_id, request, None);
859
860                    self.workers.submit_to_worker(region_id, request).await?;
861
862                    receiver.await.context(RecvSnafu)?
863                })
864            }
865
866            let results = join_all(tasks).await;
867            responses.extend(region_ids.into_iter().zip(results));
868        }
869
870        Ok(responses)
871    }
872
873    async fn catchup_topic_regions(
874        &self,
875        provider: Provider,
876        region_requests: Vec<(RegionId, RegionCatchupRequest)>,
877    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
878        let now = Instant::now();
879        let region_ids = region_requests
880            .iter()
881            .map(|(region_id, _)| *region_id)
882            .collect::<Vec<_>>();
883        let (distributor, entry_receivers) = build_wal_entry_distributor_and_receivers(
884            provider.clone(),
885            self.wal_raw_entry_reader.clone(),
886            &region_ids,
887            DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
888        );
889
890        let mut responses = Vec::with_capacity(region_requests.len());
891        for ((region_id, request), entry_receiver) in
892            region_requests.into_iter().zip(entry_receivers)
893        {
894            let (request, receiver) =
895                WorkerRequest::new_catchup_region_request(region_id, request, Some(entry_receiver));
896            self.workers.submit_to_worker(region_id, request).await?;
897            responses.push(async move { receiver.await.context(RecvSnafu)? });
898        }
899
900        // Wait for entries distribution.
901        let distribution =
902            common_runtime::spawn_global(async move { distributor.distribute().await });
903        // Wait for worker returns.
904        let responses = join_all(responses).await;
905        distribution.await.context(JoinSnafu)??;
906
907        let num_failure = responses.iter().filter(|r| r.is_err()).count();
908        info!(
909            "Caught up {} regions for topic '{}', failures: {}, elapsed: {:?}",
910            region_ids.len() - num_failure,
911            // Safety: provider is kafka provider.
912            provider.as_kafka_provider().unwrap(),
913            num_failure,
914            now.elapsed(),
915        );
916
917        Ok(region_ids.into_iter().zip(responses).collect())
918    }
919
920    async fn handle_batch_catchup_requests(
921        &self,
922        parallelism: usize,
923        requests: Vec<(RegionId, RegionCatchupRequest)>,
924    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
925        let mut responses = Vec::with_capacity(requests.len());
926        let mut topic_regions: HashMap<Arc<KafkaProvider>, Vec<_>> = HashMap::new();
927        let mut remaining_region_requests = vec![];
928
929        for (region_id, request) in requests {
930            match self.workers.get_region(region_id) {
931                Some(region) => match region.provider.as_kafka_provider() {
932                    Some(provider) => {
933                        topic_regions
934                            .entry(provider.clone())
935                            .or_default()
936                            .push((region_id, request));
937                    }
938                    None => {
939                        remaining_region_requests.push((region_id, request));
940                    }
941                },
942                None => responses.push((region_id, RegionNotFoundSnafu { region_id }.fail())),
943            }
944        }
945
946        let semaphore = Arc::new(Semaphore::new(parallelism));
947
948        if !topic_regions.is_empty() {
949            let mut tasks = Vec::with_capacity(topic_regions.len());
950            for (provider, region_requests) in topic_regions {
951                let semaphore_moved = semaphore.clone();
952                tasks.push(async move {
953                    // Safety: semaphore must exist
954                    let _permit = semaphore_moved.acquire().await.unwrap();
955                    self.catchup_topic_regions(Provider::Kafka(provider), region_requests)
956                        .await
957                })
958            }
959
960            let r = try_join_all(tasks).await?;
961            responses.extend(r.into_iter().flatten());
962        }
963
964        if !remaining_region_requests.is_empty() {
965            let mut tasks = Vec::with_capacity(remaining_region_requests.len());
966            let mut region_ids = Vec::with_capacity(remaining_region_requests.len());
967            for (region_id, request) in remaining_region_requests {
968                let semaphore_moved = semaphore.clone();
969                region_ids.push(region_id);
970                tasks.push(async move {
971                    // Safety: semaphore must exist
972                    let _permit = semaphore_moved.acquire().await.unwrap();
973                    let (request, receiver) =
974                        WorkerRequest::new_catchup_region_request(region_id, request, None);
975
976                    self.workers.submit_to_worker(region_id, request).await?;
977
978                    receiver.await.context(RecvSnafu)?
979                })
980            }
981
982            let results = join_all(tasks).await;
983            responses.extend(region_ids.into_iter().zip(results));
984        }
985
986        Ok(responses)
987    }
988
989    /// Handles [RegionRequest] and return its executed result.
990    async fn handle_request(
991        &self,
992        region_id: RegionId,
993        request: RegionRequest,
994    ) -> Result<AffectedRows> {
995        let region_metadata = self.get_metadata(region_id).ok();
996        let (request, receiver) =
997            WorkerRequest::try_from_region_request(region_id, request, region_metadata)?;
998        self.workers.submit_to_worker(region_id, request).await?;
999
1000        receiver.await.context(RecvSnafu)?
1001    }
1002
1003    /// Returns the sequence of latest committed data.
1004    fn get_committed_sequence(&self, region_id: RegionId) -> Result<SequenceNumber> {
1005        // Reading a region doesn't need to go through the region worker thread.
1006        self.find_region(region_id)
1007            .map(|r| r.find_committed_sequence())
1008    }
1009
1010    /// Handles the scan `request` and returns a [ScanRegion].
1011    #[tracing::instrument(skip_all, fields(region_id = %region_id))]
1012    fn scan_region(&self, region_id: RegionId, request: ScanRequest) -> Result<ScanRegion> {
1013        let query_start = Instant::now();
1014        // Reading a region doesn't need to go through the region worker thread.
1015        let region = self.find_region(region_id)?;
1016        let version = region.version();
1017        // Get cache.
1018        let cache_manager = self.workers.cache_manager();
1019
1020        let scan_region = ScanRegion::new(
1021            version,
1022            region.access_layer.clone(),
1023            request,
1024            CacheStrategy::EnableAll(cache_manager),
1025        )
1026        .with_parallel_scan_channel_size(self.config.parallel_scan_channel_size)
1027        .with_max_concurrent_scan_files(self.config.max_concurrent_scan_files)
1028        .with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
1029        .with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
1030        .with_ignore_bloom_filter(self.config.bloom_filter_index.apply_on_query.disabled())
1031        .with_start_time(query_start);
1032
1033        #[cfg(feature = "enterprise")]
1034        let scan_region = self.maybe_fill_extension_range_provider(scan_region, region);
1035
1036        Ok(scan_region)
1037    }
1038
1039    #[cfg(feature = "enterprise")]
1040    fn maybe_fill_extension_range_provider(
1041        &self,
1042        mut scan_region: ScanRegion,
1043        region: MitoRegionRef,
1044    ) -> ScanRegion {
1045        if region.is_follower()
1046            && let Some(factory) = self.extension_range_provider_factory.as_ref()
1047        {
1048            scan_region
1049                .set_extension_range_provider(factory.create_extension_range_provider(region));
1050        }
1051        scan_region
1052    }
1053
1054    /// Converts the [`RegionRole`].
1055    fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<()> {
1056        let region = self.find_region(region_id)?;
1057        region.set_role(role);
1058        Ok(())
1059    }
1060
1061    /// Sets read-only for a region and ensures no more writes in the region after it returns.
1062    async fn set_region_role_state_gracefully(
1063        &self,
1064        region_id: RegionId,
1065        region_role_state: SettableRegionRoleState,
1066    ) -> Result<SetRegionRoleStateResponse> {
1067        // Notes: It acquires the mutable ownership to ensure no other threads,
1068        // Therefore, we submit it to the worker.
1069        let (request, receiver) =
1070            WorkerRequest::new_set_readonly_gracefully(region_id, region_role_state);
1071        self.workers.submit_to_worker(region_id, request).await?;
1072
1073        receiver.await.context(RecvSnafu)
1074    }
1075
1076    async fn sync_region(
1077        &self,
1078        region_id: RegionId,
1079        manifest_info: RegionManifestInfo,
1080    ) -> Result<(ManifestVersion, bool)> {
1081        ensure!(manifest_info.is_mito(), MitoManifestInfoSnafu);
1082        let manifest_version = manifest_info.data_manifest_version();
1083        let (request, receiver) =
1084            WorkerRequest::new_sync_region_request(region_id, manifest_version);
1085        self.workers.submit_to_worker(region_id, request).await?;
1086
1087        receiver.await.context(RecvSnafu)?
1088    }
1089
1090    async fn remap_manifests(
1091        &self,
1092        request: RemapManifestsRequest,
1093    ) -> Result<RemapManifestsResponse> {
1094        let region_id = request.region_id;
1095        let (request, receiver) = WorkerRequest::try_from_remap_manifests_request(request)?;
1096        self.workers.submit_to_worker(region_id, request).await?;
1097        let manifest_paths = receiver.await.context(RecvSnafu)??;
1098        Ok(RemapManifestsResponse { manifest_paths })
1099    }
1100
1101    async fn copy_region_from(
1102        &self,
1103        region_id: RegionId,
1104        request: MitoCopyRegionFromRequest,
1105    ) -> Result<MitoCopyRegionFromResponse> {
1106        let (request, receiver) =
1107            WorkerRequest::try_from_copy_region_from_request(region_id, request)?;
1108        self.workers.submit_to_worker(region_id, request).await?;
1109        let response = receiver.await.context(RecvSnafu)??;
1110        Ok(response)
1111    }
1112
1113    fn role(&self, region_id: RegionId) -> Option<RegionRole> {
1114        self.workers.get_region(region_id).map(|region| {
1115            if region.is_follower() {
1116                RegionRole::Follower
1117            } else {
1118                RegionRole::Leader
1119            }
1120        })
1121    }
1122}
1123
1124fn map_batch_responses(responses: Vec<(RegionId, Result<AffectedRows>)>) -> BatchResponses {
1125    responses
1126        .into_iter()
1127        .map(|(region_id, response)| {
1128            (
1129                region_id,
1130                response.map(RegionResponse::new).map_err(BoxedError::new),
1131            )
1132        })
1133        .collect()
1134}
1135
1136#[async_trait]
1137impl RegionEngine for MitoEngine {
1138    fn name(&self) -> &str {
1139        MITO_ENGINE_NAME
1140    }
1141
1142    #[tracing::instrument(skip_all)]
1143    async fn handle_batch_open_requests(
1144        &self,
1145        parallelism: usize,
1146        requests: Vec<(RegionId, RegionOpenRequest)>,
1147    ) -> Result<BatchResponses, BoxedError> {
1148        // TODO(weny): add metrics.
1149        self.inner
1150            .handle_batch_open_requests(parallelism, requests)
1151            .await
1152            .map(map_batch_responses)
1153            .map_err(BoxedError::new)
1154    }
1155
1156    #[tracing::instrument(skip_all)]
1157    async fn handle_batch_catchup_requests(
1158        &self,
1159        parallelism: usize,
1160        requests: Vec<(RegionId, RegionCatchupRequest)>,
1161    ) -> Result<BatchResponses, BoxedError> {
1162        self.inner
1163            .handle_batch_catchup_requests(parallelism, requests)
1164            .await
1165            .map(map_batch_responses)
1166            .map_err(BoxedError::new)
1167    }
1168
1169    #[tracing::instrument(skip_all)]
1170    async fn handle_request(
1171        &self,
1172        region_id: RegionId,
1173        request: RegionRequest,
1174    ) -> Result<RegionResponse, BoxedError> {
1175        let _timer = HANDLE_REQUEST_ELAPSED
1176            .with_label_values(&[request.request_type()])
1177            .start_timer();
1178
1179        let is_alter = matches!(request, RegionRequest::Alter(_));
1180        let is_create = matches!(request, RegionRequest::Create(_));
1181        let mut response = self
1182            .inner
1183            .handle_request(region_id, request)
1184            .await
1185            .map(RegionResponse::new)
1186            .map_err(BoxedError::new)?;
1187
1188        if is_alter {
1189            self.handle_alter_response(region_id, &mut response)
1190                .map_err(BoxedError::new)?;
1191        } else if is_create {
1192            self.handle_create_response(region_id, &mut response)
1193                .map_err(BoxedError::new)?;
1194        }
1195
1196        Ok(response)
1197    }
1198
1199    #[tracing::instrument(skip_all)]
1200    async fn handle_query(
1201        &self,
1202        region_id: RegionId,
1203        request: ScanRequest,
1204    ) -> Result<RegionScannerRef, BoxedError> {
1205        self.scan_region(region_id, request)
1206            .map_err(BoxedError::new)?
1207            .region_scanner()
1208            .await
1209            .map_err(BoxedError::new)
1210    }
1211
1212    fn query_memory_tracker(&self) -> Option<QueryMemoryTracker> {
1213        Some(self.inner.scan_memory_tracker.clone())
1214    }
1215
1216    async fn get_committed_sequence(
1217        &self,
1218        region_id: RegionId,
1219    ) -> Result<SequenceNumber, BoxedError> {
1220        self.inner
1221            .get_committed_sequence(region_id)
1222            .map_err(BoxedError::new)
1223    }
1224
1225    /// Retrieve region's metadata.
1226    async fn get_metadata(
1227        &self,
1228        region_id: RegionId,
1229    ) -> std::result::Result<RegionMetadataRef, BoxedError> {
1230        self.inner.get_metadata(region_id).map_err(BoxedError::new)
1231    }
1232
1233    /// Stop the engine.
1234    ///
1235    /// Stopping the engine doesn't stop the underlying log store as other components might
1236    /// still use it. (When no other components are referencing the log store, it will
1237    /// automatically shutdown.)
1238    async fn stop(&self) -> std::result::Result<(), BoxedError> {
1239        self.inner.stop().await.map_err(BoxedError::new)
1240    }
1241
1242    fn region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
1243        self.get_region_statistic(region_id)
1244    }
1245
1246    fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<(), BoxedError> {
1247        self.inner
1248            .set_region_role(region_id, role)
1249            .map_err(BoxedError::new)
1250    }
1251
1252    async fn set_region_role_state_gracefully(
1253        &self,
1254        region_id: RegionId,
1255        region_role_state: SettableRegionRoleState,
1256    ) -> Result<SetRegionRoleStateResponse, BoxedError> {
1257        let _timer = HANDLE_REQUEST_ELAPSED
1258            .with_label_values(&["set_region_role_state_gracefully"])
1259            .start_timer();
1260
1261        self.inner
1262            .set_region_role_state_gracefully(region_id, region_role_state)
1263            .await
1264            .map_err(BoxedError::new)
1265    }
1266
1267    async fn sync_region(
1268        &self,
1269        region_id: RegionId,
1270        request: SyncRegionFromRequest,
1271    ) -> Result<SyncRegionFromResponse, BoxedError> {
1272        let manifest_info = request
1273            .into_region_manifest_info()
1274            .context(UnexpectedSnafu {
1275                err_msg: "Expected a manifest info request",
1276            })
1277            .map_err(BoxedError::new)?;
1278        let (_, synced) = self
1279            .inner
1280            .sync_region(region_id, manifest_info)
1281            .await
1282            .map_err(BoxedError::new)?;
1283
1284        Ok(SyncRegionFromResponse::Mito { synced })
1285    }
1286
1287    async fn remap_manifests(
1288        &self,
1289        request: RemapManifestsRequest,
1290    ) -> Result<RemapManifestsResponse, BoxedError> {
1291        self.inner
1292            .remap_manifests(request)
1293            .await
1294            .map_err(BoxedError::new)
1295    }
1296
1297    fn role(&self, region_id: RegionId) -> Option<RegionRole> {
1298        self.inner.role(region_id)
1299    }
1300
1301    fn as_any(&self) -> &dyn Any {
1302        self
1303    }
1304}
1305
1306impl MitoEngine {
1307    fn handle_alter_response(
1308        &self,
1309        region_id: RegionId,
1310        response: &mut RegionResponse,
1311    ) -> Result<()> {
1312        if let Some(statistic) = self.region_statistic(region_id) {
1313            Self::encode_manifest_info_to_extensions(
1314                &region_id,
1315                statistic.manifest,
1316                &mut response.extensions,
1317            )?;
1318        }
1319        let column_metadatas = self
1320            .inner
1321            .find_region(region_id)
1322            .ok()
1323            .map(|r| r.metadata().column_metadatas.clone());
1324        if let Some(column_metadatas) = column_metadatas {
1325            Self::encode_column_metadatas_to_extensions(
1326                &region_id,
1327                column_metadatas,
1328                &mut response.extensions,
1329            )?;
1330        }
1331        Ok(())
1332    }
1333
1334    fn handle_create_response(
1335        &self,
1336        region_id: RegionId,
1337        response: &mut RegionResponse,
1338    ) -> Result<()> {
1339        let column_metadatas = self
1340            .inner
1341            .find_region(region_id)
1342            .ok()
1343            .map(|r| r.metadata().column_metadatas.clone());
1344        if let Some(column_metadatas) = column_metadatas {
1345            Self::encode_column_metadatas_to_extensions(
1346                &region_id,
1347                column_metadatas,
1348                &mut response.extensions,
1349            )?;
1350        }
1351        Ok(())
1352    }
1353}
1354
// Test-only methods.
#[cfg(any(test, feature = "test"))]
#[allow(clippy::too_many_arguments)]
impl MitoEngine {
    /// Creates a [MitoEngine] for tests.
    ///
    /// `config` is sanitized against `data_home` first. The worker group is started
    /// through `WorkerGroup::start_for_test` with the given test dependencies, and the
    /// scan memory tracker is sized from the configured scan memory limit.
    pub async fn new_for_test<S: LogStore>(
        data_home: &str,
        mut config: MitoConfig,
        log_store: Arc<S>,
        object_store_manager: ObjectStoreManagerRef,
        write_buffer_manager: Option<crate::flush::WriteBufferManagerRef>,
        listener: Option<crate::engine::listener::EventListenerRef>,
        time_provider: crate::time_provider::TimeProviderRef,
        schema_metadata_manager: SchemaMetadataManagerRef,
        file_ref_manager: FileReferenceManagerRef,
        partition_expr_fetcher: PartitionExprFetcherRef,
    ) -> Result<MitoEngine> {
        config.sanitize(data_home)?;
        let config = Arc::new(config);

        let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(log_store.clone()));

        // A negative total memory reading is clamped to zero before the conversion.
        let total_memory = get_total_memory_bytes().max(0) as u64;
        let scan_memory_limit = config.scan_memory_limit.resolve(total_memory) as usize;
        let scan_memory_tracker =
            QueryMemoryTracker::builder(scan_memory_limit, config.scan_memory_on_exhausted)
                .on_update(|usage| {
                    SCAN_MEMORY_USAGE_BYTES.set(usage as i64);
                })
                .on_reject(|| {
                    SCAN_REQUESTS_REJECTED_TOTAL.inc();
                })
                .build();

        let workers = WorkerGroup::start_for_test(
            config.clone(),
            log_store,
            object_store_manager,
            write_buffer_manager,
            listener,
            schema_metadata_manager,
            file_ref_manager,
            time_provider,
            partition_expr_fetcher,
        )
        .await?;

        let inner = EngineInner {
            workers,
            config,
            wal_raw_entry_reader,
            scan_memory_tracker,
            #[cfg(feature = "enterprise")]
            extension_range_provider_factory: None,
        };

        Ok(MitoEngine {
            inner: Arc::new(inner),
        })
    }

    /// Returns the purge scheduler of the worker group.
    pub fn purge_scheduler(&self) -> &crate::schedule::scheduler::SchedulerRef {
        self.inner.workers.purge_scheduler()
    }
}
1415
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::sst::file::FileMeta;

    #[test]
    fn test_is_valid_region_edit() {
        // Builds an edit from the fields this test varies; `timestamp_ms` and
        // `committed_sequence` are always `None`.
        let new_edit = |files_to_add,
                        files_to_remove,
                        compaction_time_window,
                        flushed_entry_id,
                        flushed_sequence| RegionEdit {
            files_to_add,
            files_to_remove,
            timestamp_ms: None,
            compaction_time_window,
            flushed_entry_id,
            flushed_sequence,
            committed_sequence: None,
        };
        let one_file = || vec![FileMeta::default()];

        // Valid: has only "files_to_add"
        let edit = new_edit(one_file(), vec![], None, None, None);
        assert!(is_valid_region_edit(&edit));

        // Invalid: "files_to_add" and "files_to_remove" are both empty
        let edit = new_edit(vec![], vec![], None, None, None);
        assert!(!is_valid_region_edit(&edit));

        // Valid: "files_to_remove" is not empty
        let edit = new_edit(one_file(), one_file(), None, None, None);
        assert!(is_valid_region_edit(&edit));

        // Invalid: other fields are not all "None"s
        let edit = new_edit(
            one_file(),
            vec![],
            Some(Duration::from_secs(1)),
            None,
            None,
        );
        assert!(!is_valid_region_edit(&edit));

        let edit = new_edit(one_file(), vec![], None, Some(1), None);
        assert!(!is_valid_region_edit(&edit));

        let edit = new_edit(one_file(), vec![], None, None, Some(1));
        assert!(!is_valid_region_edit(&edit));
    }
}