Skip to main content

mito2/
engine.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Mito region engine.
16
17#[cfg(test)]
18mod alter_test;
19#[cfg(test)]
20mod append_mode_test;
21#[cfg(test)]
22mod basic_test;
23#[cfg(test)]
24mod batch_catchup_test;
25#[cfg(test)]
26mod batch_open_test;
27#[cfg(test)]
28mod bump_committed_sequence_test;
29#[cfg(test)]
30mod catchup_test;
31#[cfg(test)]
32mod close_test;
33#[cfg(test)]
34pub(crate) mod compaction_test;
35#[cfg(test)]
36mod create_test;
37#[cfg(test)]
38mod drop_test;
39#[cfg(test)]
40mod edit_region_test;
41#[cfg(test)]
42mod filter_deleted_test;
43#[cfg(test)]
44mod flush_test;
45#[cfg(test)]
46mod index_build_test;
47#[cfg(any(test, feature = "test"))]
48pub mod listener;
49#[cfg(test)]
50mod merge_mode_test;
51#[cfg(test)]
52mod open_test;
53#[cfg(test)]
54mod parallel_test;
55#[cfg(test)]
56mod projection_test;
57#[cfg(test)]
58mod prune_test;
59#[cfg(test)]
60mod row_selector_test;
61#[cfg(test)]
62mod scan_corrupt;
63#[cfg(test)]
64mod scan_test;
65#[cfg(test)]
66mod set_role_state_test;
67#[cfg(test)]
68mod skip_wal_test;
69#[cfg(test)]
70mod staging_test;
71#[cfg(test)]
72mod sync_test;
73#[cfg(test)]
74mod truncate_test;
75
76#[cfg(test)]
77mod copy_region_from_test;
78#[cfg(test)]
79mod remap_manifests_test;
80
81#[cfg(test)]
82mod apply_staging_manifest_test;
83#[cfg(test)]
84mod partition_filter_test;
85mod puffin_index;
86
87use std::any::Any;
88use std::collections::{HashMap, HashSet};
89use std::sync::Arc;
90use std::time::Instant;
91
92use api::region::RegionResponse;
93use async_trait::async_trait;
94use common_base::Plugins;
95use common_error::ext::BoxedError;
96use common_meta::error::UnexpectedSnafu;
97use common_meta::key::SchemaMetadataManagerRef;
98use common_recordbatch::{QueryMemoryTracker, SendableRecordBatchStream};
99use common_stat::get_total_memory_bytes;
100use common_telemetry::{info, tracing, warn};
101use common_wal::options::WalOptions;
102use futures::future::{join_all, try_join_all};
103use futures::stream::{self, Stream, StreamExt};
104use object_store::manager::ObjectStoreManagerRef;
105use snafu::{OptionExt, ResultExt, ensure};
106use store_api::ManifestVersion;
107use store_api::codec::PrimaryKeyEncoding;
108use store_api::logstore::LogStore;
109use store_api::logstore::provider::{KafkaProvider, Provider};
110use store_api::metadata::{ColumnMetadata, RegionMetadataRef};
111use store_api::metric_engine_consts::{
112    MANIFEST_INFO_EXTENSION_KEY, TABLE_COLUMN_METADATA_EXTENSION_KEY,
113};
114use store_api::region_engine::{
115    BatchResponses, MitoCopyRegionFromRequest, MitoCopyRegionFromResponse, RegionEngine,
116    RegionManifestInfo, RegionRole, RegionScannerRef, RegionStatistic, RemapManifestsRequest,
117    RemapManifestsResponse, SetRegionRoleStateResponse, SettableRegionRoleState,
118    SyncRegionFromRequest, SyncRegionFromResponse,
119};
120use store_api::region_request::{
121    AffectedRows, RegionCatchupRequest, RegionOpenRequest, RegionRequest,
122};
123use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
124use store_api::storage::{FileId, FileRefsManifest, RegionId, ScanRequest, SequenceNumber};
125use tokio::sync::{Semaphore, oneshot};
126
127use crate::access_layer::RegionFilePathFactory;
128use crate::cache::{CacheManagerRef, CacheStrategy};
129use crate::config::MitoConfig;
130use crate::engine::puffin_index::{IndexEntryContext, collect_index_entries_from_puffin};
131use crate::error::{
132    IncrementalQueryStaleSnafu, InvalidRequestSnafu, JoinSnafu, MitoManifestInfoSnafu, RecvSnafu,
133    RegionNotFoundSnafu, Result, SerdeJsonSnafu, SerializeColumnMetadataSnafu,
134};
135#[cfg(feature = "enterprise")]
136use crate::extension::BoxedExtensionRangeProviderFactory;
137use crate::gc::GcLimiterRef;
138use crate::manifest::action::RegionEdit;
139use crate::memtable::MemtableStats;
140use crate::metrics::{
141    HANDLE_REQUEST_ELAPSED, SCAN_MEMORY_EXHAUSTED_TOTAL, SCAN_MEMORY_USAGE_BYTES,
142    SCAN_REQUESTS_REJECTED_TOTAL,
143};
144use crate::read::scan_region::{ScanRegion, Scanner};
145use crate::read::stream::ScanBatchStream;
146use crate::region::MitoRegionRef;
147use crate::region::opener::PartitionExprFetcherRef;
148use crate::region::options::parse_wal_options;
149use crate::request::{RegionEditRequest, WorkerRequest};
150use crate::sst::file::{FileMeta, RegionFileId, RegionIndexId};
151use crate::sst::file_ref::FileReferenceManagerRef;
152use crate::sst::index::intermediate::IntermediateManager;
153use crate::sst::index::puffin_manager::PuffinManagerFactory;
154use crate::wal::entry_distributor::{
155    DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE, build_wal_entry_distributor_and_receivers,
156};
157use crate::wal::raw_entry_reader::{LogStoreRawEntryReader, RawEntryReader};
158use crate::worker::WorkerGroup;
159
/// Name of the mito region engine.
pub const MITO_ENGINE_NAME: &str = "mito";
161
/// Builder that gathers everything needed to construct a [MitoEngine].
pub struct MitoEngineBuilder<'a, S: LogStore> {
    /// Data home path, passed to `MitoConfig::sanitize` when building.
    data_home: &'a str,
    /// Engine config; sanitized in `try_build` before use.
    config: MitoConfig,
    /// Log store shared by the worker group and the WAL raw entry reader.
    log_store: Arc<S>,
    /// Object store manager handed to the worker group.
    object_store_manager: ObjectStoreManagerRef,
    /// Schema metadata manager handed to the worker group.
    schema_metadata_manager: SchemaMetadataManagerRef,
    /// File reference manager handed to the worker group.
    file_ref_manager: FileReferenceManagerRef,
    /// Partition expression fetcher handed to the worker group.
    partition_expr_fetcher: PartitionExprFetcherRef,
    /// Plugins handed to the worker group.
    plugins: Plugins,
    /// Optional extension range provider factory (enterprise builds only).
    #[cfg(feature = "enterprise")]
    extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
}
174
175impl<'a, S: LogStore> MitoEngineBuilder<'a, S> {
    /// Creates a builder from the engine's dependencies.
    ///
    /// In enterprise builds the extension range provider factory starts as `None`.
    // The builder simply stores every dependency, hence the long argument list.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        data_home: &'a str,
        config: MitoConfig,
        log_store: Arc<S>,
        object_store_manager: ObjectStoreManagerRef,
        schema_metadata_manager: SchemaMetadataManagerRef,
        file_ref_manager: FileReferenceManagerRef,
        partition_expr_fetcher: PartitionExprFetcherRef,
        plugins: Plugins,
    ) -> Self {
        Self {
            data_home,
            config,
            log_store,
            object_store_manager,
            schema_metadata_manager,
            file_ref_manager,
            plugins,
            partition_expr_fetcher,
            #[cfg(feature = "enterprise")]
            extension_range_provider_factory: None,
        }
    }
200
201    #[cfg(feature = "enterprise")]
202    #[must_use]
203    pub fn with_extension_range_provider_factory(
204        self,
205        extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
206    ) -> Self {
207        Self {
208            extension_range_provider_factory,
209            ..self
210        }
211    }
212
    /// Consumes the builder and constructs the [MitoEngine].
    ///
    /// Sanitizes the config against `data_home`, starts the worker group,
    /// then creates the WAL raw entry reader and the scan memory tracker.
    ///
    /// # Errors
    ///
    /// Returns an error if sanitizing the config or starting the worker group fails.
    pub async fn try_build(mut self) -> Result<MitoEngine> {
        self.config.sanitize(self.data_home)?;

        // The sanitized config is shared between the worker group and the engine.
        let config = Arc::new(self.config);
        let workers = WorkerGroup::start(
            config.clone(),
            self.log_store.clone(),
            self.object_store_manager,
            self.schema_metadata_manager,
            self.file_ref_manager,
            self.partition_expr_fetcher.clone(),
            self.plugins,
        )
        .await?;
        let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(self.log_store));
        // Clamp to zero before the unsigned cast so a negative value cannot wrap around.
        let total_memory = get_total_memory_bytes().max(0) as u64;
        // NOTE(review): presumably resolves a relative/absolute limit into bytes — confirm.
        let scan_memory_limit = config.scan_memory_limit.resolve(total_memory) as usize;
        // The tracker callbacks only update the scan memory metrics.
        let scan_memory_tracker =
            QueryMemoryTracker::builder(scan_memory_limit, config.scan_memory_on_exhausted)
                .on_update(|usage| {
                    SCAN_MEMORY_USAGE_BYTES.set(usage as i64);
                })
                .on_exhausted(|| {
                    SCAN_MEMORY_EXHAUSTED_TOTAL.inc();
                })
                .on_reject(|| {
                    SCAN_REQUESTS_REJECTED_TOTAL.inc();
                })
                .build();

        let inner = EngineInner {
            workers,
            config,
            wal_raw_entry_reader,
            scan_memory_tracker,
            #[cfg(feature = "enterprise")]
            extension_range_provider_factory: None,
        };

        // Enterprise builds forward the factory configured on the builder.
        #[cfg(feature = "enterprise")]
        let inner =
            inner.with_extension_range_provider_factory(self.extension_range_provider_factory);

        Ok(MitoEngine {
            inner: Arc::new(inner),
        })
    }
260}
261
/// Region engine implementation for timeseries data.
///
/// Cloning the engine is cheap: clones share the same inner state through an [Arc].
#[derive(Clone)]
pub struct MitoEngine {
    /// Shared inner engine state.
    inner: Arc<EngineInner>,
}
267
268impl MitoEngine {
269    /// Returns a new [MitoEngine] with specific `config`, `log_store` and `object_store`.
270    #[allow(clippy::too_many_arguments)]
271    pub async fn new<S: LogStore>(
272        data_home: &str,
273        config: MitoConfig,
274        log_store: Arc<S>,
275        object_store_manager: ObjectStoreManagerRef,
276        schema_metadata_manager: SchemaMetadataManagerRef,
277        file_ref_manager: FileReferenceManagerRef,
278        partition_expr_fetcher: PartitionExprFetcherRef,
279        plugins: Plugins,
280    ) -> Result<MitoEngine> {
281        let builder = MitoEngineBuilder::new(
282            data_home,
283            config,
284            log_store,
285            object_store_manager,
286            schema_metadata_manager,
287            file_ref_manager,
288            partition_expr_fetcher,
289            plugins,
290        );
291        builder.try_build().await
292    }
293
    /// Returns the config of the engine.
    pub fn mito_config(&self) -> &MitoConfig {
        &self.inner.config
    }
297
    /// Returns the cache manager of the worker group.
    pub fn cache_manager(&self) -> CacheManagerRef {
        self.inner.workers.cache_manager()
    }
301
    /// Returns the file reference manager of the worker group.
    pub fn file_ref_manager(&self) -> FileReferenceManagerRef {
        self.inner.workers.file_ref_manager()
    }
305
    /// Returns the GC limiter of the worker group.
    pub fn gc_limiter(&self) -> GcLimiterRef {
        self.inner.workers.gc_limiter()
    }
309
    /// Returns a reference to the object store manager of the worker group.
    pub fn object_store_manager(&self) -> &ObjectStoreManagerRef {
        self.inner.workers.object_store_manager()
    }
313
    /// Returns a reference to the puffin manager factory of the worker group.
    pub fn puffin_manager_factory(&self) -> &PuffinManagerFactory {
        self.inner.workers.puffin_manager_factory()
    }
317
    /// Returns a reference to the intermediate manager of the worker group.
    pub fn intermediate_manager(&self) -> &IntermediateManager {
        self.inner.workers.intermediate_manager()
    }
321
    /// Returns a reference to the schema metadata manager of the worker group.
    pub fn schema_metadata_manager(&self) -> &SchemaMetadataManagerRef {
        self.inner.workers.schema_metadata_manager()
    }
325
326    /// Get all tmp ref files for given region ids, excluding files that's already in manifest.
327    pub async fn get_snapshot_of_file_refs(
328        &self,
329        file_handle_regions: impl IntoIterator<Item = RegionId>,
330        related_regions: HashMap<RegionId, HashSet<RegionId>>,
331    ) -> Result<FileRefsManifest> {
332        let file_ref_mgr = self.file_ref_manager();
333
334        let file_handle_regions = file_handle_regions.into_iter().collect::<Vec<_>>();
335        // Convert region IDs to MitoRegionRef objects, ignore regions that do not exist on current datanode
336        // as regions on other datanodes are not managed by this engine.
337        let query_regions: Vec<MitoRegionRef> = file_handle_regions
338            .into_iter()
339            .filter_map(|region_id| self.find_region(region_id))
340            .collect();
341
342        let dst_region_to_src_regions: Vec<(MitoRegionRef, HashSet<RegionId>)> = {
343            let dst2src = related_regions
344                .into_iter()
345                .flat_map(|(src, dsts)| dsts.into_iter().map(move |dst| (dst, src)))
346                .fold(
347                    HashMap::<RegionId, HashSet<RegionId>>::new(),
348                    |mut acc, (k, v)| {
349                        let entry = acc.entry(k).or_default();
350                        entry.insert(v);
351                        acc
352                    },
353                );
354            let mut dst_region_to_src_regions = Vec::with_capacity(dst2src.len());
355            for (dst_region, srcs) in dst2src {
356                let Some(dst_region) = self.find_region(dst_region) else {
357                    continue;
358                };
359                dst_region_to_src_regions.push((dst_region, srcs));
360            }
361            dst_region_to_src_regions
362        };
363
364        file_ref_mgr
365            .get_snapshot_of_file_refs(query_regions, dst_region_to_src_regions)
366            .await
367    }
368
    /// Returns true if the specific region exists, as reported by the worker group.
    pub fn is_region_exists(&self, region_id: RegionId) -> bool {
        self.inner.workers.is_region_exists(region_id)
    }
373
    /// Returns true if the specific region is opening.
    pub fn is_region_opening(&self, region_id: RegionId) -> bool {
        self.inner.workers.is_region_opening(region_id)
    }
378
    /// Returns true if the specific region is catching up, as reported by the worker group.
    pub fn is_region_catching_up(&self, region_id: RegionId) -> bool {
        self.inner.workers.is_region_catching_up(region_id)
    }
383
384    /// Returns the region disk/memory statistic.
385    pub fn get_region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
386        self.find_region(region_id)
387            .map(|region| region.region_statistic())
388    }
389
390    /// Returns primary key encoding of the region.
391    pub fn get_primary_key_encoding(&self, region_id: RegionId) -> Option<PrimaryKeyEncoding> {
392        self.find_region(region_id)
393            .map(|r| r.primary_key_encoding())
394    }
395
396    /// Handle substrait query and return a stream of record batches
397    ///
398    /// Notice that the output stream's ordering is not guranateed. If order
399    /// matter, please use [`scanner`] to build a [`Scanner`] to consume.
400    #[tracing::instrument(skip_all)]
401    pub async fn scan_to_stream(
402        &self,
403        region_id: RegionId,
404        request: ScanRequest,
405    ) -> Result<SendableRecordBatchStream, BoxedError> {
406        self.scanner(region_id, request)
407            .await
408            .map_err(BoxedError::new)?
409            .scan()
410            .await
411    }
412
413    /// Scan [`Batch`]es by [`ScanRequest`].
414    pub async fn scan_batch(
415        &self,
416        region_id: RegionId,
417        request: ScanRequest,
418        filter_deleted: bool,
419    ) -> Result<ScanBatchStream> {
420        let mut scan_region = self.scan_region(region_id, request)?;
421        scan_region.set_filter_deleted(filter_deleted);
422        scan_region.scanner().await?.scan_batch()
423    }
424
425    /// Returns a scanner to scan for `request`.
426    pub(crate) async fn scanner(
427        &self,
428        region_id: RegionId,
429        request: ScanRequest,
430    ) -> Result<Scanner> {
431        self.scan_region(region_id, request)?.scanner().await
432    }
433
    /// Scans a region.
    ///
    /// Builds a [`ScanRegion`] for `request` by delegating to the inner engine.
    #[tracing::instrument(skip_all, fields(region_id = %region_id))]
    fn scan_region(&self, region_id: RegionId, request: ScanRequest) -> Result<ScanRegion> {
        self.inner.scan_region(region_id, request)
    }
439
440    /// Edit region's metadata by [RegionEdit] directly. Use with care.
441    /// Now we only allow adding files or removing files from region (the [RegionEdit] struct can only contain a non-empty "files_to_add" or "files_to_remove" field).
442    /// Other region editing intention will result in an "invalid request" error.
443    /// Also note that if a region is to be edited directly, we MUST not write data to it thereafter.
444    pub async fn edit_region(&self, region_id: RegionId, edit: RegionEdit) -> Result<()> {
445        let _timer = HANDLE_REQUEST_ELAPSED
446            .with_label_values(&["edit_region"])
447            .start_timer();
448
449        ensure!(
450            is_valid_region_edit(&edit),
451            InvalidRequestSnafu {
452                region_id,
453                reason: "invalid region edit"
454            }
455        );
456
457        let (tx, rx) = oneshot::channel();
458        let request = WorkerRequest::EditRegion(RegionEditRequest::new(region_id, edit, true, tx));
459        self.inner
460            .workers
461            .submit_to_worker(region_id, request)
462            .await?;
463        rx.await.context(RecvSnafu)?
464    }
465
    /// Handles copy region from request.
    ///
    /// This method is only supported for internal use and is not exposed in the trait implementation.
    /// It delegates to the inner engine.
    pub async fn copy_region_from(
        &self,
        region_id: RegionId,
        request: MitoCopyRegionFromRequest,
    ) -> Result<MitoCopyRegionFromResponse> {
        self.inner.copy_region_from(region_id, request).await
    }
476
    /// Test-only alias of `find_region`.
    #[cfg(test)]
    pub(crate) fn get_region(&self, id: RegionId) -> Option<crate::region::MitoRegionRef> {
        self.find_region(id)
    }
481
    /// Looks up the region in the worker group; returns `None` if it is not found.
    pub fn find_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
        self.inner.workers.get_region(region_id)
    }
485
    /// Returns all regions of the worker group, collected into a vector.
    pub fn regions(&self) -> Vec<MitoRegionRef> {
        self.inner.workers.all_regions().collect()
    }
490
491    fn encode_manifest_info_to_extensions(
492        region_id: &RegionId,
493        manifest_info: RegionManifestInfo,
494        extensions: &mut HashMap<String, Vec<u8>>,
495    ) -> Result<()> {
496        let region_manifest_info = vec![(*region_id, manifest_info)];
497
498        extensions.insert(
499            MANIFEST_INFO_EXTENSION_KEY.to_string(),
500            RegionManifestInfo::encode_list(&region_manifest_info).context(SerdeJsonSnafu)?,
501        );
502        info!(
503            "Added manifest info: {:?} to extensions, region_id: {:?}",
504            region_manifest_info, region_id
505        );
506        Ok(())
507    }
508
509    fn encode_column_metadatas_to_extensions(
510        region_id: &RegionId,
511        column_metadatas: Vec<ColumnMetadata>,
512        extensions: &mut HashMap<String, Vec<u8>>,
513    ) -> Result<()> {
514        extensions.insert(
515            TABLE_COLUMN_METADATA_EXTENSION_KEY.to_string(),
516            ColumnMetadata::encode_list(&column_metadatas).context(SerializeColumnMetadataSnafu)?,
517        );
518        info!(
519            "Added column metadatas: {:?} to extensions, region_id: {:?}",
520            column_metadatas, region_id
521        );
522        Ok(())
523    }
524
525    /// Find the current version's memtables and SSTs stats by region_id.
526    /// The stats must be collected in one place one time to ensure data consistency.
527    pub fn find_memtable_and_sst_stats(
528        &self,
529        region_id: RegionId,
530    ) -> Result<(Vec<MemtableStats>, Vec<FileMeta>)> {
531        let region = self
532            .find_region(region_id)
533            .context(RegionNotFoundSnafu { region_id })?;
534
535        let version = region.version();
536        let memtable_stats = version
537            .memtables
538            .list_memtables()
539            .iter()
540            .map(|x| x.stats())
541            .collect::<Vec<_>>();
542
543        let sst_stats = version
544            .ssts
545            .levels()
546            .iter()
547            .flat_map(|level| level.files().map(|x| x.meta_ref()))
548            .cloned()
549            .collect::<Vec<_>>();
550        Ok((memtable_stats, sst_stats))
551    }
552
553    /// Lists all SSTs from the manifest of all regions in the engine.
554    pub async fn all_ssts_from_manifest(&self) -> Vec<ManifestSstEntry> {
555        let node_id = self.inner.workers.file_ref_manager().node_id();
556        let regions = self.inner.workers.all_regions();
557
558        let mut results = Vec::new();
559        for region in regions {
560            let mut entries = region.manifest_sst_entries().await;
561            for e in &mut entries {
562                e.node_id = node_id;
563            }
564            results.extend(entries);
565        }
566
567        results
568    }
569
    /// Lists metadata about all puffin index targets stored in the engine.
    ///
    /// Regions are processed one after another. Within a region, the manifest
    /// SST entries are inspected concurrently (up to 8 at a time). Entries
    /// without an index file path, or whose file id cannot be parsed, contribute
    /// no results.
    pub async fn all_index_metas(&self) -> Vec<PuffinIndexMetaEntry> {
        let node_id = self.inner.workers.file_ref_manager().node_id();
        let cache_manager = self.inner.workers.cache_manager();
        // Clone the optional caches once; each per-entry future gets its own clone below.
        let puffin_metadata_cache = cache_manager.puffin_metadata_cache().cloned();
        let bloom_filter_cache = cache_manager.bloom_filter_index_cache().cloned();
        let inverted_index_cache = cache_manager.inverted_index_cache().cloned();

        let mut results = Vec::new();

        for region in self.inner.workers.all_regions() {
            let manifest_entries = region.manifest_sst_entries().await;
            let access_layer = region.access_layer.clone();
            let table_dir = access_layer.table_dir().to_string();
            let path_type = access_layer.path_type();
            let object_store = access_layer.object_store().clone();
            let puffin_factory = access_layer.puffin_manager_factory().clone();
            let path_factory = RegionFilePathFactory::new(table_dir, path_type);

            // One lazy future per manifest entry; nothing runs until the stream polls it.
            let entry_futures = manifest_entries.into_iter().map(|entry| {
                // Each future owns its own handles so it can be `async move`.
                let object_store = object_store.clone();
                let path_factory = path_factory.clone();
                let puffin_factory = puffin_factory.clone();
                let puffin_metadata_cache = puffin_metadata_cache.clone();
                let bloom_filter_cache = bloom_filter_cache.clone();
                let inverted_index_cache = inverted_index_cache.clone();

                async move {
                    // Entries without an index file have nothing to report.
                    let Some(index_file_path) = entry.index_file_path.as_ref() else {
                        return Vec::new();
                    };

                    let index_version = entry.index_version;
                    // The manifest entry stores the file id as a string; skip the entry
                    // (with a warning) if it cannot be parsed.
                    let file_id = match FileId::parse_str(&entry.file_id) {
                        Ok(file_id) => file_id,
                        Err(err) => {
                            warn!(
                                err;
                                "Failed to parse puffin index file id, table_dir: {}, file_id: {}",
                                entry.table_dir,
                                entry.file_id
                            );
                            return Vec::new();
                        }
                    };
                    let region_index_id = RegionIndexId::new(
                        RegionFileId::new(entry.region_id, file_id),
                        index_version,
                    );
                    // The context borrows from `entry`, which this future owns.
                    let context = IndexEntryContext {
                        table_dir: &entry.table_dir,
                        index_file_path: index_file_path.as_str(),
                        region_id: entry.region_id,
                        table_id: entry.table_id,
                        region_number: entry.region_number,
                        region_group: entry.region_group,
                        region_sequence: entry.region_sequence,
                        file_id: &entry.file_id,
                        index_file_size: entry.index_file_size,
                        node_id,
                    };

                    let manager = puffin_factory
                        .build(object_store, path_factory)
                        .with_puffin_metadata_cache(puffin_metadata_cache);

                    collect_index_entries_from_puffin(
                        manager,
                        region_index_id,
                        context,
                        bloom_filter_cache,
                        inverted_index_cache,
                    )
                    .await
                }
            });

            // Results arrive in completion order, not in manifest order.
            let mut meta_stream = stream::iter(entry_futures).buffer_unordered(8); // Parallelism is 8.
            while let Some(mut metas) = meta_stream.next().await {
                results.append(&mut metas);
            }
        }

        results
    }
655
656    /// Lists all SSTs from the storage layer of all regions in the engine.
657    pub fn all_ssts_from_storage(&self) -> impl Stream<Item = Result<StorageSstEntry>> {
658        let node_id = self.inner.workers.file_ref_manager().node_id();
659        let regions = self.inner.workers.all_regions();
660
661        let mut layers_distinct_table_dirs = HashMap::new();
662        for region in regions {
663            let table_dir = region.access_layer.table_dir();
664            if !layers_distinct_table_dirs.contains_key(table_dir) {
665                layers_distinct_table_dirs
666                    .insert(table_dir.to_string(), region.access_layer.clone());
667            }
668        }
669
670        stream::iter(layers_distinct_table_dirs)
671            .map(|(_, access_layer)| access_layer.storage_sst_entries())
672            .flatten()
673            .map(move |entry| {
674                entry.map(move |mut entry| {
675                    entry.node_id = node_id;
676                    entry
677                })
678            })
679    }
680}
681
682/// Check whether the region edit is valid.
683///
684/// Only adding or removing files to region is considered valid now.
685fn is_valid_region_edit(edit: &RegionEdit) -> bool {
686    (!edit.files_to_add.is_empty() || !edit.files_to_remove.is_empty())
687        && matches!(
688            edit,
689            RegionEdit {
690                files_to_add: _,
691                files_to_remove: _,
692                timestamp_ms: _,
693                compaction_time_window: None,
694                flushed_entry_id: None,
695                flushed_sequence: None,
696                ..
697            }
698        )
699}
700
/// Inner struct of [MitoEngine].
struct EngineInner {
    /// Region workers group.
    workers: WorkerGroup,
    /// Config of the engine.
    config: Arc<MitoConfig>,
    /// The Wal raw entry reader.
    wal_raw_entry_reader: Arc<dyn RawEntryReader>,
    /// Memory tracker for table scans.
    scan_memory_tracker: QueryMemoryTracker,
    /// Optional extension range provider factory (enterprise builds only).
    #[cfg(feature = "enterprise")]
    extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
}
714
/// Region open requests grouped by the Kafka topic of their WAL options.
type TopicGroupedRegionOpenRequests = HashMap<String, Vec<(RegionId, RegionOpenRequest)>>;
716
717/// Returns requests([TopicGroupedRegionOpenRequests]) grouped by topic and remaining requests.
718fn prepare_batch_open_requests(
719    requests: Vec<(RegionId, RegionOpenRequest)>,
720) -> Result<(
721    TopicGroupedRegionOpenRequests,
722    Vec<(RegionId, RegionOpenRequest)>,
723)> {
724    let mut topic_to_regions: HashMap<String, Vec<(RegionId, RegionOpenRequest)>> = HashMap::new();
725    let mut remaining_regions: Vec<(RegionId, RegionOpenRequest)> = Vec::new();
726    for (region_id, request) in requests {
727        match parse_wal_options(&request.options).context(SerdeJsonSnafu)? {
728            WalOptions::Kafka(options) => {
729                topic_to_regions
730                    .entry(options.topic)
731                    .or_default()
732                    .push((region_id, request));
733            }
734            WalOptions::RaftEngine | WalOptions::Noop => {
735                remaining_regions.push((region_id, request));
736            }
737        }
738    }
739
740    Ok((topic_to_regions, remaining_regions))
741}
742
743impl EngineInner {
744    #[cfg(feature = "enterprise")]
745    #[must_use]
746    fn with_extension_range_provider_factory(
747        self,
748        extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
749    ) -> Self {
750        Self {
751            extension_range_provider_factory,
752            ..self
753        }
754    }
755
    /// Stop the inner engine.
    ///
    /// Stops the worker group and returns its result.
    async fn stop(&self) -> Result<()> {
        self.workers.stop().await
    }
760
761    fn find_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
762        self.workers
763            .get_region(region_id)
764            .context(RegionNotFoundSnafu { region_id })
765    }
766
767    /// Get metadata of a region.
768    ///
769    /// Returns error if the region doesn't exist.
770    fn get_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef> {
771        // Reading a region doesn't need to go through the region worker thread.
772        let region = self.find_region(region_id)?;
773        Ok(region.metadata())
774    }
775
    /// Opens all regions that share the Kafka `topic`.
    ///
    /// A WAL entry distributor and one entry receiver per region are built for
    /// the topic. Each region's open request is submitted to its worker together
    /// with its receiver, then the distributor is run on the global runtime while
    /// the worker responses are awaited.
    ///
    /// Returns the per-region open results, paired with the region ids in
    /// request order.
    ///
    /// # Errors
    ///
    /// Returns an error if submitting a request to a worker fails, or if the
    /// distribution task fails to join or reports an error.
    async fn open_topic_regions(
        &self,
        topic: String,
        region_requests: Vec<(RegionId, RegionOpenRequest)>,
    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
        let now = Instant::now();
        let region_ids = region_requests
            .iter()
            .map(|(region_id, _)| *region_id)
            .collect::<Vec<_>>();
        let provider = Provider::kafka_provider(topic);
        let (distributor, entry_receivers) = build_wal_entry_distributor_and_receivers(
            provider.clone(),
            self.wal_raw_entry_reader.clone(),
            &region_ids,
            DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
        );

        let mut responses = Vec::with_capacity(region_requests.len());
        // NOTE(review): assumes `entry_receivers` is ordered like `region_ids` — confirm
        // against `build_wal_entry_distributor_and_receivers`.
        for ((region_id, request), entry_receiver) in
            region_requests.into_iter().zip(entry_receivers)
        {
            let (request, receiver) =
                WorkerRequest::new_open_region_request(region_id, request, Some(entry_receiver));
            // A submit failure aborts the whole batch before distribution starts.
            self.workers.submit_to_worker(region_id, request).await?;
            responses.push(async move { receiver.await.context(RecvSnafu)? });
        }

        // Starts the entries distribution in a background task.
        let distribution =
            common_runtime::spawn_global(async move { distributor.distribute().await });
        // Waits for worker returns.
        let responses = join_all(responses).await;
        // Waits for the distribution task; the first `?` is the join error, the second its result.
        distribution.await.context(JoinSnafu)??;

        let num_failure = responses.iter().filter(|r| r.is_err()).count();
        info!(
            "Opened {} regions for topic '{}', failures: {}, elapsed: {:?}",
            region_ids.len() - num_failure,
            // Safety: provider is kafka provider.
            provider.as_kafka_provider().unwrap(),
            num_failure,
            now.elapsed(),
        );
        Ok(region_ids.into_iter().zip(responses).collect())
    }
822
823    async fn handle_batch_open_requests(
824        &self,
825        parallelism: usize,
826        requests: Vec<(RegionId, RegionOpenRequest)>,
827    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
828        let semaphore = Arc::new(Semaphore::new(parallelism));
829        let (topic_to_region_requests, remaining_region_requests) =
830            prepare_batch_open_requests(requests)?;
831        let mut responses =
832            Vec::with_capacity(topic_to_region_requests.len() + remaining_region_requests.len());
833
834        if !topic_to_region_requests.is_empty() {
835            let mut tasks = Vec::with_capacity(topic_to_region_requests.len());
836            for (topic, region_requests) in topic_to_region_requests {
837                let semaphore_moved = semaphore.clone();
838                tasks.push(async move {
839                    // Safety: semaphore must exist
840                    let _permit = semaphore_moved.acquire().await.unwrap();
841                    self.open_topic_regions(topic, region_requests).await
842                })
843            }
844            let r = try_join_all(tasks).await?;
845            responses.extend(r.into_iter().flatten());
846        }
847
848        if !remaining_region_requests.is_empty() {
849            let mut tasks = Vec::with_capacity(remaining_region_requests.len());
850            let mut region_ids = Vec::with_capacity(remaining_region_requests.len());
851            for (region_id, request) in remaining_region_requests {
852                let semaphore_moved = semaphore.clone();
853                region_ids.push(region_id);
854                tasks.push(async move {
855                    // Safety: semaphore must exist
856                    let _permit = semaphore_moved.acquire().await.unwrap();
857                    let (request, receiver) =
858                        WorkerRequest::new_open_region_request(region_id, request, None);
859
860                    self.workers.submit_to_worker(region_id, request).await?;
861
862                    receiver.await.context(RecvSnafu)?
863                })
864            }
865
866            let results = join_all(tasks).await;
867            responses.extend(region_ids.into_iter().zip(results));
868        }
869
870        Ok(responses)
871    }
872
873    async fn catchup_topic_regions(
874        &self,
875        provider: Provider,
876        region_requests: Vec<(RegionId, RegionCatchupRequest)>,
877    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
878        let now = Instant::now();
879        let region_ids = region_requests
880            .iter()
881            .map(|(region_id, _)| *region_id)
882            .collect::<Vec<_>>();
883        let (distributor, entry_receivers) = build_wal_entry_distributor_and_receivers(
884            provider.clone(),
885            self.wal_raw_entry_reader.clone(),
886            &region_ids,
887            DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
888        );
889
890        let mut responses = Vec::with_capacity(region_requests.len());
891        for ((region_id, request), entry_receiver) in
892            region_requests.into_iter().zip(entry_receivers)
893        {
894            let (request, receiver) =
895                WorkerRequest::new_catchup_region_request(region_id, request, Some(entry_receiver));
896            self.workers.submit_to_worker(region_id, request).await?;
897            responses.push(async move { receiver.await.context(RecvSnafu)? });
898        }
899
900        // Wait for entries distribution.
901        let distribution =
902            common_runtime::spawn_global(async move { distributor.distribute().await });
903        // Wait for worker returns.
904        let responses = join_all(responses).await;
905        distribution.await.context(JoinSnafu)??;
906
907        let num_failure = responses.iter().filter(|r| r.is_err()).count();
908        info!(
909            "Caught up {} regions for topic '{}', failures: {}, elapsed: {:?}",
910            region_ids.len() - num_failure,
911            // Safety: provider is kafka provider.
912            provider.as_kafka_provider().unwrap(),
913            num_failure,
914            now.elapsed(),
915        );
916
917        Ok(region_ids.into_iter().zip(responses).collect())
918    }
919
920    async fn handle_batch_catchup_requests(
921        &self,
922        parallelism: usize,
923        requests: Vec<(RegionId, RegionCatchupRequest)>,
924    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
925        let mut responses = Vec::with_capacity(requests.len());
926        let mut topic_regions: HashMap<Arc<KafkaProvider>, Vec<_>> = HashMap::new();
927        let mut remaining_region_requests = vec![];
928
929        for (region_id, request) in requests {
930            match self.workers.get_region(region_id) {
931                Some(region) => match region.provider.as_kafka_provider() {
932                    Some(provider) => {
933                        topic_regions
934                            .entry(provider.clone())
935                            .or_default()
936                            .push((region_id, request));
937                    }
938                    None => {
939                        remaining_region_requests.push((region_id, request));
940                    }
941                },
942                None => responses.push((region_id, RegionNotFoundSnafu { region_id }.fail())),
943            }
944        }
945
946        let semaphore = Arc::new(Semaphore::new(parallelism));
947
948        if !topic_regions.is_empty() {
949            let mut tasks = Vec::with_capacity(topic_regions.len());
950            for (provider, region_requests) in topic_regions {
951                let semaphore_moved = semaphore.clone();
952                tasks.push(async move {
953                    // Safety: semaphore must exist
954                    let _permit = semaphore_moved.acquire().await.unwrap();
955                    self.catchup_topic_regions(Provider::Kafka(provider), region_requests)
956                        .await
957                })
958            }
959
960            let r = try_join_all(tasks).await?;
961            responses.extend(r.into_iter().flatten());
962        }
963
964        if !remaining_region_requests.is_empty() {
965            let mut tasks = Vec::with_capacity(remaining_region_requests.len());
966            let mut region_ids = Vec::with_capacity(remaining_region_requests.len());
967            for (region_id, request) in remaining_region_requests {
968                let semaphore_moved = semaphore.clone();
969                region_ids.push(region_id);
970                tasks.push(async move {
971                    // Safety: semaphore must exist
972                    let _permit = semaphore_moved.acquire().await.unwrap();
973                    let (request, receiver) =
974                        WorkerRequest::new_catchup_region_request(region_id, request, None);
975
976                    self.workers.submit_to_worker(region_id, request).await?;
977
978                    receiver.await.context(RecvSnafu)?
979                })
980            }
981
982            let results = join_all(tasks).await;
983            responses.extend(region_ids.into_iter().zip(results));
984        }
985
986        Ok(responses)
987    }
988
989    /// Handles [RegionRequest] and return its executed result.
990    async fn handle_request(
991        &self,
992        region_id: RegionId,
993        request: RegionRequest,
994    ) -> Result<AffectedRows> {
995        let region_metadata = self.get_metadata(region_id).ok();
996        let (request, receiver) =
997            WorkerRequest::try_from_region_request(region_id, request, region_metadata)?;
998        self.workers.submit_to_worker(region_id, request).await?;
999
1000        receiver.await.context(RecvSnafu)?
1001    }
1002
1003    /// Returns the sequence of latest committed data.
1004    fn get_committed_sequence(&self, region_id: RegionId) -> Result<SequenceNumber> {
1005        // Reading a region doesn't need to go through the region worker thread.
1006        self.find_region(region_id)
1007            .map(|r| r.find_committed_sequence())
1008    }
1009
1010    /// Handles the scan `request` and returns a [ScanRegion].
1011    #[tracing::instrument(skip_all, fields(region_id = %region_id))]
1012    fn scan_region(&self, region_id: RegionId, mut request: ScanRequest) -> Result<ScanRegion> {
1013        let query_start = Instant::now();
1014        // Reading a region doesn't need to go through the region worker thread.
1015        let region = self.find_region(region_id)?;
1016        let version_data = region.version_control.current();
1017        let version = version_data.version;
1018
1019        if request.snapshot_on_scan && request.memtable_max_sequence.is_none() {
1020            request.memtable_max_sequence = Some(version_data.committed_sequence);
1021        }
1022
1023        if let Some(given_seq) = request.memtable_min_sequence {
1024            let min_readable_seq = version.flushed_sequence;
1025            ensure!(
1026                given_seq >= min_readable_seq,
1027                IncrementalQueryStaleSnafu {
1028                    region_id,
1029                    given_seq,
1030                    min_readable_seq,
1031                }
1032            );
1033        }
1034
1035        // Get cache.
1036        let cache_manager = self.workers.cache_manager();
1037
1038        let scan_region = ScanRegion::new(
1039            version,
1040            region.access_layer.clone(),
1041            request,
1042            CacheStrategy::EnableAll(cache_manager),
1043        )
1044        .with_max_concurrent_scan_files(self.config.max_concurrent_scan_files)
1045        .with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
1046        .with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
1047        .with_ignore_bloom_filter(self.config.bloom_filter_index.apply_on_query.disabled())
1048        .with_start_time(query_start);
1049
1050        #[cfg(feature = "enterprise")]
1051        let scan_region = self.maybe_fill_extension_range_provider(scan_region, region);
1052
1053        Ok(scan_region)
1054    }
1055
1056    #[cfg(feature = "enterprise")]
1057    fn maybe_fill_extension_range_provider(
1058        &self,
1059        mut scan_region: ScanRegion,
1060        region: MitoRegionRef,
1061    ) -> ScanRegion {
1062        if region.is_follower()
1063            && let Some(factory) = self.extension_range_provider_factory.as_ref()
1064        {
1065            scan_region
1066                .set_extension_range_provider(factory.create_extension_range_provider(region));
1067        }
1068        scan_region
1069    }
1070
1071    /// Converts the [`RegionRole`].
1072    fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<()> {
1073        let region = self.find_region(region_id)?;
1074        region.set_role(role);
1075        Ok(())
1076    }
1077
1078    /// Sets read-only for a region and ensures no more writes in the region after it returns.
1079    async fn set_region_role_state_gracefully(
1080        &self,
1081        region_id: RegionId,
1082        region_role_state: SettableRegionRoleState,
1083    ) -> Result<SetRegionRoleStateResponse> {
1084        // Notes: It acquires the mutable ownership to ensure no other threads,
1085        // Therefore, we submit it to the worker.
1086        let (request, receiver) =
1087            WorkerRequest::new_set_readonly_gracefully(region_id, region_role_state);
1088        self.workers.submit_to_worker(region_id, request).await?;
1089
1090        receiver.await.context(RecvSnafu)
1091    }
1092
1093    async fn sync_region(
1094        &self,
1095        region_id: RegionId,
1096        manifest_info: RegionManifestInfo,
1097    ) -> Result<(ManifestVersion, bool)> {
1098        ensure!(manifest_info.is_mito(), MitoManifestInfoSnafu);
1099        let manifest_version = manifest_info.data_manifest_version();
1100        let (request, receiver) =
1101            WorkerRequest::new_sync_region_request(region_id, manifest_version);
1102        self.workers.submit_to_worker(region_id, request).await?;
1103
1104        receiver.await.context(RecvSnafu)?
1105    }
1106
1107    async fn remap_manifests(
1108        &self,
1109        request: RemapManifestsRequest,
1110    ) -> Result<RemapManifestsResponse> {
1111        let region_id = request.region_id;
1112        let (request, receiver) = WorkerRequest::try_from_remap_manifests_request(request)?;
1113        self.workers.submit_to_worker(region_id, request).await?;
1114        let manifest_paths = receiver.await.context(RecvSnafu)??;
1115        Ok(RemapManifestsResponse { manifest_paths })
1116    }
1117
1118    async fn copy_region_from(
1119        &self,
1120        region_id: RegionId,
1121        request: MitoCopyRegionFromRequest,
1122    ) -> Result<MitoCopyRegionFromResponse> {
1123        let (request, receiver) =
1124            WorkerRequest::try_from_copy_region_from_request(region_id, request)?;
1125        self.workers.submit_to_worker(region_id, request).await?;
1126        let response = receiver.await.context(RecvSnafu)??;
1127        Ok(response)
1128    }
1129
1130    fn role(&self, region_id: RegionId) -> Option<RegionRole> {
1131        self.workers
1132            .get_region(region_id)
1133            .map(|region| region.region_role())
1134    }
1135}
1136
1137fn map_batch_responses(responses: Vec<(RegionId, Result<AffectedRows>)>) -> BatchResponses {
1138    responses
1139        .into_iter()
1140        .map(|(region_id, response)| {
1141            (
1142                region_id,
1143                response.map(RegionResponse::new).map_err(BoxedError::new),
1144            )
1145        })
1146        .collect()
1147}
1148
1149#[async_trait]
1150impl RegionEngine for MitoEngine {
1151    fn name(&self) -> &str {
1152        MITO_ENGINE_NAME
1153    }
1154
1155    #[tracing::instrument(skip_all)]
1156    async fn handle_batch_open_requests(
1157        &self,
1158        parallelism: usize,
1159        requests: Vec<(RegionId, RegionOpenRequest)>,
1160    ) -> Result<BatchResponses, BoxedError> {
1161        // TODO(weny): add metrics.
1162        self.inner
1163            .handle_batch_open_requests(parallelism, requests)
1164            .await
1165            .map(map_batch_responses)
1166            .map_err(BoxedError::new)
1167    }
1168
1169    #[tracing::instrument(skip_all)]
1170    async fn handle_batch_catchup_requests(
1171        &self,
1172        parallelism: usize,
1173        requests: Vec<(RegionId, RegionCatchupRequest)>,
1174    ) -> Result<BatchResponses, BoxedError> {
1175        self.inner
1176            .handle_batch_catchup_requests(parallelism, requests)
1177            .await
1178            .map(map_batch_responses)
1179            .map_err(BoxedError::new)
1180    }
1181
1182    #[tracing::instrument(skip_all)]
1183    async fn handle_request(
1184        &self,
1185        region_id: RegionId,
1186        request: RegionRequest,
1187    ) -> Result<RegionResponse, BoxedError> {
1188        let _timer = HANDLE_REQUEST_ELAPSED
1189            .with_label_values(&[request.request_type()])
1190            .start_timer();
1191
1192        let is_alter = matches!(request, RegionRequest::Alter(_));
1193        let is_create = matches!(request, RegionRequest::Create(_));
1194        let mut response = self
1195            .inner
1196            .handle_request(region_id, request)
1197            .await
1198            .map(RegionResponse::new)
1199            .map_err(BoxedError::new)?;
1200
1201        if is_alter {
1202            self.handle_alter_response(region_id, &mut response)
1203                .map_err(BoxedError::new)?;
1204        } else if is_create {
1205            self.handle_create_response(region_id, &mut response)
1206                .map_err(BoxedError::new)?;
1207        }
1208
1209        Ok(response)
1210    }
1211
1212    #[tracing::instrument(skip_all)]
1213    async fn handle_query(
1214        &self,
1215        region_id: RegionId,
1216        request: ScanRequest,
1217    ) -> Result<RegionScannerRef, BoxedError> {
1218        self.scan_region(region_id, request)
1219            .map_err(BoxedError::new)?
1220            .region_scanner()
1221            .await
1222            .map_err(BoxedError::new)
1223    }
1224
1225    fn query_memory_tracker(&self) -> Option<QueryMemoryTracker> {
1226        Some(self.inner.scan_memory_tracker.clone())
1227    }
1228
1229    async fn get_committed_sequence(
1230        &self,
1231        region_id: RegionId,
1232    ) -> Result<SequenceNumber, BoxedError> {
1233        self.inner
1234            .get_committed_sequence(region_id)
1235            .map_err(BoxedError::new)
1236    }
1237
1238    /// Retrieve region's metadata.
1239    async fn get_metadata(
1240        &self,
1241        region_id: RegionId,
1242    ) -> std::result::Result<RegionMetadataRef, BoxedError> {
1243        self.inner.get_metadata(region_id).map_err(BoxedError::new)
1244    }
1245
1246    /// Stop the engine.
1247    ///
1248    /// Stopping the engine doesn't stop the underlying log store as other components might
1249    /// still use it. (When no other components are referencing the log store, it will
1250    /// automatically shutdown.)
1251    async fn stop(&self) -> std::result::Result<(), BoxedError> {
1252        self.inner.stop().await.map_err(BoxedError::new)
1253    }
1254
1255    fn region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
1256        self.get_region_statistic(region_id)
1257    }
1258
1259    fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<(), BoxedError> {
1260        self.inner
1261            .set_region_role(region_id, role)
1262            .map_err(BoxedError::new)
1263    }
1264
1265    async fn set_region_role_state_gracefully(
1266        &self,
1267        region_id: RegionId,
1268        region_role_state: SettableRegionRoleState,
1269    ) -> Result<SetRegionRoleStateResponse, BoxedError> {
1270        let _timer = HANDLE_REQUEST_ELAPSED
1271            .with_label_values(&["set_region_role_state_gracefully"])
1272            .start_timer();
1273
1274        self.inner
1275            .set_region_role_state_gracefully(region_id, region_role_state)
1276            .await
1277            .map_err(BoxedError::new)
1278    }
1279
1280    async fn sync_region(
1281        &self,
1282        region_id: RegionId,
1283        request: SyncRegionFromRequest,
1284    ) -> Result<SyncRegionFromResponse, BoxedError> {
1285        let manifest_info = request
1286            .into_region_manifest_info()
1287            .context(UnexpectedSnafu {
1288                err_msg: "Expected a manifest info request",
1289            })
1290            .map_err(BoxedError::new)?;
1291        let (_, synced) = self
1292            .inner
1293            .sync_region(region_id, manifest_info)
1294            .await
1295            .map_err(BoxedError::new)?;
1296
1297        Ok(SyncRegionFromResponse::Mito { synced })
1298    }
1299
1300    async fn remap_manifests(
1301        &self,
1302        request: RemapManifestsRequest,
1303    ) -> Result<RemapManifestsResponse, BoxedError> {
1304        self.inner
1305            .remap_manifests(request)
1306            .await
1307            .map_err(BoxedError::new)
1308    }
1309
1310    fn role(&self, region_id: RegionId) -> Option<RegionRole> {
1311        self.inner.role(region_id)
1312    }
1313
1314    fn as_any(&self) -> &dyn Any {
1315        self
1316    }
1317}
1318
1319impl MitoEngine {
1320    fn handle_alter_response(
1321        &self,
1322        region_id: RegionId,
1323        response: &mut RegionResponse,
1324    ) -> Result<()> {
1325        if let Some(statistic) = self.region_statistic(region_id) {
1326            Self::encode_manifest_info_to_extensions(
1327                &region_id,
1328                statistic.manifest,
1329                &mut response.extensions,
1330            )?;
1331        }
1332        let column_metadatas = self
1333            .inner
1334            .find_region(region_id)
1335            .ok()
1336            .map(|r| r.metadata().column_metadatas.clone());
1337        if let Some(column_metadatas) = column_metadatas {
1338            Self::encode_column_metadatas_to_extensions(
1339                &region_id,
1340                column_metadatas,
1341                &mut response.extensions,
1342            )?;
1343        }
1344        Ok(())
1345    }
1346
1347    fn handle_create_response(
1348        &self,
1349        region_id: RegionId,
1350        response: &mut RegionResponse,
1351    ) -> Result<()> {
1352        let column_metadatas = self
1353            .inner
1354            .find_region(region_id)
1355            .ok()
1356            .map(|r| r.metadata().column_metadatas.clone());
1357        if let Some(column_metadatas) = column_metadatas {
1358            Self::encode_column_metadatas_to_extensions(
1359                &region_id,
1360                column_metadatas,
1361                &mut response.extensions,
1362            )?;
1363        }
1364        Ok(())
1365    }
1366}
1367
// Test-only methods.
#[cfg(any(test, feature = "test"))]
#[allow(clippy::too_many_arguments)]
impl MitoEngine {
    /// Returns a new [MitoEngine] for tests.
    ///
    /// The config is sanitized against `data_home` first. The worker group is started with
    /// `WorkerGroup::start_for_test` using the given managers, listener and time provider.
    pub async fn new_for_test<S: LogStore>(
        data_home: &str,
        mut config: MitoConfig,
        log_store: Arc<S>,
        object_store_manager: ObjectStoreManagerRef,
        write_buffer_manager: Option<crate::flush::WriteBufferManagerRef>,
        listener: Option<crate::engine::listener::EventListenerRef>,
        time_provider: crate::time_provider::TimeProviderRef,
        schema_metadata_manager: SchemaMetadataManagerRef,
        file_ref_manager: FileReferenceManagerRef,
        partition_expr_fetcher: PartitionExprFetcherRef,
    ) -> Result<MitoEngine> {
        config.sanitize(data_home)?;
        let config = Arc::new(config);

        let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(log_store.clone()));

        // Resolve the scan memory limit against the total memory and wire the tracker
        // callbacks to the scan metrics.
        let total_memory = get_total_memory_bytes().max(0) as u64;
        let scan_memory_limit = config.scan_memory_limit.resolve(total_memory) as usize;
        let scan_memory_tracker =
            QueryMemoryTracker::builder(scan_memory_limit, config.scan_memory_on_exhausted)
                .on_update(|usage| {
                    SCAN_MEMORY_USAGE_BYTES.set(usage as i64);
                })
                .on_exhausted(|| {
                    SCAN_MEMORY_EXHAUSTED_TOTAL.inc();
                })
                .on_reject(|| {
                    SCAN_REQUESTS_REJECTED_TOTAL.inc();
                })
                .build();

        let workers = WorkerGroup::start_for_test(
            config.clone(),
            log_store,
            object_store_manager,
            write_buffer_manager,
            listener,
            schema_metadata_manager,
            file_ref_manager,
            time_provider,
            partition_expr_fetcher,
        )
        .await?;

        let inner = EngineInner {
            workers,
            config,
            wal_raw_entry_reader,
            scan_memory_tracker,
            #[cfg(feature = "enterprise")]
            extension_range_provider_factory: None,
        };

        Ok(MitoEngine {
            inner: Arc::new(inner),
        })
    }

    /// Returns the purge scheduler.
    pub fn purge_scheduler(&self) -> &crate::schedule::scheduler::SchedulerRef {
        let workers = &self.inner.workers;
        workers.purge_scheduler()
    }
}
1431
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::sst::file::FileMeta;

    /// Builds a [RegionEdit] with the given file lists and every other field set to `None`.
    fn edit_with_files(files_to_add: Vec<FileMeta>, files_to_remove: Vec<FileMeta>) -> RegionEdit {
        RegionEdit {
            files_to_add,
            files_to_remove,
            timestamp_ms: None,
            compaction_time_window: None,
            flushed_entry_id: None,
            flushed_sequence: None,
            committed_sequence: None,
        }
    }

    #[test]
    fn test_is_valid_region_edit() {
        // Valid: has only "files_to_add"
        let edit = edit_with_files(vec![FileMeta::default()], vec![]);
        assert!(is_valid_region_edit(&edit));

        // Invalid: "files_to_add" and "files_to_remove" are both empty
        let edit = edit_with_files(vec![], vec![]);
        assert!(!is_valid_region_edit(&edit));

        // Valid: has only "files_to_remove"
        let edit = edit_with_files(vec![], vec![FileMeta::default()]);
        assert!(is_valid_region_edit(&edit));

        // Valid: both "files_to_add" and "files_to_remove" are not empty
        let edit = edit_with_files(vec![FileMeta::default()], vec![FileMeta::default()]);
        assert!(is_valid_region_edit(&edit));

        // Invalid: other fields are not all "None"s
        let mut edit = edit_with_files(vec![FileMeta::default()], vec![]);
        edit.compaction_time_window = Some(Duration::from_secs(1));
        assert!(!is_valid_region_edit(&edit));

        let mut edit = edit_with_files(vec![FileMeta::default()], vec![]);
        edit.flushed_entry_id = Some(1);
        assert!(!is_valid_region_edit(&edit));

        let mut edit = edit_with_files(vec![FileMeta::default()], vec![]);
        edit.flushed_sequence = Some(1);
        assert!(!is_valid_region_edit(&edit));
    }
}