Skip to main content

mito2/
engine.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Mito region engine.
16
17#[cfg(test)]
18mod alter_test;
19#[cfg(test)]
20mod append_mode_test;
21#[cfg(test)]
22mod basic_test;
23#[cfg(test)]
24mod batch_catchup_test;
25#[cfg(test)]
26mod batch_open_test;
27#[cfg(test)]
28mod bump_committed_sequence_test;
29#[cfg(test)]
30mod catchup_test;
31#[cfg(test)]
32mod close_test;
33#[cfg(test)]
34pub(crate) mod compaction_test;
35#[cfg(test)]
36mod create_test;
37#[cfg(test)]
38mod drop_test;
39#[cfg(test)]
40mod edit_region_test;
41#[cfg(test)]
42mod filter_deleted_test;
43#[cfg(test)]
44mod flush_test;
45#[cfg(test)]
46mod index_build_test;
47#[cfg(any(test, feature = "test"))]
48pub mod listener;
49#[cfg(test)]
50mod merge_mode_test;
51#[cfg(test)]
52mod open_test;
53#[cfg(test)]
54mod parallel_test;
55#[cfg(test)]
56mod projection_test;
57#[cfg(test)]
58mod prune_test;
59#[cfg(test)]
60mod row_selector_test;
61#[cfg(test)]
62mod scan_corrupt;
63#[cfg(test)]
64mod scan_test;
65#[cfg(test)]
66mod set_role_state_test;
67#[cfg(test)]
68mod skip_wal_test;
69#[cfg(test)]
70mod staging_test;
71#[cfg(test)]
72mod sync_test;
73#[cfg(test)]
74mod truncate_test;
75
76#[cfg(test)]
77mod copy_region_from_test;
78#[cfg(test)]
79mod remap_manifests_test;
80
81#[cfg(test)]
82mod apply_staging_manifest_test;
83#[cfg(test)]
84mod partition_filter_test;
85mod puffin_index;
86
87use std::any::Any;
88use std::collections::{HashMap, HashSet};
89use std::sync::Arc;
90use std::time::Instant;
91
92use api::region::RegionResponse;
93use async_trait::async_trait;
94use common_base::Plugins;
95use common_error::ext::BoxedError;
96use common_meta::error::UnexpectedSnafu;
97use common_meta::key::SchemaMetadataManagerRef;
98use common_recordbatch::{QueryMemoryTracker, SendableRecordBatchStream};
99use common_stat::get_total_memory_bytes;
100use common_telemetry::{info, tracing, warn};
101use common_wal::options::WalOptions;
102use futures::future::{join_all, try_join_all};
103use futures::stream::{self, Stream, StreamExt};
104use object_store::manager::ObjectStoreManagerRef;
105use snafu::{OptionExt, ResultExt, ensure};
106use store_api::ManifestVersion;
107use store_api::codec::PrimaryKeyEncoding;
108use store_api::logstore::LogStore;
109use store_api::logstore::provider::{KafkaProvider, Provider};
110use store_api::metadata::{ColumnMetadata, RegionMetadataRef};
111use store_api::metric_engine_consts::{
112    MANIFEST_INFO_EXTENSION_KEY, TABLE_COLUMN_METADATA_EXTENSION_KEY,
113};
114use store_api::region_engine::{
115    BatchResponses, MitoCopyRegionFromRequest, MitoCopyRegionFromResponse, RegionEngine,
116    RegionManifestInfo, RegionRole, RegionScannerRef, RegionStatistic, RemapManifestsRequest,
117    RemapManifestsResponse, SetRegionRoleStateResponse, SettableRegionRoleState,
118    SyncRegionFromRequest, SyncRegionFromResponse,
119};
120use store_api::region_info::RegionInfoEntry;
121use store_api::region_request::{
122    AffectedRows, RegionCatchupRequest, RegionOpenRequest, RegionRequest,
123};
124use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
125use store_api::storage::{FileId, FileRefsManifest, RegionId, ScanRequest, SequenceNumber};
126use tokio::sync::{Semaphore, oneshot};
127
128use crate::access_layer::RegionFilePathFactory;
129use crate::cache::{CacheManagerRef, CacheStrategy};
130use crate::config::MitoConfig;
131use crate::engine::puffin_index::{IndexEntryContext, collect_index_entries_from_puffin};
132use crate::error::{
133    IncrementalQueryStaleSnafu, InvalidRequestSnafu, JoinSnafu, MitoManifestInfoSnafu, RecvSnafu,
134    RegionNotFoundSnafu, Result, SerdeJsonSnafu, SerializeColumnMetadataSnafu,
135};
136#[cfg(feature = "enterprise")]
137use crate::extension::BoxedExtensionRangeProviderFactory;
138use crate::gc::GcLimiterRef;
139use crate::manifest::action::RegionEdit;
140use crate::memtable::MemtableStats;
141use crate::metrics::{
142    HANDLE_REQUEST_ELAPSED, SCAN_MEMORY_EXHAUSTED_TOTAL, SCAN_MEMORY_USAGE_BYTES,
143    SCAN_REQUESTS_REJECTED_TOTAL,
144};
145use crate::read::scan_region::{ScanRegion, Scanner};
146use crate::read::stream::ScanBatchStream;
147use crate::region::MitoRegionRef;
148use crate::region::opener::PartitionExprFetcherRef;
149use crate::region::options::parse_wal_options;
150use crate::request::{RegionEditRequest, WorkerRequest};
151use crate::sst::file::{FileMeta, RegionFileId, RegionIndexId};
152use crate::sst::file_ref::FileReferenceManagerRef;
153use crate::sst::index::intermediate::IntermediateManager;
154use crate::sst::index::puffin_manager::PuffinManagerFactory;
155use crate::wal::entry_distributor::{
156    DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE, build_wal_entry_distributor_and_receivers,
157};
158use crate::wal::raw_entry_reader::{LogStoreRawEntryReader, RawEntryReader};
159use crate::worker::WorkerGroup;
160
161pub const MITO_ENGINE_NAME: &str = "mito";
162
163pub struct MitoEngineBuilder<'a, S: LogStore> {
164    data_home: &'a str,
165    config: MitoConfig,
166    log_store: Arc<S>,
167    object_store_manager: ObjectStoreManagerRef,
168    schema_metadata_manager: SchemaMetadataManagerRef,
169    file_ref_manager: FileReferenceManagerRef,
170    partition_expr_fetcher: PartitionExprFetcherRef,
171    plugins: Plugins,
172    #[cfg(feature = "enterprise")]
173    extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
174}
175
176impl<'a, S: LogStore> MitoEngineBuilder<'a, S> {
177    #[allow(clippy::too_many_arguments)]
178    pub fn new(
179        data_home: &'a str,
180        config: MitoConfig,
181        log_store: Arc<S>,
182        object_store_manager: ObjectStoreManagerRef,
183        schema_metadata_manager: SchemaMetadataManagerRef,
184        file_ref_manager: FileReferenceManagerRef,
185        partition_expr_fetcher: PartitionExprFetcherRef,
186        plugins: Plugins,
187    ) -> Self {
188        Self {
189            data_home,
190            config,
191            log_store,
192            object_store_manager,
193            schema_metadata_manager,
194            file_ref_manager,
195            plugins,
196            partition_expr_fetcher,
197            #[cfg(feature = "enterprise")]
198            extension_range_provider_factory: None,
199        }
200    }
201
202    #[cfg(feature = "enterprise")]
203    #[must_use]
204    pub fn with_extension_range_provider_factory(
205        self,
206        extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
207    ) -> Self {
208        Self {
209            extension_range_provider_factory,
210            ..self
211        }
212    }
213
214    pub async fn try_build(mut self) -> Result<MitoEngine> {
215        self.config.sanitize(self.data_home)?;
216
217        let config = Arc::new(self.config);
218        let workers = WorkerGroup::start(
219            config.clone(),
220            self.log_store.clone(),
221            self.object_store_manager,
222            self.schema_metadata_manager,
223            self.file_ref_manager,
224            self.partition_expr_fetcher.clone(),
225            self.plugins,
226        )
227        .await?;
228        let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(self.log_store));
229        let total_memory = get_total_memory_bytes().max(0) as u64;
230        let scan_memory_limit = config.scan_memory_limit.resolve(total_memory) as usize;
231        let scan_memory_tracker =
232            QueryMemoryTracker::builder(scan_memory_limit, config.scan_memory_on_exhausted)
233                .on_update(|usage| {
234                    SCAN_MEMORY_USAGE_BYTES.set(usage as i64);
235                })
236                .on_exhausted(|| {
237                    SCAN_MEMORY_EXHAUSTED_TOTAL.inc();
238                })
239                .on_reject(|| {
240                    SCAN_REQUESTS_REJECTED_TOTAL.inc();
241                })
242                .build();
243
244        let inner = EngineInner {
245            workers,
246            config,
247            wal_raw_entry_reader,
248            scan_memory_tracker,
249            #[cfg(feature = "enterprise")]
250            extension_range_provider_factory: None,
251        };
252
253        #[cfg(feature = "enterprise")]
254        let inner =
255            inner.with_extension_range_provider_factory(self.extension_range_provider_factory);
256
257        Ok(MitoEngine {
258            inner: Arc::new(inner),
259        })
260    }
261}
262
263/// Region engine implementation for timeseries data.
264#[derive(Clone)]
265pub struct MitoEngine {
266    inner: Arc<EngineInner>,
267}
268
269impl MitoEngine {
270    /// Returns a new [MitoEngine] with specific `config`, `log_store` and `object_store`.
271    #[allow(clippy::too_many_arguments)]
272    pub async fn new<S: LogStore>(
273        data_home: &str,
274        config: MitoConfig,
275        log_store: Arc<S>,
276        object_store_manager: ObjectStoreManagerRef,
277        schema_metadata_manager: SchemaMetadataManagerRef,
278        file_ref_manager: FileReferenceManagerRef,
279        partition_expr_fetcher: PartitionExprFetcherRef,
280        plugins: Plugins,
281    ) -> Result<MitoEngine> {
282        let builder = MitoEngineBuilder::new(
283            data_home,
284            config,
285            log_store,
286            object_store_manager,
287            schema_metadata_manager,
288            file_ref_manager,
289            partition_expr_fetcher,
290            plugins,
291        );
292        builder.try_build().await
293    }
294
295    pub fn mito_config(&self) -> &MitoConfig {
296        &self.inner.config
297    }
298
299    pub fn cache_manager(&self) -> CacheManagerRef {
300        self.inner.workers.cache_manager()
301    }
302
303    pub fn file_ref_manager(&self) -> FileReferenceManagerRef {
304        self.inner.workers.file_ref_manager()
305    }
306
307    pub fn gc_limiter(&self) -> GcLimiterRef {
308        self.inner.workers.gc_limiter()
309    }
310
311    pub fn object_store_manager(&self) -> &ObjectStoreManagerRef {
312        self.inner.workers.object_store_manager()
313    }
314
315    pub fn puffin_manager_factory(&self) -> &PuffinManagerFactory {
316        self.inner.workers.puffin_manager_factory()
317    }
318
319    pub fn intermediate_manager(&self) -> &IntermediateManager {
320        self.inner.workers.intermediate_manager()
321    }
322
323    pub fn schema_metadata_manager(&self) -> &SchemaMetadataManagerRef {
324        self.inner.workers.schema_metadata_manager()
325    }
326
327    /// Get all tmp ref files for given region ids, excluding files that's already in manifest.
328    pub async fn get_snapshot_of_file_refs(
329        &self,
330        file_handle_regions: impl IntoIterator<Item = RegionId>,
331        related_regions: HashMap<RegionId, HashSet<RegionId>>,
332    ) -> Result<FileRefsManifest> {
333        let file_ref_mgr = self.file_ref_manager();
334
335        let file_handle_regions = file_handle_regions.into_iter().collect::<Vec<_>>();
336        // Convert region IDs to MitoRegionRef objects, ignore regions that do not exist on current datanode
337        // as regions on other datanodes are not managed by this engine.
338        let query_regions: Vec<MitoRegionRef> = file_handle_regions
339            .into_iter()
340            .filter_map(|region_id| self.find_region(region_id))
341            .collect();
342
343        let dst_region_to_src_regions: Vec<(MitoRegionRef, HashSet<RegionId>)> = {
344            let dst2src = related_regions
345                .into_iter()
346                .flat_map(|(src, dsts)| dsts.into_iter().map(move |dst| (dst, src)))
347                .fold(
348                    HashMap::<RegionId, HashSet<RegionId>>::new(),
349                    |mut acc, (k, v)| {
350                        let entry = acc.entry(k).or_default();
351                        entry.insert(v);
352                        acc
353                    },
354                );
355            let mut dst_region_to_src_regions = Vec::with_capacity(dst2src.len());
356            for (dst_region, srcs) in dst2src {
357                let Some(dst_region) = self.find_region(dst_region) else {
358                    continue;
359                };
360                dst_region_to_src_regions.push((dst_region, srcs));
361            }
362            dst_region_to_src_regions
363        };
364
365        file_ref_mgr
366            .get_snapshot_of_file_refs(query_regions, dst_region_to_src_regions)
367            .await
368    }
369
370    /// Returns true if the specific region exists.
371    pub fn is_region_exists(&self, region_id: RegionId) -> bool {
372        self.inner.workers.is_region_exists(region_id)
373    }
374
375    /// Returns true if the specific region exists.
376    pub fn is_region_opening(&self, region_id: RegionId) -> bool {
377        self.inner.workers.is_region_opening(region_id)
378    }
379
380    /// Returns true if the specific region is catching up.
381    pub fn is_region_catching_up(&self, region_id: RegionId) -> bool {
382        self.inner.workers.is_region_catching_up(region_id)
383    }
384
385    /// Returns the region disk/memory statistic.
386    pub fn get_region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
387        self.find_region(region_id)
388            .map(|region| region.region_statistic())
389    }
390
391    /// Returns primary key encoding of the region.
392    pub fn get_primary_key_encoding(&self, region_id: RegionId) -> Option<PrimaryKeyEncoding> {
393        self.find_region(region_id)
394            .map(|r| r.primary_key_encoding())
395    }
396
397    /// Handle substrait query and return a stream of record batches
398    ///
399    /// Notice that the output stream's ordering is not guranateed. If order
400    /// matter, please use [`scanner`] to build a [`Scanner`] to consume.
401    #[tracing::instrument(skip_all)]
402    pub async fn scan_to_stream(
403        &self,
404        region_id: RegionId,
405        request: ScanRequest,
406    ) -> Result<SendableRecordBatchStream, BoxedError> {
407        self.scanner(region_id, request)
408            .await
409            .map_err(BoxedError::new)?
410            .scan()
411            .await
412    }
413
414    /// Scan [`Batch`]es by [`ScanRequest`].
415    pub async fn scan_batch(
416        &self,
417        region_id: RegionId,
418        request: ScanRequest,
419        filter_deleted: bool,
420    ) -> Result<ScanBatchStream> {
421        let mut scan_region = self.scan_region(region_id, request)?;
422        scan_region.set_filter_deleted(filter_deleted);
423        scan_region.scanner().await?.scan_batch()
424    }
425
426    /// Returns a scanner to scan for `request`.
427    pub(crate) async fn scanner(
428        &self,
429        region_id: RegionId,
430        request: ScanRequest,
431    ) -> Result<Scanner> {
432        self.scan_region(region_id, request)?.scanner().await
433    }
434
435    /// Scans a region.
436    #[tracing::instrument(skip_all, fields(region_id = %region_id))]
437    fn scan_region(&self, region_id: RegionId, request: ScanRequest) -> Result<ScanRegion> {
438        self.inner.scan_region(region_id, request)
439    }
440
441    /// Edit region's metadata by [RegionEdit] directly. Use with care.
442    /// Now we only allow adding files or removing files from region (the [RegionEdit] struct can only contain a non-empty "files_to_add" or "files_to_remove" field).
443    /// Other region editing intention will result in an "invalid request" error.
444    /// Also note that if a region is to be edited directly, we MUST not write data to it thereafter.
445    pub async fn edit_region(&self, region_id: RegionId, edit: RegionEdit) -> Result<()> {
446        let _timer = HANDLE_REQUEST_ELAPSED
447            .with_label_values(&["edit_region"])
448            .start_timer();
449
450        ensure!(
451            is_valid_region_edit(&edit),
452            InvalidRequestSnafu {
453                region_id,
454                reason: "invalid region edit"
455            }
456        );
457
458        let (tx, rx) = oneshot::channel();
459        let request = WorkerRequest::EditRegion(RegionEditRequest::new(region_id, edit, true, tx));
460        self.inner
461            .workers
462            .submit_to_worker(region_id, request)
463            .await?;
464        rx.await.context(RecvSnafu)?
465    }
466
467    /// Handles copy region from request.
468    ///
469    /// This method is only supported for internal use and is not exposed in the trait implementation.
470    pub async fn copy_region_from(
471        &self,
472        region_id: RegionId,
473        request: MitoCopyRegionFromRequest,
474    ) -> Result<MitoCopyRegionFromResponse> {
475        self.inner.copy_region_from(region_id, request).await
476    }
477
478    #[cfg(test)]
479    pub(crate) fn get_region(&self, id: RegionId) -> Option<crate::region::MitoRegionRef> {
480        self.find_region(id)
481    }
482
483    pub fn find_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
484        self.inner.workers.get_region(region_id)
485    }
486
487    /// Returns all regions.
488    pub fn regions(&self) -> Vec<MitoRegionRef> {
489        self.inner.workers.all_regions().collect()
490    }
491
492    fn encode_manifest_info_to_extensions(
493        region_id: &RegionId,
494        manifest_info: RegionManifestInfo,
495        extensions: &mut HashMap<String, Vec<u8>>,
496    ) -> Result<()> {
497        let region_manifest_info = vec![(*region_id, manifest_info)];
498
499        extensions.insert(
500            MANIFEST_INFO_EXTENSION_KEY.to_string(),
501            RegionManifestInfo::encode_list(&region_manifest_info).context(SerdeJsonSnafu)?,
502        );
503        info!(
504            "Added manifest info: {:?} to extensions, region_id: {:?}",
505            region_manifest_info, region_id
506        );
507        Ok(())
508    }
509
510    fn encode_column_metadatas_to_extensions(
511        region_id: &RegionId,
512        column_metadatas: Vec<ColumnMetadata>,
513        extensions: &mut HashMap<String, Vec<u8>>,
514    ) -> Result<()> {
515        extensions.insert(
516            TABLE_COLUMN_METADATA_EXTENSION_KEY.to_string(),
517            ColumnMetadata::encode_list(&column_metadatas).context(SerializeColumnMetadataSnafu)?,
518        );
519        info!(
520            "Added column metadatas: {:?} to extensions, region_id: {:?}",
521            column_metadatas, region_id
522        );
523        Ok(())
524    }
525
526    /// Find the current version's memtables and SSTs stats by region_id.
527    /// The stats must be collected in one place one time to ensure data consistency.
528    pub fn find_memtable_and_sst_stats(
529        &self,
530        region_id: RegionId,
531    ) -> Result<(Vec<MemtableStats>, Vec<FileMeta>)> {
532        let region = self
533            .find_region(region_id)
534            .context(RegionNotFoundSnafu { region_id })?;
535
536        let version = region.version();
537        let memtable_stats = version
538            .memtables
539            .list_memtables()
540            .iter()
541            .map(|x| x.stats())
542            .collect::<Vec<_>>();
543
544        let sst_stats = version
545            .ssts
546            .levels()
547            .iter()
548            .flat_map(|level| level.files().map(|x| x.meta_ref()))
549            .cloned()
550            .collect::<Vec<_>>();
551        Ok((memtable_stats, sst_stats))
552    }
553
554    /// Lists all SSTs from the manifest of all regions in the engine.
555    pub async fn all_ssts_from_manifest(&self) -> Vec<ManifestSstEntry> {
556        let node_id = self.inner.workers.file_ref_manager().node_id();
557        let regions = self.inner.workers.all_regions();
558
559        let mut results = Vec::new();
560        for region in regions {
561            let mut entries = region.manifest_sst_entries().await;
562            for e in &mut entries {
563                e.node_id = node_id;
564            }
565            results.extend(entries);
566        }
567
568        results
569    }
570
571    /// Lists metadata about all puffin index targets stored in the engine.
572    pub async fn all_index_metas(&self) -> Vec<PuffinIndexMetaEntry> {
573        let node_id = self.inner.workers.file_ref_manager().node_id();
574        let cache_manager = self.inner.workers.cache_manager();
575        let puffin_metadata_cache = cache_manager.puffin_metadata_cache().cloned();
576        let bloom_filter_cache = cache_manager.bloom_filter_index_cache().cloned();
577        let inverted_index_cache = cache_manager.inverted_index_cache().cloned();
578
579        let mut results = Vec::new();
580
581        for region in self.inner.workers.all_regions() {
582            let manifest_entries = region.manifest_sst_entries().await;
583            let access_layer = region.access_layer.clone();
584            let table_dir = access_layer.table_dir().to_string();
585            let path_type = access_layer.path_type();
586            let object_store = access_layer.object_store().clone();
587            let puffin_factory = access_layer.puffin_manager_factory().clone();
588            let path_factory = RegionFilePathFactory::new(table_dir, path_type);
589
590            let entry_futures = manifest_entries.into_iter().map(|entry| {
591                let object_store = object_store.clone();
592                let path_factory = path_factory.clone();
593                let puffin_factory = puffin_factory.clone();
594                let puffin_metadata_cache = puffin_metadata_cache.clone();
595                let bloom_filter_cache = bloom_filter_cache.clone();
596                let inverted_index_cache = inverted_index_cache.clone();
597
598                async move {
599                    let Some(index_file_path) = entry.index_file_path.as_ref() else {
600                        return Vec::new();
601                    };
602
603                    let index_version = entry.index_version;
604                    let file_id = match FileId::parse_str(&entry.file_id) {
605                        Ok(file_id) => file_id,
606                        Err(err) => {
607                            warn!(
608                                err;
609                                "Failed to parse puffin index file id, table_dir: {}, file_id: {}",
610                                entry.table_dir,
611                                entry.file_id
612                            );
613                            return Vec::new();
614                        }
615                    };
616                    // The index file path is derived from the physical file owner. After
617                    // repartition, `entry.region_id` is only the referring region.
618                    let region_index_id = RegionIndexId::new(
619                        RegionFileId::new(entry.origin_region_id, file_id),
620                        index_version,
621                    );
622                    let context = IndexEntryContext {
623                        table_dir: &entry.table_dir,
624                        index_file_path: index_file_path.as_str(),
625                        region_id: entry.region_id,
626                        table_id: entry.table_id,
627                        region_number: entry.region_number,
628                        region_group: entry.region_group,
629                        region_sequence: entry.region_sequence,
630                        file_id: &entry.file_id,
631                        index_file_size: entry.index_file_size,
632                        node_id,
633                    };
634
635                    let manager = puffin_factory
636                        .build(object_store, path_factory)
637                        .with_puffin_metadata_cache(puffin_metadata_cache);
638
639                    collect_index_entries_from_puffin(
640                        manager,
641                        region_index_id,
642                        context,
643                        bloom_filter_cache,
644                        inverted_index_cache,
645                    )
646                    .await
647                }
648            });
649
650            let mut meta_stream = stream::iter(entry_futures).buffer_unordered(8); // Parallelism is 8.
651            while let Some(mut metas) = meta_stream.next().await {
652                results.append(&mut metas);
653            }
654        }
655
656        results
657    }
658
659    /// Lists region info entries of all regions in the engine.
660    pub async fn all_region_infos(&self) -> Vec<RegionInfoEntry> {
661        let node_id = self.inner.workers.file_ref_manager().node_id();
662        self.inner
663            .workers
664            .all_regions()
665            .map(|region| region.region_info_entry(node_id))
666            .collect()
667    }
668
669    /// Lists all SSTs from the storage layer of all regions in the engine.
670    pub fn all_ssts_from_storage(&self) -> impl Stream<Item = Result<StorageSstEntry>> {
671        let node_id = self.inner.workers.file_ref_manager().node_id();
672        let regions = self.inner.workers.all_regions();
673
674        let mut layers_distinct_table_dirs = HashMap::new();
675        for region in regions {
676            let table_dir = region.access_layer.table_dir();
677            if !layers_distinct_table_dirs.contains_key(table_dir) {
678                layers_distinct_table_dirs
679                    .insert(table_dir.to_string(), region.access_layer.clone());
680            }
681        }
682
683        stream::iter(layers_distinct_table_dirs)
684            .map(|(_, access_layer)| access_layer.storage_sst_entries())
685            .flatten()
686            .map(move |entry| {
687                entry.map(move |mut entry| {
688                    entry.node_id = node_id;
689                    entry
690                })
691            })
692    }
693}
694
695/// Check whether the region edit is valid.
696///
697/// Only adding or removing files to region is considered valid now.
698fn is_valid_region_edit(edit: &RegionEdit) -> bool {
699    (!edit.files_to_add.is_empty() || !edit.files_to_remove.is_empty())
700        && matches!(
701            edit,
702            RegionEdit {
703                files_to_add: _,
704                files_to_remove: _,
705                timestamp_ms: _,
706                compaction_time_window: None,
707                flushed_entry_id: None,
708                flushed_sequence: None,
709                ..
710            }
711        )
712}
713
714/// Inner struct of [MitoEngine].
715struct EngineInner {
716    /// Region workers group.
717    workers: WorkerGroup,
718    /// Config of the engine.
719    config: Arc<MitoConfig>,
720    /// The Wal raw entry reader.
721    wal_raw_entry_reader: Arc<dyn RawEntryReader>,
722    /// Memory tracker for table scans.
723    scan_memory_tracker: QueryMemoryTracker,
724    #[cfg(feature = "enterprise")]
725    extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
726}
727
728type TopicGroupedRegionOpenRequests = HashMap<String, Vec<(RegionId, RegionOpenRequest)>>;
729
730/// Returns requests([TopicGroupedRegionOpenRequests]) grouped by topic and remaining requests.
731fn prepare_batch_open_requests(
732    requests: Vec<(RegionId, RegionOpenRequest)>,
733) -> Result<(
734    TopicGroupedRegionOpenRequests,
735    Vec<(RegionId, RegionOpenRequest)>,
736)> {
737    let mut topic_to_regions: HashMap<String, Vec<(RegionId, RegionOpenRequest)>> = HashMap::new();
738    let mut remaining_regions: Vec<(RegionId, RegionOpenRequest)> = Vec::new();
739    for (region_id, request) in requests {
740        match parse_wal_options(&request.options).context(SerdeJsonSnafu)? {
741            WalOptions::Kafka(options) => {
742                topic_to_regions
743                    .entry(options.topic)
744                    .or_default()
745                    .push((region_id, request));
746            }
747            WalOptions::RaftEngine | WalOptions::Noop => {
748                remaining_regions.push((region_id, request));
749            }
750        }
751    }
752
753    Ok((topic_to_regions, remaining_regions))
754}
755
756impl EngineInner {
757    #[cfg(feature = "enterprise")]
758    #[must_use]
759    fn with_extension_range_provider_factory(
760        self,
761        extension_range_provider_factory: Option<BoxedExtensionRangeProviderFactory>,
762    ) -> Self {
763        Self {
764            extension_range_provider_factory,
765            ..self
766        }
767    }
768
769    /// Stop the inner engine.
770    async fn stop(&self) -> Result<()> {
771        self.workers.stop().await
772    }
773
774    fn find_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
775        self.workers
776            .get_region(region_id)
777            .context(RegionNotFoundSnafu { region_id })
778    }
779
780    /// Get metadata of a region.
781    ///
782    /// Returns error if the region doesn't exist.
783    fn get_metadata(&self, region_id: RegionId) -> Result<RegionMetadataRef> {
784        // Reading a region doesn't need to go through the region worker thread.
785        let region = self.find_region(region_id)?;
786        Ok(region.metadata())
787    }
788
789    async fn open_topic_regions(
790        &self,
791        topic: String,
792        region_requests: Vec<(RegionId, RegionOpenRequest)>,
793    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
794        let now = Instant::now();
795        let region_ids = region_requests
796            .iter()
797            .map(|(region_id, _)| *region_id)
798            .collect::<Vec<_>>();
799        let provider = Provider::kafka_provider(topic);
800        let (distributor, entry_receivers) = build_wal_entry_distributor_and_receivers(
801            provider.clone(),
802            self.wal_raw_entry_reader.clone(),
803            &region_ids,
804            DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
805        );
806
807        let mut responses = Vec::with_capacity(region_requests.len());
808        for ((region_id, request), entry_receiver) in
809            region_requests.into_iter().zip(entry_receivers)
810        {
811            let (request, receiver) =
812                WorkerRequest::new_open_region_request(region_id, request, Some(entry_receiver));
813            self.workers.submit_to_worker(region_id, request).await?;
814            responses.push(async move { receiver.await.context(RecvSnafu)? });
815        }
816
817        // Waits for entries distribution.
818        let distribution =
819            common_runtime::spawn_global(async move { distributor.distribute().await });
820        // Waits for worker returns.
821        let responses = join_all(responses).await;
822        distribution.await.context(JoinSnafu)??;
823
824        let num_failure = responses.iter().filter(|r| r.is_err()).count();
825        info!(
826            "Opened {} regions for topic '{}', failures: {}, elapsed: {:?}",
827            region_ids.len() - num_failure,
828            // Safety: provider is kafka provider.
829            provider.as_kafka_provider().unwrap(),
830            num_failure,
831            now.elapsed(),
832        );
833        Ok(region_ids.into_iter().zip(responses).collect())
834    }
835
836    async fn handle_batch_open_requests(
837        &self,
838        parallelism: usize,
839        requests: Vec<(RegionId, RegionOpenRequest)>,
840    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
841        let semaphore = Arc::new(Semaphore::new(parallelism));
842        let (topic_to_region_requests, remaining_region_requests) =
843            prepare_batch_open_requests(requests)?;
844        let mut responses =
845            Vec::with_capacity(topic_to_region_requests.len() + remaining_region_requests.len());
846
847        if !topic_to_region_requests.is_empty() {
848            let mut tasks = Vec::with_capacity(topic_to_region_requests.len());
849            for (topic, region_requests) in topic_to_region_requests {
850                let semaphore_moved = semaphore.clone();
851                tasks.push(async move {
852                    // Safety: semaphore must exist
853                    let _permit = semaphore_moved.acquire().await.unwrap();
854                    self.open_topic_regions(topic, region_requests).await
855                })
856            }
857            let r = try_join_all(tasks).await?;
858            responses.extend(r.into_iter().flatten());
859        }
860
861        if !remaining_region_requests.is_empty() {
862            let mut tasks = Vec::with_capacity(remaining_region_requests.len());
863            let mut region_ids = Vec::with_capacity(remaining_region_requests.len());
864            for (region_id, request) in remaining_region_requests {
865                let semaphore_moved = semaphore.clone();
866                region_ids.push(region_id);
867                tasks.push(async move {
868                    // Safety: semaphore must exist
869                    let _permit = semaphore_moved.acquire().await.unwrap();
870                    let (request, receiver) =
871                        WorkerRequest::new_open_region_request(region_id, request, None);
872
873                    self.workers.submit_to_worker(region_id, request).await?;
874
875                    receiver.await.context(RecvSnafu)?
876                })
877            }
878
879            let results = join_all(tasks).await;
880            responses.extend(region_ids.into_iter().zip(results));
881        }
882
883        Ok(responses)
884    }
885
886    async fn catchup_topic_regions(
887        &self,
888        provider: Provider,
889        region_requests: Vec<(RegionId, RegionCatchupRequest)>,
890    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
891        let now = Instant::now();
892        let region_ids = region_requests
893            .iter()
894            .map(|(region_id, _)| *region_id)
895            .collect::<Vec<_>>();
896        let (distributor, entry_receivers) = build_wal_entry_distributor_and_receivers(
897            provider.clone(),
898            self.wal_raw_entry_reader.clone(),
899            &region_ids,
900            DEFAULT_ENTRY_RECEIVER_BUFFER_SIZE,
901        );
902
903        let mut responses = Vec::with_capacity(region_requests.len());
904        for ((region_id, request), entry_receiver) in
905            region_requests.into_iter().zip(entry_receivers)
906        {
907            let (request, receiver) =
908                WorkerRequest::new_catchup_region_request(region_id, request, Some(entry_receiver));
909            self.workers.submit_to_worker(region_id, request).await?;
910            responses.push(async move { receiver.await.context(RecvSnafu)? });
911        }
912
913        // Wait for entries distribution.
914        let distribution =
915            common_runtime::spawn_global(async move { distributor.distribute().await });
916        // Wait for worker returns.
917        let responses = join_all(responses).await;
918        distribution.await.context(JoinSnafu)??;
919
920        let num_failure = responses.iter().filter(|r| r.is_err()).count();
921        info!(
922            "Caught up {} regions for topic '{}', failures: {}, elapsed: {:?}",
923            region_ids.len() - num_failure,
924            // Safety: provider is kafka provider.
925            provider.as_kafka_provider().unwrap(),
926            num_failure,
927            now.elapsed(),
928        );
929
930        Ok(region_ids.into_iter().zip(responses).collect())
931    }
932
933    async fn handle_batch_catchup_requests(
934        &self,
935        parallelism: usize,
936        requests: Vec<(RegionId, RegionCatchupRequest)>,
937    ) -> Result<Vec<(RegionId, Result<AffectedRows>)>> {
938        let mut responses = Vec::with_capacity(requests.len());
939        let mut topic_regions: HashMap<Arc<KafkaProvider>, Vec<_>> = HashMap::new();
940        let mut remaining_region_requests = vec![];
941
942        for (region_id, request) in requests {
943            match self.workers.get_region(region_id) {
944                Some(region) => match region.provider.as_kafka_provider() {
945                    Some(provider) => {
946                        topic_regions
947                            .entry(provider.clone())
948                            .or_default()
949                            .push((region_id, request));
950                    }
951                    None => {
952                        remaining_region_requests.push((region_id, request));
953                    }
954                },
955                None => responses.push((region_id, RegionNotFoundSnafu { region_id }.fail())),
956            }
957        }
958
959        let semaphore = Arc::new(Semaphore::new(parallelism));
960
961        if !topic_regions.is_empty() {
962            let mut tasks = Vec::with_capacity(topic_regions.len());
963            for (provider, region_requests) in topic_regions {
964                let semaphore_moved = semaphore.clone();
965                tasks.push(async move {
966                    // Safety: semaphore must exist
967                    let _permit = semaphore_moved.acquire().await.unwrap();
968                    self.catchup_topic_regions(Provider::Kafka(provider), region_requests)
969                        .await
970                })
971            }
972
973            let r = try_join_all(tasks).await?;
974            responses.extend(r.into_iter().flatten());
975        }
976
977        if !remaining_region_requests.is_empty() {
978            let mut tasks = Vec::with_capacity(remaining_region_requests.len());
979            let mut region_ids = Vec::with_capacity(remaining_region_requests.len());
980            for (region_id, request) in remaining_region_requests {
981                let semaphore_moved = semaphore.clone();
982                region_ids.push(region_id);
983                tasks.push(async move {
984                    // Safety: semaphore must exist
985                    let _permit = semaphore_moved.acquire().await.unwrap();
986                    let (request, receiver) =
987                        WorkerRequest::new_catchup_region_request(region_id, request, None);
988
989                    self.workers.submit_to_worker(region_id, request).await?;
990
991                    receiver.await.context(RecvSnafu)?
992                })
993            }
994
995            let results = join_all(tasks).await;
996            responses.extend(region_ids.into_iter().zip(results));
997        }
998
999        Ok(responses)
1000    }
1001
1002    /// Handles [RegionRequest] and return its executed result.
1003    async fn handle_request(
1004        &self,
1005        region_id: RegionId,
1006        request: RegionRequest,
1007    ) -> Result<AffectedRows> {
1008        let region_metadata = self.get_metadata(region_id).ok();
1009        let (request, receiver) =
1010            WorkerRequest::try_from_region_request(region_id, request, region_metadata)?;
1011        self.workers.submit_to_worker(region_id, request).await?;
1012
1013        receiver.await.context(RecvSnafu)?
1014    }
1015
1016    /// Returns the sequence of latest committed data.
1017    fn get_committed_sequence(&self, region_id: RegionId) -> Result<SequenceNumber> {
1018        // Reading a region doesn't need to go through the region worker thread.
1019        self.find_region(region_id)
1020            .map(|r| r.find_committed_sequence())
1021    }
1022
1023    /// Handles the scan `request` and returns a [ScanRegion].
1024    #[tracing::instrument(skip_all, fields(region_id = %region_id))]
1025    fn scan_region(&self, region_id: RegionId, mut request: ScanRequest) -> Result<ScanRegion> {
1026        let query_start = Instant::now();
1027        // Reading a region doesn't need to go through the region worker thread.
1028        let region = self.find_region(region_id)?;
1029        let version_data = region.version_control.current();
1030        let version = version_data.version;
1031
1032        if request.snapshot_on_scan && request.memtable_max_sequence.is_none() {
1033            request.memtable_max_sequence = Some(version_data.committed_sequence);
1034        }
1035
1036        if let Some(given_seq) = request.memtable_min_sequence {
1037            let min_readable_seq = version.flushed_sequence;
1038            ensure!(
1039                given_seq >= min_readable_seq,
1040                IncrementalQueryStaleSnafu {
1041                    region_id,
1042                    given_seq,
1043                    min_readable_seq,
1044                }
1045            );
1046        }
1047
1048        // Get cache.
1049        let cache_manager = self.workers.cache_manager();
1050
1051        let scan_region = ScanRegion::new(
1052            version,
1053            region.access_layer.clone(),
1054            request,
1055            CacheStrategy::EnableAll(cache_manager),
1056        )
1057        .with_max_concurrent_scan_files(self.config.max_concurrent_scan_files)
1058        .with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled())
1059        .with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled())
1060        .with_ignore_bloom_filter(self.config.bloom_filter_index.apply_on_query.disabled())
1061        .with_start_time(query_start);
1062
1063        #[cfg(feature = "enterprise")]
1064        let scan_region = self.maybe_fill_extension_range_provider(scan_region, region);
1065
1066        Ok(scan_region)
1067    }
1068
1069    #[cfg(feature = "enterprise")]
1070    fn maybe_fill_extension_range_provider(
1071        &self,
1072        mut scan_region: ScanRegion,
1073        region: MitoRegionRef,
1074    ) -> ScanRegion {
1075        if region.is_follower()
1076            && let Some(factory) = self.extension_range_provider_factory.as_ref()
1077        {
1078            scan_region
1079                .set_extension_range_provider(factory.create_extension_range_provider(region));
1080        }
1081        scan_region
1082    }
1083
1084    /// Converts the [`RegionRole`].
1085    fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<()> {
1086        let region = self.find_region(region_id)?;
1087        region.set_role(role);
1088        Ok(())
1089    }
1090
1091    /// Sets read-only for a region and ensures no more writes in the region after it returns.
1092    async fn set_region_role_state_gracefully(
1093        &self,
1094        region_id: RegionId,
1095        region_role_state: SettableRegionRoleState,
1096    ) -> Result<SetRegionRoleStateResponse> {
1097        // Notes: It acquires the mutable ownership to ensure no other threads,
1098        // Therefore, we submit it to the worker.
1099        let (request, receiver) =
1100            WorkerRequest::new_set_readonly_gracefully(region_id, region_role_state);
1101        self.workers.submit_to_worker(region_id, request).await?;
1102
1103        receiver.await.context(RecvSnafu)
1104    }
1105
1106    async fn sync_region(
1107        &self,
1108        region_id: RegionId,
1109        manifest_info: RegionManifestInfo,
1110    ) -> Result<(ManifestVersion, bool)> {
1111        ensure!(manifest_info.is_mito(), MitoManifestInfoSnafu);
1112        let manifest_version = manifest_info.data_manifest_version();
1113        let (request, receiver) =
1114            WorkerRequest::new_sync_region_request(region_id, manifest_version);
1115        self.workers.submit_to_worker(region_id, request).await?;
1116
1117        receiver.await.context(RecvSnafu)?
1118    }
1119
1120    async fn remap_manifests(
1121        &self,
1122        request: RemapManifestsRequest,
1123    ) -> Result<RemapManifestsResponse> {
1124        let region_id = request.region_id;
1125        let (request, receiver) = WorkerRequest::try_from_remap_manifests_request(request)?;
1126        self.workers.submit_to_worker(region_id, request).await?;
1127        let manifest_paths = receiver.await.context(RecvSnafu)??;
1128        Ok(RemapManifestsResponse { manifest_paths })
1129    }
1130
1131    async fn copy_region_from(
1132        &self,
1133        region_id: RegionId,
1134        request: MitoCopyRegionFromRequest,
1135    ) -> Result<MitoCopyRegionFromResponse> {
1136        let (request, receiver) =
1137            WorkerRequest::try_from_copy_region_from_request(region_id, request)?;
1138        self.workers.submit_to_worker(region_id, request).await?;
1139        let response = receiver.await.context(RecvSnafu)??;
1140        Ok(response)
1141    }
1142
1143    fn role(&self, region_id: RegionId) -> Option<RegionRole> {
1144        self.workers
1145            .get_region(region_id)
1146            .map(|region| region.region_role())
1147    }
1148}
1149
1150fn map_batch_responses(responses: Vec<(RegionId, Result<AffectedRows>)>) -> BatchResponses {
1151    responses
1152        .into_iter()
1153        .map(|(region_id, response)| {
1154            (
1155                region_id,
1156                response.map(RegionResponse::new).map_err(BoxedError::new),
1157            )
1158        })
1159        .collect()
1160}
1161
1162#[async_trait]
1163impl RegionEngine for MitoEngine {
1164    fn name(&self) -> &str {
1165        MITO_ENGINE_NAME
1166    }
1167
1168    #[tracing::instrument(skip_all)]
1169    async fn handle_batch_open_requests(
1170        &self,
1171        parallelism: usize,
1172        requests: Vec<(RegionId, RegionOpenRequest)>,
1173    ) -> Result<BatchResponses, BoxedError> {
1174        // TODO(weny): add metrics.
1175        self.inner
1176            .handle_batch_open_requests(parallelism, requests)
1177            .await
1178            .map(map_batch_responses)
1179            .map_err(BoxedError::new)
1180    }
1181
1182    #[tracing::instrument(skip_all)]
1183    async fn handle_batch_catchup_requests(
1184        &self,
1185        parallelism: usize,
1186        requests: Vec<(RegionId, RegionCatchupRequest)>,
1187    ) -> Result<BatchResponses, BoxedError> {
1188        self.inner
1189            .handle_batch_catchup_requests(parallelism, requests)
1190            .await
1191            .map(map_batch_responses)
1192            .map_err(BoxedError::new)
1193    }
1194
1195    #[tracing::instrument(skip_all)]
1196    async fn handle_request(
1197        &self,
1198        region_id: RegionId,
1199        request: RegionRequest,
1200    ) -> Result<RegionResponse, BoxedError> {
1201        let _timer = HANDLE_REQUEST_ELAPSED
1202            .with_label_values(&[request.request_type()])
1203            .start_timer();
1204
1205        let is_alter = matches!(request, RegionRequest::Alter(_));
1206        let is_create = matches!(request, RegionRequest::Create(_));
1207        let mut response = self
1208            .inner
1209            .handle_request(region_id, request)
1210            .await
1211            .map(RegionResponse::new)
1212            .map_err(BoxedError::new)?;
1213
1214        if is_alter {
1215            self.handle_alter_response(region_id, &mut response)
1216                .map_err(BoxedError::new)?;
1217        } else if is_create {
1218            self.handle_create_response(region_id, &mut response)
1219                .map_err(BoxedError::new)?;
1220        }
1221
1222        Ok(response)
1223    }
1224
1225    #[tracing::instrument(skip_all)]
1226    async fn handle_query(
1227        &self,
1228        region_id: RegionId,
1229        request: ScanRequest,
1230    ) -> Result<RegionScannerRef, BoxedError> {
1231        self.scan_region(region_id, request)
1232            .map_err(BoxedError::new)?
1233            .region_scanner()
1234            .await
1235            .map_err(BoxedError::new)
1236    }
1237
1238    fn query_memory_tracker(&self) -> Option<QueryMemoryTracker> {
1239        Some(self.inner.scan_memory_tracker.clone())
1240    }
1241
1242    async fn get_committed_sequence(
1243        &self,
1244        region_id: RegionId,
1245    ) -> Result<SequenceNumber, BoxedError> {
1246        self.inner
1247            .get_committed_sequence(region_id)
1248            .map_err(BoxedError::new)
1249    }
1250
1251    /// Retrieve region's metadata.
1252    async fn get_metadata(
1253        &self,
1254        region_id: RegionId,
1255    ) -> std::result::Result<RegionMetadataRef, BoxedError> {
1256        self.inner.get_metadata(region_id).map_err(BoxedError::new)
1257    }
1258
1259    /// Stop the engine.
1260    ///
1261    /// Stopping the engine doesn't stop the underlying log store as other components might
1262    /// still use it. (When no other components are referencing the log store, it will
1263    /// automatically shutdown.)
1264    async fn stop(&self) -> std::result::Result<(), BoxedError> {
1265        self.inner.stop().await.map_err(BoxedError::new)
1266    }
1267
1268    fn region_statistic(&self, region_id: RegionId) -> Option<RegionStatistic> {
1269        self.get_region_statistic(region_id)
1270    }
1271
1272    fn set_region_role(&self, region_id: RegionId, role: RegionRole) -> Result<(), BoxedError> {
1273        self.inner
1274            .set_region_role(region_id, role)
1275            .map_err(BoxedError::new)
1276    }
1277
1278    async fn set_region_role_state_gracefully(
1279        &self,
1280        region_id: RegionId,
1281        region_role_state: SettableRegionRoleState,
1282    ) -> Result<SetRegionRoleStateResponse, BoxedError> {
1283        let _timer = HANDLE_REQUEST_ELAPSED
1284            .with_label_values(&["set_region_role_state_gracefully"])
1285            .start_timer();
1286
1287        self.inner
1288            .set_region_role_state_gracefully(region_id, region_role_state)
1289            .await
1290            .map_err(BoxedError::new)
1291    }
1292
1293    async fn sync_region(
1294        &self,
1295        region_id: RegionId,
1296        request: SyncRegionFromRequest,
1297    ) -> Result<SyncRegionFromResponse, BoxedError> {
1298        let manifest_info = request
1299            .into_region_manifest_info()
1300            .context(UnexpectedSnafu {
1301                err_msg: "Expected a manifest info request",
1302            })
1303            .map_err(BoxedError::new)?;
1304        let (_, synced) = self
1305            .inner
1306            .sync_region(region_id, manifest_info)
1307            .await
1308            .map_err(BoxedError::new)?;
1309
1310        Ok(SyncRegionFromResponse::Mito { synced })
1311    }
1312
1313    async fn remap_manifests(
1314        &self,
1315        request: RemapManifestsRequest,
1316    ) -> Result<RemapManifestsResponse, BoxedError> {
1317        self.inner
1318            .remap_manifests(request)
1319            .await
1320            .map_err(BoxedError::new)
1321    }
1322
1323    fn role(&self, region_id: RegionId) -> Option<RegionRole> {
1324        self.inner.role(region_id)
1325    }
1326
1327    fn as_any(&self) -> &dyn Any {
1328        self
1329    }
1330}
1331
1332impl MitoEngine {
1333    fn handle_alter_response(
1334        &self,
1335        region_id: RegionId,
1336        response: &mut RegionResponse,
1337    ) -> Result<()> {
1338        if let Some(statistic) = self.region_statistic(region_id) {
1339            Self::encode_manifest_info_to_extensions(
1340                &region_id,
1341                statistic.manifest,
1342                &mut response.extensions,
1343            )?;
1344        }
1345        let column_metadatas = self
1346            .inner
1347            .find_region(region_id)
1348            .ok()
1349            .map(|r| r.metadata().column_metadatas.clone());
1350        if let Some(column_metadatas) = column_metadatas {
1351            Self::encode_column_metadatas_to_extensions(
1352                &region_id,
1353                column_metadatas,
1354                &mut response.extensions,
1355            )?;
1356        }
1357        Ok(())
1358    }
1359
1360    fn handle_create_response(
1361        &self,
1362        region_id: RegionId,
1363        response: &mut RegionResponse,
1364    ) -> Result<()> {
1365        let column_metadatas = self
1366            .inner
1367            .find_region(region_id)
1368            .ok()
1369            .map(|r| r.metadata().column_metadatas.clone());
1370        if let Some(column_metadatas) = column_metadatas {
1371            Self::encode_column_metadatas_to_extensions(
1372                &region_id,
1373                column_metadatas,
1374                &mut response.extensions,
1375            )?;
1376        }
1377        Ok(())
1378    }
1379}
1380
// Test-only methods.
#[cfg(any(test, feature = "test"))]
#[allow(clippy::too_many_arguments)]
impl MitoEngine {
    /// Creates a [MitoEngine] for tests.
    ///
    /// The config is sanitized against `data_home`, the scan memory tracker is sized
    /// from the total memory of the host, and the workers are started through
    /// `WorkerGroup::start_for_test`.
    pub async fn new_for_test<S: LogStore>(
        data_home: &str,
        mut config: MitoConfig,
        log_store: Arc<S>,
        object_store_manager: ObjectStoreManagerRef,
        write_buffer_manager: Option<crate::flush::WriteBufferManagerRef>,
        listener: Option<crate::engine::listener::EventListenerRef>,
        time_provider: crate::time_provider::TimeProviderRef,
        schema_metadata_manager: SchemaMetadataManagerRef,
        file_ref_manager: FileReferenceManagerRef,
        partition_expr_fetcher: PartitionExprFetcherRef,
    ) -> Result<MitoEngine> {
        config.sanitize(data_home)?;
        let config = Arc::new(config);

        let wal_raw_entry_reader = Arc::new(LogStoreRawEntryReader::new(log_store.clone()));

        // A negative total is clamped to zero before the unsigned conversion.
        let total_memory = get_total_memory_bytes().max(0) as u64;
        let scan_memory_limit = config.scan_memory_limit.resolve(total_memory) as usize;
        let scan_memory_tracker =
            QueryMemoryTracker::builder(scan_memory_limit, config.scan_memory_on_exhausted)
                .on_update(|usage| {
                    SCAN_MEMORY_USAGE_BYTES.set(usage as i64);
                })
                .on_exhausted(|| {
                    SCAN_MEMORY_EXHAUSTED_TOTAL.inc();
                })
                .on_reject(|| {
                    SCAN_REQUESTS_REJECTED_TOTAL.inc();
                })
                .build();

        let workers = WorkerGroup::start_for_test(
            config.clone(),
            log_store,
            object_store_manager,
            write_buffer_manager,
            listener,
            schema_metadata_manager,
            file_ref_manager,
            time_provider,
            partition_expr_fetcher,
        )
        .await?;

        let inner = EngineInner {
            workers,
            config,
            wal_raw_entry_reader,
            scan_memory_tracker,
            #[cfg(feature = "enterprise")]
            extension_range_provider_factory: None,
        };

        Ok(MitoEngine {
            inner: Arc::new(inner),
        })
    }

    /// Returns the purge scheduler of the workers.
    pub fn purge_scheduler(&self) -> &crate::schedule::scheduler::SchedulerRef {
        self.inner.workers.purge_scheduler()
    }
}
1444
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::sst::file::FileMeta;

    /// Builds a [RegionEdit] that only carries file changes; every other field is
    /// `None`.
    fn files_only_edit(files_to_add: Vec<FileMeta>, files_to_remove: Vec<FileMeta>) -> RegionEdit {
        RegionEdit {
            files_to_add,
            files_to_remove,
            timestamp_ms: None,
            compaction_time_window: None,
            flushed_entry_id: None,
            flushed_sequence: None,
            committed_sequence: None,
        }
    }

    #[test]
    fn test_is_valid_region_edit() {
        let one_file = || vec![FileMeta::default()];

        // Valid: has only "files_to_add"
        assert!(is_valid_region_edit(&files_only_edit(one_file(), vec![])));

        // Invalid: "files_to_add" and "files_to_remove" are both empty
        assert!(!is_valid_region_edit(&files_only_edit(vec![], vec![])));

        // Valid: has only "files_to_remove"
        assert!(is_valid_region_edit(&files_only_edit(vec![], one_file())));

        // Valid: both "files_to_add" and "files_to_remove" are not empty
        assert!(is_valid_region_edit(&files_only_edit(
            one_file(),
            one_file()
        )));

        // Invalid: other fields are not all "None"s
        let with_time_window = RegionEdit {
            compaction_time_window: Some(Duration::from_secs(1)),
            ..files_only_edit(one_file(), vec![])
        };
        assert!(!is_valid_region_edit(&with_time_window));

        let with_flushed_entry_id = RegionEdit {
            flushed_entry_id: Some(1),
            ..files_only_edit(one_file(), vec![])
        };
        assert!(!is_valid_region_edit(&with_flushed_entry_id));

        let with_flushed_sequence = RegionEdit {
            flushed_sequence: Some(1),
            ..files_only_edit(one_file(), vec![])
        };
        assert!(!is_valid_region_edit(&with_flushed_sequence));
    }
}