Skip to main content

mito2/
region.rs

1// Copyright 2023 Greptime Team
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Mito region.
16
17pub mod catchup;
18pub mod opener;
19pub mod options;
20pub mod utils;
21pub(crate) mod version;
22
23use std::collections::hash_map::Entry;
24use std::collections::{HashMap, HashSet};
25use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
26use std::sync::{Arc, Mutex, RwLock};
27
28use common_base::hash::partition_expr_version;
29use common_telemetry::{error, info, warn};
30use crossbeam_utils::atomic::AtomicCell;
31use partition::expr::PartitionExpr;
32use snafu::{OptionExt, ResultExt, ensure};
33use store_api::ManifestVersion;
34use store_api::codec::PrimaryKeyEncoding;
35use store_api::logstore::provider::Provider;
36use store_api::metadata::RegionMetadataRef;
37use store_api::region_engine::{
38    RegionManifestInfo, RegionRole, RegionStatistic, SettableRegionRoleState,
39};
40use store_api::region_request::{PathType, StagingPartitionDirective};
41use store_api::sst_entry::ManifestSstEntry;
42use store_api::storage::{FileId, RegionId, SequenceNumber};
43use tokio::sync::RwLockWriteGuard;
44pub use utils::*;
45
46use crate::access_layer::AccessLayerRef;
47use crate::error::{
48    InvalidPartitionExprSnafu, RegionNotFoundSnafu, RegionStateSnafu, RegionTruncatedSnafu, Result,
49    UnexpectedSnafu, UpdateManifestSnafu,
50};
51use crate::manifest::action::{
52    RegionChange, RegionManifest, RegionMetaAction, RegionMetaActionList,
53};
54use crate::manifest::manager::RegionManifestManager;
55use crate::region::version::{VersionControlRef, VersionRef};
56use crate::request::{OnFailure, OptionOutputTx};
57use crate::sst::file::FileMeta;
58use crate::sst::file_purger::FilePurgerRef;
59use crate::sst::location::{index_file_path, sst_file_path};
60use crate::time_provider::TimeProviderRef;
61
/// Approximate ratio used to estimate the WAL size of a region.
///
/// The memtable usage of a region is multiplied by this factor to estimate its WAL usage.
const ESTIMATED_WAL_FACTOR: f32 = 0.42825;
64
/// Usage of a region, including the region id, WAL usage, SST usage and manifest usage.
#[derive(Debug)]
pub struct RegionUsage {
    /// Id of the region the usage belongs to.
    pub region_id: RegionId,
    /// Usage of the WAL.
    pub wal_usage: u64,
    /// Usage of the SST files.
    pub sst_usage: u64,
    /// Usage of the manifest.
    pub manifest_usage: u64,
}
73
74impl RegionUsage {
75    pub fn disk_usage(&self) -> u64 {
76        self.wal_usage + self.sst_usage + self.manifest_usage
77    }
78}
79
/// State of a region while it acts as a leader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegionLeaderState {
    /// The region is opened and is writable.
    Writable,
    /// The region is in staging mode - writable but no checkpoint/compaction.
    Staging,
    /// The region is entering staging mode - write requests will be stalled.
    EnteringStaging,
    /// The region is altering.
    Altering,
    /// The region is dropping.
    Dropping,
    /// The region is truncating.
    Truncating,
    /// The region is handling a region edit.
    Editing,
    /// The region is stepping down.
    Downgrading,
}
99
/// Role state of a region: either a leader in a specific [`RegionLeaderState`] or a follower.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegionRoleState {
    /// The region is a leader and carries its leader state.
    Leader(RegionLeaderState),
    /// The region is a follower.
    Follower,
}
105
106impl RegionRoleState {
107    /// Converts the region role state to leader state if it is a leader state.
108    pub fn into_leader_state(self) -> Option<RegionLeaderState> {
109        match self {
110            RegionRoleState::Leader(leader_state) => Some(leader_state),
111            RegionRoleState::Follower => None,
112        }
113    }
114}
115
/// Metadata and runtime status of a region.
///
/// Writing and reading a region follow a single-writer-multi-reader rule:
/// - Only the region worker thread this region belongs to can modify the metadata.
/// - Multiple reader threads are allowed to read a specific `version` of a region.
#[derive(Debug)]
pub struct MitoRegion {
    /// Id of this region.
    ///
    /// Accessing region id from the version control is inconvenient so
    /// we also store it here.
    pub(crate) region_id: RegionId,

    /// Version controller for this region.
    ///
    /// We MUST update the version control inside the write lock of the region manifest manager.
    pub(crate) version_control: VersionControlRef,
    /// SSTs accessor for this region.
    pub(crate) access_layer: AccessLayerRef,
    /// Context to maintain manifest for this region.
    pub(crate) manifest_ctx: ManifestContextRef,
    /// SST file purger.
    pub(crate) file_purger: FilePurgerRef,
    /// The provider of log store.
    pub(crate) provider: Provider,
    /// Last flush time in millis.
    last_flush_millis: AtomicI64,
    /// Last compaction time in millis.
    last_compaction_millis: AtomicI64,
    /// Provider to get current time.
    time_provider: TimeProviderRef,
    /// The topic's latest entry id since the region's last flushing.
    /// **Only used for remote WAL pruning.**
    ///
    /// The value will be updated to the latest offset of the topic
    /// if the region receives a flush request or schedules a periodic flush task
    /// and the region's memtable is empty.
    ///
    /// There are no WAL entries in range [flushed_entry_id, topic_latest_entry_id] for the
    /// current region, which means these WAL entries may be able to be pruned up to
    /// `topic_latest_entry_id`.
    pub(crate) topic_latest_entry_id: AtomicU64,
    /// The total bytes written to the region.
    pub(crate) written_bytes: Arc<AtomicU64>,
    /// Manifest statistics (total manifest size, manifest version and removed file count)
    /// reported by the region statistic.
    stats: ManifestStats,
}
162
/// Shared, reference-counted handle to a [`MitoRegion`].
pub type MitoRegionRef = Arc<MitoRegion>;
164
/// Partition information of a region in staging: the staging directive together with the
/// version marker derived from it.
#[derive(Debug, Clone)]
pub(crate) struct StagingPartitionInfo {
    /// The directive this info was built from.
    pub(crate) partition_directive: StagingPartitionDirective,
    /// Version marker derived from the directive: the result of `partition_expr_version` for
    /// `UpdatePartitionExpr`, and `0` for `RejectAllWrites`.
    pub(crate) partition_rule_version: u64,
}
170
171impl StagingPartitionInfo {
172    /// Returns the partition expression carried by the staging directive, if any.
173    pub(crate) fn partition_expr(&self) -> Option<&str> {
174        self.partition_directive.partition_expr()
175    }
176
177    /// Builds staging partition info from a directive and derives its version marker.
178    pub(crate) fn from_partition_directive(partition_directive: StagingPartitionDirective) -> Self {
179        let partition_rule_version = match &partition_directive {
180            StagingPartitionDirective::UpdatePartitionExpr(expr) => {
181                partition_expr_version(Some(expr))
182            }
183            StagingPartitionDirective::RejectAllWrites => 0,
184        };
185        Self {
186            partition_directive,
187            partition_rule_version,
188        }
189    }
190}
191
192impl MitoRegion {
193    /// Stop background managers for this region.
194    pub(crate) async fn stop(&self) {
195        self.manifest_ctx
196            .manifest_manager
197            .write()
198            .await
199            .stop()
200            .await;
201
202        info!(
203            "Stopped region manifest manager, region_id: {}",
204            self.region_id
205        );
206    }
207
208    /// Returns current metadata of the region.
209    pub fn metadata(&self) -> RegionMetadataRef {
210        let version_data = self.version_control.current();
211        version_data.version.metadata.clone()
212    }
213
214    /// Returns primary key encoding of the region.
215    pub(crate) fn primary_key_encoding(&self) -> PrimaryKeyEncoding {
216        let version_data = self.version_control.current();
217        version_data.version.metadata.primary_key_encoding
218    }
219
220    /// Returns current version of the region.
221    pub(crate) fn version(&self) -> VersionRef {
222        let version_data = self.version_control.current();
223        version_data.version
224    }
225
226    /// Returns last flush timestamp in millis.
227    pub(crate) fn last_flush_millis(&self) -> i64 {
228        self.last_flush_millis.load(Ordering::Relaxed)
229    }
230
231    /// Update flush time to current time.
232    pub(crate) fn update_flush_millis(&self) {
233        let now = self.time_provider.current_time_millis();
234        self.last_flush_millis.store(now, Ordering::Relaxed);
235    }
236
237    /// Returns last compaction timestamp in millis.
238    pub(crate) fn last_compaction_millis(&self) -> i64 {
239        self.last_compaction_millis.load(Ordering::Relaxed)
240    }
241
242    /// Update compaction time to current time.
243    pub(crate) fn update_compaction_millis(&self) {
244        let now = self.time_provider.current_time_millis();
245        self.last_compaction_millis.store(now, Ordering::Relaxed);
246    }
247
248    /// Returns the table dir.
249    pub(crate) fn table_dir(&self) -> &str {
250        self.access_layer.table_dir()
251    }
252
253    /// Returns the path type of the region.
254    pub(crate) fn path_type(&self) -> PathType {
255        self.access_layer.path_type()
256    }
257
258    /// Returns whether the region is writable.
259    pub(crate) fn is_writable(&self) -> bool {
260        matches!(
261            self.manifest_ctx.state.load(),
262            RegionRoleState::Leader(RegionLeaderState::Writable)
263                | RegionRoleState::Leader(RegionLeaderState::Staging)
264        )
265    }
266
267    /// Returns whether the region is flushable.
268    pub(crate) fn is_flushable(&self) -> bool {
269        matches!(
270            self.manifest_ctx.state.load(),
271            RegionRoleState::Leader(RegionLeaderState::Writable)
272                | RegionRoleState::Leader(RegionLeaderState::Staging)
273                | RegionRoleState::Leader(RegionLeaderState::Downgrading)
274        )
275    }
276
277    /// Returns whether the region should abort index building.
278    pub(crate) fn should_abort_index(&self) -> bool {
279        matches!(
280            self.manifest_ctx.state.load(),
281            RegionRoleState::Follower
282                | RegionRoleState::Leader(RegionLeaderState::Dropping)
283                | RegionRoleState::Leader(RegionLeaderState::Truncating)
284                | RegionRoleState::Leader(RegionLeaderState::Downgrading)
285                | RegionRoleState::Leader(RegionLeaderState::Staging)
286        )
287    }
288
289    /// Returns whether the region is downgrading.
290    pub(crate) fn is_downgrading(&self) -> bool {
291        matches!(
292            self.manifest_ctx.state.load(),
293            RegionRoleState::Leader(RegionLeaderState::Downgrading)
294        )
295    }
296
297    /// Returns whether the region is in staging mode.
298    pub(crate) fn is_staging(&self) -> bool {
299        self.manifest_ctx.state.load() == RegionRoleState::Leader(RegionLeaderState::Staging)
300    }
301
302    /// Returns whether the region is entering staging mode.
303    pub(crate) fn is_enter_staging(&self) -> bool {
304        self.manifest_ctx.state.load()
305            == RegionRoleState::Leader(RegionLeaderState::EnteringStaging)
306    }
307
308    pub fn region_id(&self) -> RegionId {
309        self.region_id
310    }
311
312    pub fn find_committed_sequence(&self) -> SequenceNumber {
313        self.version_control.committed_sequence()
314    }
315
316    /// Returns the latest sequence that has already been persisted into SSTs.
317    ///
318    /// Incremental memtable-only reads must use a cursor greater than or equal to
319    /// this boundary; older cursors are stale because the corresponding updates may
320    /// already have been flushed out of memtables.
321    pub fn flushed_sequence(&self) -> SequenceNumber {
322        self.version_control.current().version.flushed_sequence
323    }
324
325    /// Returns whether the region is readonly.
326    pub fn is_follower(&self) -> bool {
327        self.manifest_ctx.state.load() == RegionRoleState::Follower
328    }
329
330    /// Returns the state of the region.
331    pub(crate) fn state(&self) -> RegionRoleState {
332        self.manifest_ctx.state.load()
333    }
334
335    /// Sets the region role state.
336    pub(crate) fn set_role(&self, next_role: RegionRole) {
337        self.manifest_ctx.set_role(next_role, self.region_id);
338    }
339
340    pub(crate) fn region_role(&self) -> RegionRole {
341        match self.state() {
342            RegionRoleState::Follower => RegionRole::Follower,
343            RegionRoleState::Leader(RegionLeaderState::Staging) => RegionRole::StagingLeader,
344            RegionRoleState::Leader(RegionLeaderState::Downgrading) => {
345                RegionRole::DowngradingLeader
346            }
347            RegionRoleState::Leader(_) => RegionRole::Leader,
348        }
349    }
350
351    /// Sets the altering state.
352    /// You should call this method in the worker loop.
353    pub(crate) fn set_altering(&self) -> Result<()> {
354        self.compare_exchange_state(
355            RegionLeaderState::Writable,
356            RegionRoleState::Leader(RegionLeaderState::Altering),
357        )
358    }
359
360    /// Sets the dropping state.
361    /// You should call this method in the worker loop.
362    pub(crate) fn set_dropping(&self, expect: RegionLeaderState) -> Result<()> {
363        self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Dropping))
364    }
365
366    /// Sets the truncating state.
367    /// You should call this method in the worker loop.
368    pub(crate) fn set_truncating(&self) -> Result<()> {
369        self.compare_exchange_state(
370            RegionLeaderState::Writable,
371            RegionRoleState::Leader(RegionLeaderState::Truncating),
372        )
373    }
374
375    /// Sets the editing state.
376    /// You should call this method in the worker loop.
377    pub(crate) fn set_editing(&self, expect: RegionLeaderState) -> Result<()> {
378        self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Editing))
379    }
380
381    /// Sets the staging state.
382    ///
383    /// You should call this method in the worker loop.
384    /// Transitions from Writable to Staging state.
385    /// Cleans any existing staging manifests before entering staging mode.
386    pub(crate) async fn set_staging(
387        &self,
388        manager: &mut RwLockWriteGuard<'_, RegionManifestManager>,
389    ) -> Result<()> {
390        manager.store().clear_staging_manifests().await?;
391
392        self.compare_exchange_state(
393            RegionLeaderState::Writable,
394            RegionRoleState::Leader(RegionLeaderState::Staging),
395        )
396    }
397
398    /// Sets the entering staging state.
399    pub(crate) fn set_entering_staging(&self) -> Result<()> {
400        self.compare_exchange_state(
401            RegionLeaderState::Writable,
402            RegionRoleState::Leader(RegionLeaderState::EnteringStaging),
403        )
404    }
405
406    /// Exits the staging state back to writable.
407    ///
408    /// You should call this method in the worker loop.
409    /// Transitions from Staging to Writable state.
410    pub fn exit_staging(&self) -> Result<()> {
411        self.manifest_ctx.exit_staging(
412            self.region_id,
413            RegionRoleState::Leader(RegionLeaderState::Writable),
414        )
415    }
416
    /// Sets the region role state gracefully. This acquires the manifest write lock.
    ///
    /// The manifest write lock is held for the whole transition. The allowed transitions
    /// depend on the requested `state`:
    /// - `Leader`: from `Staging` the staged manifests are merged via
    ///   `exit_staging_on_success`; `Writable` is a no-op; any other state is an error.
    /// - `StagingLeader`: from `Writable` the region enters staging; `Staging` is a no-op;
    ///   any other state is an error.
    /// - `Follower`: a staging leader exits staging first; any leader is demoted; a follower
    ///   is a no-op.
    /// - `DowngradingLeader`: a staging leader exits staging first; a writable leader is
    ///   downgraded; `Downgrading` is a no-op; any other state only logs a warning.
    ///
    /// If the region ends up as a writable leader, backfilled metadata (a partition
    /// expression missing from the manifest) is persisted on a best-effort basis.
    ///
    /// # Errors
    ///
    /// Returns a region state error when the transition to `Leader` or `StagingLeader` is
    /// requested from a disallowed state, and propagates errors of the staging helpers.
    pub(crate) async fn set_role_state_gracefully(
        &self,
        state: SettableRegionRoleState,
    ) -> Result<()> {
        // Hold the manifest write lock until the end of the transition.
        let mut manager: RwLockWriteGuard<'_, RegionManifestManager> =
            self.manifest_ctx.manifest_manager.write().await;
        let current_state = self.state();

        match state {
            SettableRegionRoleState::Leader => {
                // Exit staging mode and return to normal writable leader
                // Only allowed from staging state
                match current_state {
                    RegionRoleState::Leader(RegionLeaderState::Staging) => {
                        info!("Exiting staging mode for region {}", self.region_id);
                        // Use the success exit path that merges all staged manifests
                        self.exit_staging_on_success(&mut manager).await?;
                    }
                    RegionRoleState::Leader(RegionLeaderState::Writable) => {
                        // Already in desired state - no-op
                        info!("Region {} already in normal leader mode", self.region_id);
                    }
                    _ => {
                        // Only staging -> leader transition is allowed
                        return Err(RegionStateSnafu {
                            region_id: self.region_id,
                            state: current_state,
                            expect: RegionRoleState::Leader(RegionLeaderState::Staging),
                        }
                        .build());
                    }
                }
            }

            SettableRegionRoleState::StagingLeader => {
                // Enter staging mode from normal writable leader
                // Only allowed from writable leader state
                match current_state {
                    RegionRoleState::Leader(RegionLeaderState::Writable) => {
                        info!("Entering staging mode for region {}", self.region_id);
                        self.set_staging(&mut manager).await?;
                    }
                    RegionRoleState::Leader(RegionLeaderState::Staging) => {
                        // Already in desired state - no-op
                        info!("Region {} already in staging mode", self.region_id);
                    }
                    _ => {
                        // Only writable -> staging transition is allowed
                        return Err(RegionStateSnafu {
                            region_id: self.region_id,
                            state: current_state,
                            expect: RegionRoleState::Leader(RegionLeaderState::Writable),
                        }
                        .build());
                    }
                }
            }

            SettableRegionRoleState::Follower => {
                // Make this region a follower
                match current_state {
                    RegionRoleState::Leader(RegionLeaderState::Staging) => {
                        info!(
                            "Exiting staging and demoting region {} to follower",
                            self.region_id
                        );
                        // Leave staging first, then demote.
                        self.exit_staging()?;
                        self.set_role(RegionRole::Follower);
                    }
                    RegionRoleState::Leader(_) => {
                        info!("Demoting region {} from leader to follower", self.region_id);
                        self.set_role(RegionRole::Follower);
                    }
                    RegionRoleState::Follower => {
                        // Already in desired state - no-op
                        info!("Region {} already in follower mode", self.region_id);
                    }
                }
            }

            SettableRegionRoleState::DowngradingLeader => {
                // downgrade this region to downgrading leader
                match current_state {
                    RegionRoleState::Leader(RegionLeaderState::Staging) => {
                        info!(
                            "Exiting staging and entering downgrade for region {}",
                            self.region_id
                        );
                        // Leave staging first, then downgrade.
                        self.exit_staging()?;
                        self.set_role(RegionRole::DowngradingLeader);
                    }
                    RegionRoleState::Leader(RegionLeaderState::Writable) => {
                        info!("Starting downgrade for region {}", self.region_id);
                        self.set_role(RegionRole::DowngradingLeader);
                    }
                    RegionRoleState::Leader(RegionLeaderState::Downgrading) => {
                        // Already in desired state - no-op
                        info!("Region {} already in downgrading mode", self.region_id);
                    }
                    _ => {
                        // Any other state is not an error: the request is only logged and
                        // the state is left unchanged.
                        warn!(
                            "Cannot start downgrade for region {} from state {:?}",
                            self.region_id, current_state
                        );
                    }
                }
            }
        }

        // Hack(zhongzc): If we have just become leader (writable), persist any backfilled metadata.
        if self.state() == RegionRoleState::Leader(RegionLeaderState::Writable) {
            // Persist backfilled metadata if manifest is missing fields (e.g., partition_expr)
            let manifest_meta = &manager.manifest().metadata;
            let current_version = self.version();
            let current_meta = &current_version.metadata;
            if manifest_meta.partition_expr.is_none() && current_meta.partition_expr.is_some() {
                let action = RegionMetaAction::Change(RegionChange {
                    metadata: current_meta.clone(),
                    sst_format: current_version.options.sst_format.unwrap_or_default(),
                    append_mode: None,
                });
                let result = manager
                    .update(RegionMetaActionList::with_action(action), false)
                    .await;

                // Best effort: a failure is logged and does not fail the role change.
                match result {
                    Ok(version) => {
                        info!(
                            "Successfully persisted backfilled metadata for region {}, version: {}",
                            self.region_id, version
                        );
                    }
                    Err(e) => {
                        warn!(e; "Failed to persist backfilled metadata for region {}", self.region_id);
                    }
                }
            }
        }

        // Release the manifest write lock explicitly.
        drop(manager);

        Ok(())
    }
560
561    /// Switches the region state to `RegionRoleState::Leader(RegionLeaderState::Writable)` if the current state is `expect`.
562    /// Otherwise, logs an error.
563    pub(crate) fn switch_state_to_writable(&self, expect: RegionLeaderState) {
564        if let Err(e) = self
565            .compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Writable))
566        {
567            error!(e; "failed to switch region state to writable, expect state is {:?}", expect);
568        }
569    }
570
571    /// Switches the region state to `RegionRoleState::Leader(RegionLeaderState::Staging)` if the current state is `expect`.
572    /// Otherwise, logs an error.
573    pub(crate) fn switch_state_to_staging(&self, expect: RegionLeaderState) {
574        if let Err(e) =
575            self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Staging))
576        {
577            error!(e; "failed to switch region state to staging, expect state is {:?}", expect);
578        }
579    }
580
581    /// Returns the region statistic.
582    pub(crate) fn region_statistic(&self) -> RegionStatistic {
583        let version = self.version();
584        let memtables = &version.memtables;
585        let memtable_usage = (memtables.mutable_usage() + memtables.immutables_usage()) as u64;
586
587        let sst_usage = version.ssts.sst_usage();
588        let index_usage = version.ssts.index_usage();
589        let flushed_entry_id = version.flushed_entry_id;
590
591        let wal_usage = self.estimated_wal_usage(memtable_usage);
592        let manifest_usage = self.stats.total_manifest_size();
593        let num_rows = version.ssts.num_rows() + version.memtables.num_rows();
594        let num_files = version.ssts.num_files();
595        let manifest_version = self.stats.manifest_version();
596        let file_removed_cnt = self.stats.file_removed_cnt();
597
598        let topic_latest_entry_id = self.topic_latest_entry_id.load(Ordering::Relaxed);
599        let written_bytes = self.written_bytes.load(Ordering::Relaxed);
600
601        RegionStatistic {
602            num_rows,
603            memtable_size: memtable_usage,
604            wal_size: wal_usage,
605            manifest_size: manifest_usage,
606            sst_size: sst_usage,
607            sst_num: num_files,
608            index_size: index_usage,
609            manifest: RegionManifestInfo::Mito {
610                manifest_version,
611                flushed_entry_id,
612                file_removed_cnt,
613            },
614            data_topic_latest_entry_id: topic_latest_entry_id,
615            metadata_topic_latest_entry_id: topic_latest_entry_id,
616            written_bytes,
617        }
618    }
619
620    /// Estimated WAL size in bytes.
621    /// Use the memtables size to estimate the size of wal.
622    fn estimated_wal_usage(&self, memtable_usage: u64) -> u64 {
623        ((memtable_usage as f32) * ESTIMATED_WAL_FACTOR) as u64
624    }
625
626    /// Sets the state of the region to given state if the current state equals to
627    /// the expected.
628    fn compare_exchange_state(
629        &self,
630        expect: RegionLeaderState,
631        state: RegionRoleState,
632    ) -> Result<()> {
633        self.manifest_ctx
634            .state
635            .compare_exchange(RegionRoleState::Leader(expect), state)
636            .map_err(|actual| {
637                RegionStateSnafu {
638                    region_id: self.region_id,
639                    state: actual,
640                    expect: RegionRoleState::Leader(expect),
641                }
642                .build()
643            })?;
644        Ok(())
645    }
646
647    pub fn access_layer(&self) -> AccessLayerRef {
648        self.access_layer.clone()
649    }
650
651    /// Returns the SST entries of the region.
652    pub async fn manifest_sst_entries(&self) -> Vec<ManifestSstEntry> {
653        let table_dir = self.table_dir();
654        let path_type = self.access_layer.path_type();
655
656        let visible_ssts = self
657            .version()
658            .ssts
659            .levels()
660            .iter()
661            .flat_map(|level| level.files().map(|file| file.file_id().file_id()))
662            .collect::<HashSet<_>>();
663
664        let manifest_files = self.manifest_ctx.manifest().await.files.clone();
665        let staging_files = self
666            .manifest_ctx
667            .staging_manifest()
668            .await
669            .map(|m| m.files.clone())
670            .unwrap_or_default();
671        let files = manifest_files
672            .into_iter()
673            .chain(staging_files)
674            .collect::<HashMap<_, _>>();
675
676        files
677            .values()
678            .map(|meta| {
679                let region_id = self.region_id;
680                let origin_region_id = meta.region_id;
681                let (index_version, index_file_path, index_file_size) = if meta.index_file_size > 0
682                {
683                    let index_file_path = index_file_path(table_dir, meta.index_id(), path_type);
684                    (
685                        meta.index_version,
686                        Some(index_file_path),
687                        Some(meta.index_file_size),
688                    )
689                } else {
690                    (0, None, None)
691                };
692                let visible = visible_ssts.contains(&meta.file_id);
693                ManifestSstEntry {
694                    table_dir: table_dir.to_string(),
695                    region_id,
696                    table_id: region_id.table_id(),
697                    region_number: region_id.region_number(),
698                    region_group: region_id.region_group(),
699                    region_sequence: region_id.region_sequence(),
700                    file_id: meta.file_id.to_string(),
701                    index_version,
702                    level: meta.level,
703                    file_path: sst_file_path(table_dir, meta.file_id(), path_type),
704                    file_size: meta.file_size,
705                    index_file_path,
706                    index_file_size,
707                    num_rows: meta.num_rows,
708                    num_row_groups: meta.num_row_groups,
709                    num_series: Some(meta.num_series),
710                    min_ts: meta.time_range.0,
711                    max_ts: meta.time_range.1,
712                    sequence: meta.sequence.map(|s| s.get()),
713                    origin_region_id,
714                    node_id: None,
715                    visible,
716                }
717            })
718            .collect()
719    }
720
721    /// Returns the file metas of the region by file ids.
722    pub async fn file_metas(&self, file_ids: &[FileId]) -> Vec<Option<FileMeta>> {
723        let manifest_files = self.manifest_ctx.manifest().await.files.clone();
724
725        file_ids
726            .iter()
727            .map(|file_id| manifest_files.get(file_id).cloned())
728            .collect::<Vec<_>>()
729    }
730
    /// Exit staging mode successfully by merging all staged manifests and making them visible.
    ///
    /// `manager` is the already-acquired write guard of the region's manifest manager; every
    /// manifest operation in this method goes through it.
    ///
    /// The method proceeds as follows:
    /// 1. Checks that the region is currently a staging leader.
    /// 2. Merges the staged manifest actions. If there is nothing to merge, it only leaves
    ///    the staging state and returns.
    /// 3. Validates the merged actions: they must contain an edit and either change actions
    ///    or partition expr change actions (but not both), and a change action must keep
    ///    the column metadata untouched.
    /// 4. Writes the merged actions to the normal (non-staging) manifest.
    /// 5. Applies the same actions to the in-memory version control.
    /// 6. Clears the staging manifests (a failure here is only logged) and leaves the
    ///    staging state.
    ///
    /// # Errors
    ///
    /// Returns an error if the region is not in the staging state, if the merged actions
    /// fail validation, if merging or updating the manifest fails, or if leaving the
    /// staging state fails.
    pub(crate) async fn exit_staging_on_success(
        &self,
        manager: &mut RwLockWriteGuard<'_, RegionManifestManager>,
    ) -> Result<()> {
        let current_state = self.manifest_ctx.current_state();
        ensure!(
            current_state == RegionRoleState::Leader(RegionLeaderState::Staging),
            RegionStateSnafu {
                region_id: self.region_id,
                state: current_state,
                expect: RegionRoleState::Leader(RegionLeaderState::Staging),
            }
        );

        // Merge all staged manifest actions
        let merged_actions = match manager.merge_staged_actions(current_state).await? {
            Some(actions) => actions,
            None => {
                info!(
                    "No staged manifests to merge for region {}, exiting staging mode without changes",
                    self.region_id
                );
                // Even if no manifests to merge, we still need to exit staging mode
                self.exit_staging()?;
                return Ok(());
            }
        };
        // Classify the merged actions once so the shape of the list can be validated
        // before anything is persisted.
        let expect_change = merged_actions.actions.iter().any(|a| a.is_change());
        let expect_partition_expr_change = merged_actions
            .actions
            .iter()
            .any(|a| a.is_partition_expr_change());
        let expect_edit = merged_actions.actions.iter().any(|a| a.is_edit());
        // A change and a partition expr change are mutually exclusive ...
        ensure!(
            !(expect_change && expect_partition_expr_change),
            UnexpectedSnafu {
                reason: "unexpected both change and partition expr change actions in merged actions"
            }
        );
        // ... but one of them has to be present ...
        ensure!(
            expect_change || expect_partition_expr_change,
            UnexpectedSnafu {
                reason: "expect a change or partition expr change action in merged actions"
            }
        );
        // ... together with an edit.
        ensure!(
            expect_edit,
            UnexpectedSnafu {
                reason: "expect an edit action in merged actions"
            }
        );

        // Split a clone: `merged_actions` itself is handed to `manager.update` below.
        let (merged_partition_expr_change, merged_change, merged_edit) =
            merged_actions.clone().split_region_change_and_edit();
        if let Some(change) = &merged_change {
            // In staging exit we only allow metadata-only updates. A `Change`
            // action is accepted only when column definitions are unchanged;
            // otherwise it is treated as a schema change and rejected.
            let current_column_metadatas = &self.version().metadata.column_metadatas;
            ensure!(
                change.metadata.column_metadatas == *current_column_metadatas,
                UnexpectedSnafu {
                    reason: "change action alters column metadata in staging exit"
                }
            );
        }

        // Submit merged actions using the manifest manager's update method
        // Pass the `false` so it saves to normal directory, not staging
        let new_version = manager.update(merged_actions, false).await?;
        info!(
            "Successfully submitted merged staged manifests for region {}, new version: {}",
            self.region_id, new_version
        );

        // Apply the merged changes to in-memory version control
        if let Some(change) = merged_partition_expr_change {
            // Only the partition expr changes; start from a copy of the current metadata.
            let mut new_metadata = self.version().metadata.as_ref().clone();
            new_metadata.set_partition_expr(change.partition_expr);
            self.version_control.alter_metadata(new_metadata.into());
        }
        if let Some(change) = merged_change {
            self.version_control.alter_metadata(change.metadata);
        }
        self.version_control
            .apply_edit(Some(merged_edit), &[], self.file_purger.clone());

        // Clear all staging manifests and transit state
        // A cleanup failure is logged but does not fail the exit: the merged actions
        // were already written to the normal manifest above.
        if let Err(e) = manager.clear_staging_manifest_and_dir().await {
            error!(e; "Failed to clear staging manifest dir for region {}", self.region_id);
        }
        self.exit_staging()?;

        Ok(())
    }
827
828    /// Returns the partition expression string for this region.
829    ///
830    /// If the region is currently in staging state, this returns the partition expression held in
831    /// the staging partition field. Otherwise, it returns the partition expression from the primary
832    /// region metadata (current committed version).
833    pub fn maybe_staging_partition_expr_str(&self) -> Option<String> {
834        let is_staging = self.is_staging();
835        if is_staging {
836            let staging_partition_info = self.manifest_ctx.staging_partition_info();
837            if staging_partition_info.is_none() {
838                warn!(
839                    "Staging partition expr is none for region {} in staging state",
840                    self.region_id
841                );
842            }
843            staging_partition_info
844                .as_ref()
845                .and_then(|info| info.partition_expr().map(ToString::to_string))
846        } else {
847            let version = self.version();
848            version.metadata.partition_expr.clone()
849        }
850    }
851
852    pub fn expected_partition_expr_version(&self) -> u64 {
853        if self.is_staging() {
854            self.manifest_ctx
855                .staging_partition_info()
856                .as_ref()
857                .map(|info| info.partition_rule_version)
858                .unwrap_or_default()
859        } else {
860            self.version().metadata.partition_expr_version
861        }
862    }
863
864    /// Returns whether writes should be rejected for this region in staging mode.
865    pub(crate) fn reject_all_writes_in_staging(&self) -> bool {
866        if !self.is_staging() {
867            return false;
868        }
869        self.manifest_ctx
870            .staging_partition_info()
871            .as_ref()
872            .map(|info| {
873                matches!(
874                    info.partition_directive,
875                    StagingPartitionDirective::RejectAllWrites
876                )
877            })
878            .unwrap_or(false)
879    }
880}
881
/// Context to update the region manifest.
///
/// Bundles the manifest manager with the region role state and the staging partition
/// info that the methods of this type consult or update.
#[derive(Debug)]
pub(crate) struct ManifestContext {
    /// Manager to maintain manifest for this region.
    ///
    /// Guarded by an async read-write lock; manifest updates take the write lock.
    pub(crate) manifest_manager: tokio::sync::RwLock<RegionManifestManager>,
    /// The state of the region. The region checks the state before updating
    /// manifest.
    state: AtomicCell<RegionRoleState>,
    /// Partition info of the region in staging mode.
    ///
    /// During the staging mode, the region metadata in [`VersionControlRef`] is not updated,
    /// so we need to store the partition info separately.
    ///
    /// Starts as `None`, is populated by [`ManifestContext::set_staging_partition_info`]
    /// and is reset to `None` by [`ManifestContext::exit_staging`].
    staging_partition_info: Mutex<Option<StagingPartitionInfo>>,
}
896
897impl ManifestContext {
898    pub(crate) fn new(manager: RegionManifestManager, state: RegionRoleState) -> Self {
899        ManifestContext {
900            manifest_manager: tokio::sync::RwLock::new(manager),
901            state: AtomicCell::new(state),
902            staging_partition_info: Mutex::new(None),
903        }
904    }
905
906    pub(crate) fn staging_partition_info(&self) -> Option<StagingPartitionInfo> {
907        self.staging_partition_info.lock().unwrap().clone()
908    }
909
910    pub(crate) fn set_staging_partition_info(&self, staging_partition_info: StagingPartitionInfo) {
911        let mut current = self.staging_partition_info.lock().unwrap();
912        debug_assert!(current.is_none());
913        *current = Some(staging_partition_info);
914    }
915
916    fn clear_staging_partition_info(&self) {
917        *self.staging_partition_info.lock().unwrap() = None;
918    }
919
920    pub(crate) fn exit_staging(
921        &self,
922        region_id: RegionId,
923        next_state: RegionRoleState,
924    ) -> Result<()> {
925        self.state
926            .compare_exchange(
927                RegionRoleState::Leader(RegionLeaderState::Staging),
928                next_state,
929            )
930            .map_err(|actual| {
931                RegionStateSnafu {
932                    region_id,
933                    state: actual,
934                    expect: RegionRoleState::Leader(RegionLeaderState::Staging),
935                }
936                .build()
937            })?;
938        self.clear_staging_partition_info();
939        Ok(())
940    }
941
942    pub(crate) async fn manifest_version(&self) -> ManifestVersion {
943        self.manifest_manager
944            .read()
945            .await
946            .manifest()
947            .manifest_version
948    }
949
950    pub(crate) async fn has_update(&self) -> Result<bool> {
951        self.manifest_manager.read().await.has_update().await
952    }
953
    /// Returns the current region role state.
    ///
    /// This is a plain load of the atomic state cell; the value may change right after
    /// it is read unless the caller prevents concurrent state transitions.
    pub(crate) fn current_state(&self) -> RegionRoleState {
        self.state.load()
    }
958
959    /// Installs the manifest changes from the current version to the target version (inclusive).
960    ///
961    /// Returns installed [RegionManifest].
962    /// **Note**: This function is not guaranteed to install the target version strictly.
963    /// The installed version may be greater than the target version.
964    pub(crate) async fn install_manifest_to(
965        &self,
966        version: ManifestVersion,
967    ) -> Result<Arc<RegionManifest>> {
968        let mut manager = self.manifest_manager.write().await;
969        manager.install_manifest_to(version).await?;
970
971        Ok(manager.manifest())
972    }
973
974    /// Updates the manifest if current state is `expect_state`.
975    pub(crate) async fn update_manifest(
976        &self,
977        expect_state: RegionLeaderState,
978        action_list: RegionMetaActionList,
979        is_staging: bool,
980    ) -> Result<ManifestVersion> {
981        self.update_manifest_with_state_check(action_list, is_staging, |current_state, region_id| {
982            // If expect_state is not downgrading, the current state must be either `expect_state` or downgrading.
983            //
984            // A downgrading leader rejects user writes but still allows
985            // flushing the memtable and updating the manifest.
986            if expect_state != RegionLeaderState::Downgrading {
987                if current_state == RegionRoleState::Leader(RegionLeaderState::Downgrading) {
988                    info!(
989                        "Region {} is in downgrading leader state, updating manifest. Expect state is {:?}",
990                        region_id, expect_state
991                    );
992                }
993                ensure!(
994                    current_state == RegionRoleState::Leader(expect_state)
995                        || current_state == RegionRoleState::Leader(RegionLeaderState::Downgrading),
996                    UpdateManifestSnafu {
997                        region_id,
998                        state: current_state,
999                    }
1000                );
1001            } else {
1002                ensure!(
1003                    current_state == RegionRoleState::Leader(expect_state),
1004                    RegionStateSnafu {
1005                        region_id,
1006                        state: current_state,
1007                        expect: RegionRoleState::Leader(expect_state),
1008                    }
1009                );
1010            }
1011
1012            Ok(())
1013        })
1014        .await
1015    }
1016
1017    /// Updates the manifest for compaction.
1018    ///
1019    /// Compaction may finish while a direct external region edit is in the transient
1020    /// `Editing` state. Direct external edits can remove files both when followers
1021    /// apply sync-region metadata and when a writable leader performs a direct edit
1022    /// such as `edit_region()`. Allowing compaction to publish in `Editing` is still
1023    /// safe because publication happens under the manifest write lock and compaction
1024    /// rechecks that its input files are still valid before committing.
1025    ///
1026    /// This intentionally writes to the normal manifest path (`is_staging = false`).
1027    /// Entering staging cancels or waits for active compactions before switching the
1028    /// region to `Staging`, so a compaction that started before staging still finishes
1029    /// against the normal manifest. Even if a manual compaction is requested while the
1030    /// region is already staging, compaction only sees SSTs in the normal visible
1031    /// region version; SSTs from staging manifests are not applied to region version
1032    /// control until staging exits successfully.
1033    pub(crate) async fn update_manifest_for_compaction(
1034        &self,
1035        action_list: RegionMetaActionList,
1036    ) -> Result<ManifestVersion> {
1037        self.update_manifest_with_state_check(action_list, false, |current_state, region_id| {
1038            ensure!(
1039                matches!(
1040                    current_state,
1041                    RegionRoleState::Leader(RegionLeaderState::Writable)
1042                        | RegionRoleState::Leader(RegionLeaderState::Editing)
1043                        | RegionRoleState::Leader(RegionLeaderState::Downgrading)
1044                ),
1045                UpdateManifestSnafu {
1046                    region_id,
1047                    state: current_state,
1048                }
1049            );
1050
1051            Ok(())
1052        })
1053        .await
1054    }
1055
1056    async fn update_manifest_with_state_check(
1057        &self,
1058        action_list: RegionMetaActionList,
1059        is_staging: bool,
1060        check_state: impl FnOnce(RegionRoleState, RegionId) -> Result<()>,
1061    ) -> Result<ManifestVersion> {
1062        // Acquires the write lock of the manifest manager.
1063        let mut manager = self.manifest_manager.write().await;
1064        // Gets current manifest.
1065        let manifest = manager.manifest();
1066        // Checks state inside the lock. This is to ensure that we won't update the manifest
1067        // after `set_readonly_gracefully()` is called.
1068        let current_state = self.state.load();
1069        check_state(current_state, manifest.metadata.region_id)?;
1070
1071        for action in &action_list.actions {
1072            // Checks whether the edit is still applicable.
1073            let RegionMetaAction::Edit(edit) = &action else {
1074                continue;
1075            };
1076
1077            // Checks whether the region is truncated.
1078            let Some(truncated_entry_id) = manifest.truncated_entry_id else {
1079                continue;
1080            };
1081
1082            // This is an edit from flush.
1083            if let Some(flushed_entry_id) = edit.flushed_entry_id {
1084                // A flush edit is valid after truncate in two cases:
1085                // 1. `flushed_entry_id` moves past `truncated_entry_id`, meaning it definitely
1086                //    flushed data newer than the truncate point.
1087                // 2. `flushed_entry_id` equals `truncated_entry_id`, but `flushed_sequence`
1088                //    increases. This happens in skip-WAL tables where entry id can stay at 0,
1089                //    while sequence still advances for post-truncate writes.
1090                //
1091                // We still reject stale flushes from before truncate:
1092                // if entry id is equal and sequence does not advance, the flush is outdated.
1093                let is_newer_entry = truncated_entry_id < flushed_entry_id;
1094                let is_same_entry_with_newer_sequence = truncated_entry_id == flushed_entry_id
1095                    && edit.flushed_sequence.is_some_and(|flushed_sequence| {
1096                        manifest.flushed_sequence < flushed_sequence
1097                    });
1098
1099                ensure!(
1100                    is_newer_entry || is_same_entry_with_newer_sequence,
1101                    RegionTruncatedSnafu {
1102                        region_id: manifest.metadata.region_id,
1103                    }
1104                );
1105            }
1106
1107            // This is an edit from compaction.
1108            if !edit.files_to_remove.is_empty() {
1109                // Input files of the compaction task has been truncated.
1110                for file in &edit.files_to_remove {
1111                    ensure!(
1112                        manifest.files.contains_key(&file.file_id),
1113                        RegionTruncatedSnafu {
1114                            region_id: manifest.metadata.region_id,
1115                        }
1116                    );
1117                }
1118            }
1119        }
1120
1121        // Now we can update the manifest.
1122        let version = manager.update(action_list, is_staging).await.inspect_err(
1123            |e| error!(e; "Failed to update manifest, region_id: {}", manifest.metadata.region_id),
1124        )?;
1125
1126        if self.state.load() == RegionRoleState::Follower {
1127            warn!(
1128                "Region {} becomes follower while updating manifest which may cause inconsistency, manifest version: {version}",
1129                manifest.metadata.region_id
1130            );
1131        }
1132
1133        Ok(version)
1134    }
1135
    /// Sets the [`RegionRole`].
    ///
    /// Transitions that are not allowed leave the state unchanged; they are only logged.
    /// A request for `StagingLeader` is always ignored.
    ///
    /// ```text
    ///                  +---------------------+
    ///                  |   Staging Leader    |
    ///                  +----------+----------+
    ///                             |
    ///                             v
    ///     +----------+     +------+-------+     +-------------+
    ///     | Follower | <-> |    Leader    | <-> | Downgrading |
    ///     +-----+----+     +------+-------+     +------+------+
    ///           ^                 ^                    |
    ///           +-----------------+--------------------+
    ///
    /// ```
    ///
    /// # State Transitions
    ///
    /// From `Follower`:
    /// - `Follower -> Leader`
    ///
    /// From `Leader`:
    /// - `Leader -> Follower`
    /// - `Leader -> Downgrading Leader`
    ///
    /// From `Staging Leader`:
    /// - `Staging Leader -> Leader`
    /// - `Staging Leader -> Follower`
    /// - `Staging Leader -> Downgrading Leader`
    ///
    /// From `Downgrading Leader`:
    /// - `Downgrading Leader -> Leader`
    /// - `Downgrading Leader -> Follower`
    pub(crate) fn set_role(&self, next_role: RegionRole, region_id: RegionId) {
        match next_role {
            RegionRole::Follower => {
                // A staging leader leaves through `exit_staging` so that the staging
                // partition info is cleared together with the state change.
                if self
                    .exit_staging(region_id, RegionRoleState::Follower)
                    .is_ok()
                {
                    info!(
                        "Convert region {} to follower, previous role state: {:?}",
                        region_id,
                        RegionRoleState::Leader(RegionLeaderState::Staging)
                    );
                    return;
                }
                // Not staging: every non-follower state is converted to follower.
                match self.state.fetch_update(|state| {
                    if !matches!(state, RegionRoleState::Follower) {
                        Some(RegionRoleState::Follower)
                    } else {
                        None
                    }
                }) {
                    Ok(state) => info!(
                        "Convert region {} to follower, previous role state: {:?}",
                        region_id, state
                    ),
                    Err(state) => {
                        // The closure above only declines when the region already is a
                        // follower, which is not worth a warning.
                        if state != RegionRoleState::Follower {
                            warn!(
                                "Failed to convert region {} to follower, current role state: {:?}",
                                region_id, state
                            )
                        }
                    }
                }
            }
            RegionRole::Leader => {
                // A staging leader leaves through `exit_staging` so that the staging
                // partition info is cleared together with the state change.
                if self
                    .exit_staging(
                        region_id,
                        RegionRoleState::Leader(RegionLeaderState::Writable),
                    )
                    .is_ok()
                {
                    info!(
                        "Convert region {} to leader, previous role state: {:?}",
                        region_id,
                        RegionRoleState::Leader(RegionLeaderState::Staging)
                    );
                    return;
                }
                // Only a follower or a downgrading leader is promoted to a writable
                // leader; every other state is left untouched.
                match self.state.fetch_update(|state| {
                    if matches!(
                        state,
                        RegionRoleState::Follower
                            | RegionRoleState::Leader(RegionLeaderState::Downgrading)
                    ) {
                        Some(RegionRoleState::Leader(RegionLeaderState::Writable))
                    } else {
                        None
                    }
                }) {
                    Ok(state) => info!(
                        "Convert region {} to leader, previous role state: {:?}",
                        region_id, state
                    ),
                    Err(state) => {
                        // Already being a writable leader is a no-op, not a failure.
                        if state != RegionRoleState::Leader(RegionLeaderState::Writable) {
                            warn!(
                                "Failed to convert region {} to leader, current role state: {:?}",
                                region_id, state
                            )
                        }
                    }
                }
            }
            RegionRole::StagingLeader => {
                // The state is never switched to staging from here.
                info!(
                    "Ignore direct conversion of region {} to staging leader; staging requires the dedicated workflow",
                    region_id
                );
            }
            RegionRole::DowngradingLeader => {
                // A staging leader leaves through `exit_staging` so that the staging
                // partition info is cleared together with the state change.
                if self
                    .exit_staging(
                        region_id,
                        RegionRoleState::Leader(RegionLeaderState::Downgrading),
                    )
                    .is_ok()
                {
                    info!(
                        "Convert region {} to downgrading region, previous role state: {:?}",
                        region_id,
                        RegionRoleState::Leader(RegionLeaderState::Staging)
                    );
                    return;
                }
                // Apart from staging, only a writable leader can start downgrading.
                match self.state.compare_exchange(
                    RegionRoleState::Leader(RegionLeaderState::Writable),
                    RegionRoleState::Leader(RegionLeaderState::Downgrading),
                ) {
                    Ok(state) => info!(
                        "Convert region {} to downgrading region, previous role state: {:?}",
                        region_id, state
                    ),
                    Err(state) => {
                        // Already downgrading is a no-op, not a failure.
                        if state != RegionRoleState::Leader(RegionLeaderState::Downgrading) {
                            warn!(
                                "Failed to convert region {} to downgrading leader, current role state: {:?}",
                                region_id, state
                            )
                        }
                    }
                }
            }
        }
    }
1285
1286    /// Returns the normal manifest of the region.
1287    pub(crate) async fn manifest(&self) -> Arc<crate::manifest::action::RegionManifest> {
1288        self.manifest_manager.read().await.manifest()
1289    }
1290
1291    /// Returns the staging manifest of the region.
1292    pub(crate) async fn staging_manifest(
1293        &self,
1294    ) -> Option<Arc<crate::manifest::action::RegionManifest>> {
1295        self.manifest_manager.read().await.staging_manifest()
1296    }
1297}
1298
/// Shared, reference-counted handle to a [`ManifestContext`].
pub(crate) type ManifestContextRef = Arc<ManifestContext>;
1300
1301/// Regions indexed by ids.
1302#[derive(Debug, Default)]
1303pub(crate) struct RegionMap {
1304    regions: RwLock<HashMap<RegionId, MitoRegionRef>>,
1305}
1306
1307impl RegionMap {
1308    /// Returns true if the region exists.
1309    pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1310        let regions = self.regions.read().unwrap();
1311        regions.contains_key(&region_id)
1312    }
1313
1314    /// Inserts a new region into the map.
1315    pub(crate) fn insert_region(&self, region: MitoRegionRef) {
1316        let mut regions = self.regions.write().unwrap();
1317        regions.insert(region.region_id, region);
1318    }
1319
1320    /// Gets region by region id.
1321    pub(crate) fn get_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
1322        let regions = self.regions.read().unwrap();
1323        regions.get(&region_id).cloned()
1324    }
1325
1326    /// Gets writable region by region id.
1327    ///
1328    /// Returns error if the region does not exist or is readonly.
1329    pub(crate) fn writable_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1330        let region = self
1331            .get_region(region_id)
1332            .context(RegionNotFoundSnafu { region_id })?;
1333        ensure!(
1334            region.is_writable(),
1335            RegionStateSnafu {
1336                region_id,
1337                state: region.state(),
1338                expect: RegionRoleState::Leader(RegionLeaderState::Writable),
1339            }
1340        );
1341        Ok(region)
1342    }
1343
1344    /// Gets readonly region by region id.
1345    ///
1346    /// Returns error if the region does not exist or is writable.
1347    pub(crate) fn follower_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1348        let region = self
1349            .get_region(region_id)
1350            .context(RegionNotFoundSnafu { region_id })?;
1351        ensure!(
1352            region.is_follower(),
1353            RegionStateSnafu {
1354                region_id,
1355                state: region.state(),
1356                expect: RegionRoleState::Follower,
1357            }
1358        );
1359
1360        Ok(region)
1361    }
1362
1363    /// Gets region by region id.
1364    ///
1365    /// Calls the callback if the region does not exist.
1366    pub(crate) fn get_region_or<F: OnFailure>(
1367        &self,
1368        region_id: RegionId,
1369        cb: &mut F,
1370    ) -> Option<MitoRegionRef> {
1371        match self
1372            .get_region(region_id)
1373            .context(RegionNotFoundSnafu { region_id })
1374        {
1375            Ok(region) => Some(region),
1376            Err(e) => {
1377                cb.on_failure(e);
1378                None
1379            }
1380        }
1381    }
1382
1383    /// Gets writable region by region id.
1384    ///
1385    /// Calls the callback if the region does not exist or is readonly.
1386    pub(crate) fn writable_region_or<F: OnFailure>(
1387        &self,
1388        region_id: RegionId,
1389        cb: &mut F,
1390    ) -> Option<MitoRegionRef> {
1391        match self.writable_region(region_id) {
1392            Ok(region) => Some(region),
1393            Err(e) => {
1394                cb.on_failure(e);
1395                None
1396            }
1397        }
1398    }
1399
1400    /// Gets writable non-staging region by region id.
1401    ///
1402    /// Returns error if the region does not exist, is readonly, or is in staging mode.
1403    pub(crate) fn writable_non_staging_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1404        let region = self.writable_region(region_id)?;
1405        if region.is_staging() {
1406            return Err(crate::error::RegionStateSnafu {
1407                region_id,
1408                state: region.state(),
1409                expect: RegionRoleState::Leader(RegionLeaderState::Writable),
1410            }
1411            .build());
1412        }
1413        Ok(region)
1414    }
1415
1416    /// Gets staging region by region id.
1417    ///
1418    /// Returns error if the region does not exist or is not in staging state.
1419    pub(crate) fn staging_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1420        let region = self
1421            .get_region(region_id)
1422            .context(RegionNotFoundSnafu { region_id })?;
1423        ensure!(
1424            region.is_staging(),
1425            RegionStateSnafu {
1426                region_id,
1427                state: region.state(),
1428                expect: RegionRoleState::Leader(RegionLeaderState::Staging),
1429            }
1430        );
1431        Ok(region)
1432    }
1433
1434    /// Gets flushable region by region id.
1435    ///
1436    /// Returns error if the region does not exist.
1437    /// Returns None if the region exists but not operatable.
1438    fn flushable_region(&self, region_id: RegionId) -> Result<Option<MitoRegionRef>> {
1439        let region = self
1440            .get_region(region_id)
1441            .context(RegionNotFoundSnafu { region_id })?;
1442        if region.is_flushable() {
1443            Ok(Some(region))
1444        } else {
1445            Ok(None)
1446        }
1447    }
1448
1449    /// Gets flushable region by region id.
1450    ///
1451    /// Calls the callback if the region does not exist.
1452    /// Returns None if the region exists but not operatable.
1453    pub(crate) fn flushable_region_or<F: OnFailure>(
1454        &self,
1455        region_id: RegionId,
1456        cb: &mut F,
1457    ) -> Option<MitoRegionRef> {
1458        match self.flushable_region(region_id) {
1459            Ok(region) => region,
1460            Err(e) => {
1461                cb.on_failure(e);
1462                None
1463            }
1464        }
1465    }
1466
1467    /// Remove region by id.
1468    pub(crate) fn remove_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
1469        let mut regions = self.regions.write().unwrap();
1470        regions.remove(&region_id)
1471    }
1472
1473    /// List all regions.
1474    pub(crate) fn list_regions(&self) -> Vec<MitoRegionRef> {
1475        let regions = self.regions.read().unwrap();
1476        regions.values().cloned().collect()
1477    }
1478
1479    /// Clear the map.
1480    pub(crate) fn clear(&self) {
1481        self.regions.write().unwrap().clear();
1482    }
1483}
1484
1485pub(crate) type RegionMapRef = Arc<RegionMap>;
1486
1487/// Opening regions
1488#[derive(Debug, Default)]
1489pub(crate) struct OpeningRegions {
1490    regions: RwLock<HashMap<RegionId, Vec<OptionOutputTx>>>,
1491}
1492
1493impl OpeningRegions {
1494    /// Registers `sender` for an opening region; Otherwise, it returns `None`.
1495    pub(crate) fn wait_for_opening_region(
1496        &self,
1497        region_id: RegionId,
1498        sender: OptionOutputTx,
1499    ) -> Option<OptionOutputTx> {
1500        let mut regions = self.regions.write().unwrap();
1501        match regions.entry(region_id) {
1502            Entry::Occupied(mut senders) => {
1503                senders.get_mut().push(sender);
1504                None
1505            }
1506            Entry::Vacant(_) => Some(sender),
1507        }
1508    }
1509
1510    /// Returns true if the region exists.
1511    pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1512        let regions = self.regions.read().unwrap();
1513        regions.contains_key(&region_id)
1514    }
1515
1516    /// Inserts a new region into the map.
1517    pub(crate) fn insert_sender(&self, region: RegionId, sender: OptionOutputTx) {
1518        let mut regions = self.regions.write().unwrap();
1519        regions.insert(region, vec![sender]);
1520    }
1521
1522    /// Remove region by id.
1523    pub(crate) fn remove_sender(&self, region_id: RegionId) -> Vec<OptionOutputTx> {
1524        let mut regions = self.regions.write().unwrap();
1525        regions.remove(&region_id).unwrap_or_default()
1526    }
1527
1528    #[cfg(test)]
1529    pub(crate) fn sender_len(&self, region_id: RegionId) -> usize {
1530        let regions = self.regions.read().unwrap();
1531        if let Some(senders) = regions.get(&region_id) {
1532            senders.len()
1533        } else {
1534            0
1535        }
1536    }
1537}
1538
1539pub(crate) type OpeningRegionsRef = Arc<OpeningRegions>;
1540
1541/// The regions that are catching up.
1542#[derive(Debug, Default)]
1543pub(crate) struct CatchupRegions {
1544    regions: RwLock<HashSet<RegionId>>,
1545}
1546
1547impl CatchupRegions {
1548    /// Returns true if the region exists.
1549    pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1550        let regions = self.regions.read().unwrap();
1551        regions.contains(&region_id)
1552    }
1553
1554    /// Inserts a new region into the set.
1555    pub(crate) fn insert_region(&self, region_id: RegionId) {
1556        let mut regions = self.regions.write().unwrap();
1557        regions.insert(region_id);
1558    }
1559
1560    /// Remove region by id.
1561    pub(crate) fn remove_region(&self, region_id: RegionId) {
1562        let mut regions = self.regions.write().unwrap();
1563        regions.remove(&region_id);
1564    }
1565}
1566
1567pub(crate) type CatchupRegionsRef = Arc<CatchupRegions>;
1568
/// Manifest-related counters.
///
/// Every counter sits behind an `Arc`, so clones of this struct observe the same
/// underlying atomics.
#[derive(Debug, Clone, Default)]
pub struct ManifestStats {
    /// Counter read by `total_manifest_size()`.
    pub(crate) total_manifest_size: Arc<AtomicU64>,
    /// Counter read by `manifest_version()`.
    pub(crate) manifest_version: Arc<AtomicU64>,
    /// Counter read by `file_removed_cnt()`.
    pub(crate) file_removed_cnt: Arc<AtomicU64>,
}

impl ManifestStats {
    /// Current value of the total manifest size counter (relaxed load).
    fn total_manifest_size(&self) -> u64 {
        let counter: &AtomicU64 = &self.total_manifest_size;
        counter.load(Ordering::Relaxed)
    }

    /// Current value of the manifest version counter (relaxed load).
    fn manifest_version(&self) -> u64 {
        let counter: &AtomicU64 = &self.manifest_version;
        counter.load(Ordering::Relaxed)
    }

    /// Current value of the removed-file counter (relaxed load).
    fn file_removed_cnt(&self) -> u64 {
        let counter: &AtomicU64 = &self.file_removed_cnt;
        counter.load(Ordering::Relaxed)
    }
}
1590
1591/// Parses the partition expression from a JSON string.
1592pub fn parse_partition_expr(partition_expr_str: Option<&str>) -> Result<Option<PartitionExpr>> {
1593    match partition_expr_str {
1594        None => Ok(None),
1595        Some("") => Ok(None),
1596        Some(json_str) => {
1597            let expr = partition::expr::PartitionExpr::from_json_str(json_str)
1598                .with_context(|_| InvalidPartitionExprSnafu { expr: json_str })?;
1599            Ok(expr)
1600        }
1601    }
1602}
1603
1604#[cfg(test)]
1605mod tests {
1606    use std::sync::Arc;
1607    use std::sync::atomic::AtomicU64;
1608
1609    use common_datasource::compression::CompressionType;
1610    use common_test_util::temp_dir::create_temp_dir;
1611    use crossbeam_utils::atomic::AtomicCell;
1612    use object_store::ObjectStore;
1613    use object_store::services::Fs;
1614    use store_api::logstore::provider::Provider;
1615    use store_api::region_engine::RegionRole;
1616    use store_api::region_request::PathType;
1617    use store_api::storage::{FileId, RegionId};
1618
1619    use crate::access_layer::AccessLayer;
1620    use crate::error::Error;
1621    use crate::manifest::action::{
1622        RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionPartitionExprChange,
1623    };
1624    use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
1625    use crate::region::{
1626        ManifestContext, ManifestStats, MitoRegion, RegionLeaderState, RegionRoleState,
1627    };
1628    use crate::sst::FormatType;
1629    use crate::sst::index::intermediate::IntermediateManager;
1630    use crate::sst::index::puffin_manager::PuffinManagerFactory;
1631    use crate::test_util::scheduler_util::SchedulerEnv;
1632    use crate::test_util::version_util::VersionControlBuilder;
1633    use crate::time_provider::StdTimeProvider;
1634
1635    #[test]
1636    fn test_region_state_lock_free() {
1637        assert!(AtomicCell::<RegionRoleState>::is_lock_free());
1638    }
1639
1640    async fn build_test_region(env: &SchedulerEnv) -> MitoRegion {
1641        let builder = VersionControlBuilder::new();
1642        let version_control = Arc::new(builder.build());
1643        let metadata = version_control.current().version.metadata.clone();
1644
1645        let manager = RegionManifestManager::new(
1646            metadata.clone(),
1647            0,
1648            RegionManifestOptions {
1649                manifest_dir: "".to_string(),
1650                object_store: env.access_layer.object_store().clone(),
1651                compress_type: CompressionType::Uncompressed,
1652                checkpoint_distance: 10,
1653                remove_file_options: Default::default(),
1654                manifest_cache: None,
1655            },
1656            FormatType::PrimaryKey,
1657            &Default::default(),
1658        )
1659        .await
1660        .unwrap();
1661
1662        let manifest_ctx = Arc::new(ManifestContext::new(
1663            manager,
1664            RegionRoleState::Leader(RegionLeaderState::Writable),
1665        ));
1666
1667        MitoRegion {
1668            region_id: metadata.region_id,
1669            version_control,
1670            access_layer: env.access_layer.clone(),
1671            manifest_ctx,
1672            file_purger: crate::test_util::new_noop_file_purger(),
1673            provider: Provider::noop_provider(),
1674            last_flush_millis: Default::default(),
1675            last_compaction_millis: Default::default(),
1676            time_provider: Arc::new(StdTimeProvider),
1677            topic_latest_entry_id: Default::default(),
1678            written_bytes: Arc::new(AtomicU64::new(0)),
1679            stats: ManifestStats::default(),
1680        }
1681    }
1682
1683    fn empty_edit() -> RegionEdit {
1684        RegionEdit {
1685            files_to_add: Vec::new(),
1686            files_to_remove: Vec::new(),
1687            timestamp_ms: None,
1688            compaction_time_window: None,
1689            flushed_entry_id: None,
1690            flushed_sequence: None,
1691            committed_sequence: None,
1692        }
1693    }
1694
1695    #[tokio::test]
1696    async fn test_compaction_update_manifest_allows_editing_state() {
1697        let env = SchedulerEnv::new().await;
1698        let region = build_test_region(&env).await;
1699        region.set_editing(RegionLeaderState::Writable).unwrap();
1700
1701        let file_id = FileId::random();
1702        let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(RegionEdit {
1703            files_to_add: vec![crate::sst::file::FileMeta {
1704                region_id: region.region_id,
1705                file_id,
1706                level: 1,
1707                ..Default::default()
1708            }],
1709            files_to_remove: Vec::new(),
1710            timestamp_ms: None,
1711            compaction_time_window: None,
1712            flushed_entry_id: None,
1713            flushed_sequence: None,
1714            committed_sequence: None,
1715        }));
1716
1717        region
1718            .manifest_ctx
1719            .update_manifest_for_compaction(action_list)
1720            .await
1721            .unwrap();
1722
1723        assert!(
1724            region
1725                .manifest_ctx
1726                .manifest()
1727                .await
1728                .files
1729                .contains_key(&file_id)
1730        );
1731    }
1732
1733    #[tokio::test]
1734    async fn test_exit_staging_partition_expr_change_and_edit_success() {
1735        let env = SchedulerEnv::new().await;
1736        let region = build_test_region(&env).await;
1737
1738        let mut manager = region.manifest_ctx.manifest_manager.write().await;
1739        region.set_staging(&mut manager).await.unwrap();
1740        manager
1741            .update(
1742                RegionMetaActionList::new(vec![
1743                    RegionMetaAction::PartitionExprChange(RegionPartitionExprChange {
1744                        partition_expr: Some("expr_a".to_string()),
1745                    }),
1746                    RegionMetaAction::Edit(empty_edit()),
1747                ]),
1748                true,
1749            )
1750            .await
1751            .unwrap();
1752
1753        region.exit_staging_on_success(&mut manager).await.unwrap();
1754        drop(manager);
1755
1756        assert_eq!(
1757            region.version().metadata.partition_expr.as_deref(),
1758            Some("expr_a")
1759        );
1760        assert_eq!(
1761            region.state(),
1762            RegionRoleState::Leader(RegionLeaderState::Writable)
1763        );
1764    }
1765
1766    #[tokio::test]
1767    async fn test_exit_staging_change_with_same_columns_success() {
1768        let env = SchedulerEnv::new().await;
1769        let region = build_test_region(&env).await;
1770
1771        let mut manager = region.manifest_ctx.manifest_manager.write().await;
1772        region.set_staging(&mut manager).await.unwrap();
1773
1774        let mut changed_metadata = region.version().metadata.as_ref().clone();
1775        changed_metadata.set_partition_expr(Some("expr_b".to_string()));
1776
1777        manager
1778            .update(
1779                RegionMetaActionList::new(vec![
1780                    RegionMetaAction::Change(RegionChange {
1781                        metadata: Arc::new(changed_metadata),
1782                        sst_format: FormatType::PrimaryKey,
1783                        append_mode: None,
1784                    }),
1785                    RegionMetaAction::Edit(empty_edit()),
1786                ]),
1787                true,
1788            )
1789            .await
1790            .unwrap();
1791
1792        region.exit_staging_on_success(&mut manager).await.unwrap();
1793        drop(manager);
1794
1795        assert_eq!(
1796            region.version().metadata.partition_expr.as_deref(),
1797            Some("expr_b")
1798        );
1799        assert_eq!(
1800            region.state(),
1801            RegionRoleState::Leader(RegionLeaderState::Writable)
1802        );
1803    }
1804
1805    #[tokio::test]
1806    async fn test_exit_staging_change_with_different_columns_fails() {
1807        let env = SchedulerEnv::new().await;
1808        let region = build_test_region(&env).await;
1809
1810        let mut manager = region.manifest_ctx.manifest_manager.write().await;
1811        region.set_staging(&mut manager).await.unwrap();
1812
1813        let mut changed_metadata = region.version().metadata.as_ref().clone();
1814        changed_metadata.column_metadatas.rotate_left(1);
1815
1816        manager
1817            .update(
1818                RegionMetaActionList::new(vec![
1819                    RegionMetaAction::Change(RegionChange {
1820                        metadata: Arc::new(changed_metadata),
1821                        sst_format: FormatType::PrimaryKey,
1822                        append_mode: None,
1823                    }),
1824                    RegionMetaAction::Edit(empty_edit()),
1825                ]),
1826                true,
1827            )
1828            .await
1829            .unwrap();
1830
1831        let result = region.exit_staging_on_success(&mut manager).await;
1832        assert!(matches!(result, Err(Error::Unexpected { .. })));
1833    }
1834
1835    #[tokio::test]
1836    async fn test_exit_staging_partition_expr_change_and_change_conflict_fails() {
1837        let env = SchedulerEnv::new().await;
1838        let region = build_test_region(&env).await;
1839
1840        let mut manager = region.manifest_ctx.manifest_manager.write().await;
1841        region.set_staging(&mut manager).await.unwrap();
1842
1843        let mut changed_metadata = region.version().metadata.as_ref().clone();
1844        changed_metadata.set_partition_expr(Some("expr_c".to_string()));
1845
1846        manager
1847            .update(
1848                RegionMetaActionList::new(vec![
1849                    RegionMetaAction::PartitionExprChange(RegionPartitionExprChange {
1850                        partition_expr: Some("expr_c".to_string()),
1851                    }),
1852                    RegionMetaAction::Change(RegionChange {
1853                        metadata: Arc::new(changed_metadata),
1854                        sst_format: FormatType::PrimaryKey,
1855                        append_mode: None,
1856                    }),
1857                    RegionMetaAction::Edit(empty_edit()),
1858                ]),
1859                true,
1860            )
1861            .await
1862            .unwrap();
1863
1864        let result = region.exit_staging_on_success(&mut manager).await;
1865        assert!(matches!(result, Err(Error::Unexpected { .. })));
1866    }
1867
1868    #[tokio::test]
1869    async fn test_set_region_state() {
1870        let env = SchedulerEnv::new().await;
1871        let builder = VersionControlBuilder::new();
1872        let version_control = Arc::new(builder.build());
1873        let manifest_ctx = env
1874            .mock_manifest_context(version_control.current().version.metadata.clone())
1875            .await;
1876
1877        let region_id = RegionId::new(1024, 0);
1878        // Leader -> Follower
1879        manifest_ctx.set_role(RegionRole::Follower, region_id);
1880        assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1881
1882        // Follower -> Leader
1883        manifest_ctx.set_role(RegionRole::Leader, region_id);
1884        assert_eq!(
1885            manifest_ctx.state.load(),
1886            RegionRoleState::Leader(RegionLeaderState::Writable)
1887        );
1888
1889        // Direct Leader -> StagingLeader should be ignored.
1890        manifest_ctx.set_role(RegionRole::StagingLeader, region_id);
1891        assert_eq!(
1892            manifest_ctx.state.load(),
1893            RegionRoleState::Leader(RegionLeaderState::Writable)
1894        );
1895
1896        // Leader -> Downgrading Leader
1897        manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1898        assert_eq!(
1899            manifest_ctx.state.load(),
1900            RegionRoleState::Leader(RegionLeaderState::Downgrading)
1901        );
1902
1903        // Downgrading Leader -> Follower
1904        manifest_ctx.set_role(RegionRole::Follower, region_id);
1905        assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1906
1907        // Can't downgrade from follower (Follower -> Downgrading Leader)
1908        manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1909        assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1910
1911        // Set region role too Downgrading Leader
1912        manifest_ctx.set_role(RegionRole::Leader, region_id);
1913        manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1914        assert_eq!(
1915            manifest_ctx.state.load(),
1916            RegionRoleState::Leader(RegionLeaderState::Downgrading)
1917        );
1918
1919        // Downgrading Leader -> Leader
1920        manifest_ctx.set_role(RegionRole::Leader, region_id);
1921        assert_eq!(
1922            manifest_ctx.state.load(),
1923            RegionRoleState::Leader(RegionLeaderState::Writable)
1924        );
1925    }
1926
1927    #[tokio::test]
1928    async fn test_staging_state_validation() {
1929        let env = SchedulerEnv::new().await;
1930        let builder = VersionControlBuilder::new();
1931        let version_control = Arc::new(builder.build());
1932
1933        // Create context with staging state using the correct pattern from SchedulerEnv
1934        let staging_ctx = {
1935            let manager = RegionManifestManager::new(
1936                version_control.current().version.metadata.clone(),
1937                0,
1938                RegionManifestOptions {
1939                    manifest_dir: "".to_string(),
1940                    object_store: env.access_layer.object_store().clone(),
1941                    compress_type: CompressionType::Uncompressed,
1942                    checkpoint_distance: 10,
1943                    remove_file_options: Default::default(),
1944                    manifest_cache: None,
1945                },
1946                FormatType::PrimaryKey,
1947                &Default::default(),
1948            )
1949            .await
1950            .unwrap();
1951            Arc::new(ManifestContext::new(
1952                manager,
1953                RegionRoleState::Leader(RegionLeaderState::Staging),
1954            ))
1955        };
1956
1957        // Test staging state behavior
1958        assert_eq!(
1959            staging_ctx.current_state(),
1960            RegionRoleState::Leader(RegionLeaderState::Staging)
1961        );
1962
1963        // Test writable context for comparison
1964        let writable_ctx = env
1965            .mock_manifest_context(version_control.current().version.metadata.clone())
1966            .await;
1967
1968        assert_eq!(
1969            writable_ctx.current_state(),
1970            RegionRoleState::Leader(RegionLeaderState::Writable)
1971        );
1972    }
1973
1974    #[tokio::test]
1975    async fn test_staging_state_transitions() {
1976        let builder = VersionControlBuilder::new();
1977        let version_control = Arc::new(builder.build());
1978        let metadata = version_control.current().version.metadata.clone();
1979
1980        // Create MitoRegion for testing state transitions
1981        let temp_dir = create_temp_dir("");
1982        let path_str = temp_dir.path().display().to_string();
1983        let fs_builder = Fs::default().root(&path_str);
1984        let object_store = ObjectStore::new(fs_builder).unwrap().finish();
1985
1986        let index_aux_path = temp_dir.path().join("index_aux");
1987        let puffin_mgr = PuffinManagerFactory::new(&index_aux_path, 4096, None, None)
1988            .await
1989            .unwrap();
1990        let intm_mgr = IntermediateManager::init_fs(index_aux_path.to_str().unwrap())
1991            .await
1992            .unwrap();
1993
1994        let access_layer = Arc::new(AccessLayer::new(
1995            "",
1996            PathType::Bare,
1997            object_store,
1998            puffin_mgr,
1999            intm_mgr,
2000        ));
2001
2002        let manager = RegionManifestManager::new(
2003            metadata.clone(),
2004            0,
2005            RegionManifestOptions {
2006                manifest_dir: "".to_string(),
2007                object_store: access_layer.object_store().clone(),
2008                compress_type: CompressionType::Uncompressed,
2009                checkpoint_distance: 10,
2010                remove_file_options: Default::default(),
2011                manifest_cache: None,
2012            },
2013            FormatType::PrimaryKey,
2014            &Default::default(),
2015        )
2016        .await
2017        .unwrap();
2018
2019        let manifest_ctx = Arc::new(ManifestContext::new(
2020            manager,
2021            RegionRoleState::Leader(RegionLeaderState::Writable),
2022        ));
2023
2024        let region = MitoRegion {
2025            region_id: metadata.region_id,
2026            version_control,
2027            access_layer,
2028            manifest_ctx: manifest_ctx.clone(),
2029            file_purger: crate::test_util::new_noop_file_purger(),
2030            provider: Provider::noop_provider(),
2031            last_flush_millis: Default::default(),
2032            last_compaction_millis: Default::default(),
2033            time_provider: Arc::new(StdTimeProvider),
2034            topic_latest_entry_id: Default::default(),
2035            written_bytes: Arc::new(AtomicU64::new(0)),
2036            stats: ManifestStats::default(),
2037        };
2038
2039        // Test initial state
2040        assert_eq!(
2041            region.state(),
2042            RegionRoleState::Leader(RegionLeaderState::Writable)
2043        );
2044        assert!(!region.is_staging());
2045
2046        // Test transition to staging
2047        let mut manager = manifest_ctx.manifest_manager.write().await;
2048        region.set_staging(&mut manager).await.unwrap();
2049        drop(manager);
2050        assert_eq!(
2051            region.state(),
2052            RegionRoleState::Leader(RegionLeaderState::Staging)
2053        );
2054        assert!(region.is_staging());
2055
2056        // Test transition back to writable
2057        region.exit_staging().unwrap();
2058        assert_eq!(
2059            region.state(),
2060            RegionRoleState::Leader(RegionLeaderState::Writable)
2061        );
2062        assert!(!region.is_staging());
2063
2064        // Test staging directory cleanup: Create dirty staging files before entering staging mode
2065        {
2066            // Create some dummy staging manifest files to simulate interrupted session
2067            let manager = manifest_ctx.manifest_manager.write().await;
2068            let dummy_actions = RegionMetaActionList::new(vec![]);
2069            let dummy_bytes = dummy_actions.encode().unwrap();
2070
2071            // Create dirty staging files with versions 100 and 101
2072            manager.store().save(100, &dummy_bytes, true).await.unwrap();
2073            manager.store().save(101, &dummy_bytes, true).await.unwrap();
2074            drop(manager);
2075
2076            // Verify dirty files exist before entering staging
2077            let manager = manifest_ctx.manifest_manager.read().await;
2078            let dirty_manifests = manager.store().fetch_staging_manifests().await.unwrap();
2079            assert_eq!(
2080                dirty_manifests.len(),
2081                2,
2082                "Should have 2 dirty staging files"
2083            );
2084            drop(manager);
2085
2086            // Enter staging mode - this should clean up the dirty files
2087            let mut manager = manifest_ctx.manifest_manager.write().await;
2088            region.set_staging(&mut manager).await.unwrap();
2089            drop(manager);
2090
2091            // Verify dirty files are cleaned up after entering staging
2092            let manager = manifest_ctx.manifest_manager.read().await;
2093            let cleaned_manifests = manager.store().fetch_staging_manifests().await.unwrap();
2094            assert_eq!(
2095                cleaned_manifests.len(),
2096                0,
2097                "Dirty staging files should be cleaned up"
2098            );
2099            drop(manager);
2100
2101            // Exit staging to restore normal state for remaining tests
2102            region.exit_staging().unwrap();
2103        }
2104
2105        // Test invalid transitions
2106        let mut manager = manifest_ctx.manifest_manager.write().await;
2107        assert!(region.set_staging(&mut manager).await.is_ok()); // Writable -> Staging should work
2108        drop(manager);
2109        let mut manager = manifest_ctx.manifest_manager.write().await;
2110        assert!(region.set_staging(&mut manager).await.is_err()); // Staging -> Staging should fail
2111        drop(manager);
2112        assert!(region.exit_staging().is_ok()); // Staging -> Writable should work
2113        assert!(region.exit_staging().is_err()); // Writable -> Writable should fail
2114    }
2115}