1pub mod catchup;
18pub mod opener;
19pub mod options;
20pub mod utils;
21pub(crate) mod version;
22
23use std::collections::hash_map::Entry;
24use std::collections::{HashMap, HashSet};
25use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
26use std::sync::{Arc, Mutex, RwLock};
27
28use common_base::hash::partition_expr_version;
29use common_telemetry::{error, info, warn};
30use crossbeam_utils::atomic::AtomicCell;
31use partition::expr::PartitionExpr;
32use snafu::{OptionExt, ResultExt, ensure};
33use store_api::ManifestVersion;
34use store_api::codec::PrimaryKeyEncoding;
35use store_api::logstore::provider::Provider;
36use store_api::metadata::RegionMetadataRef;
37use store_api::region_engine::{
38 RegionManifestInfo, RegionRole, RegionStatistic, SettableRegionRoleState,
39};
40use store_api::region_request::{PathType, StagingPartitionDirective};
41use store_api::sst_entry::ManifestSstEntry;
42use store_api::storage::{FileId, RegionId, SequenceNumber};
43use tokio::sync::RwLockWriteGuard;
44pub use utils::*;
45
46use crate::access_layer::AccessLayerRef;
47use crate::error::{
48 InvalidPartitionExprSnafu, RegionNotFoundSnafu, RegionStateSnafu, RegionTruncatedSnafu, Result,
49 UnexpectedSnafu, UpdateManifestSnafu,
50};
51use crate::manifest::action::{
52 RegionChange, RegionManifest, RegionMetaAction, RegionMetaActionList,
53};
54use crate::manifest::manager::RegionManifestManager;
55use crate::region::version::{VersionControlRef, VersionRef};
56use crate::request::{OnFailure, OptionOutputTx};
57use crate::sst::file::FileMeta;
58use crate::sst::file_purger::FilePurgerRef;
59use crate::sst::location::{index_file_path, sst_file_path};
60use crate::time_provider::TimeProviderRef;
61
/// Multiplier applied to a region's memtable usage to estimate its WAL usage.
const ESTIMATED_WAL_FACTOR: f32 = 0.42825;
64
65#[derive(Debug)]
67pub struct RegionUsage {
68 pub region_id: RegionId,
69 pub wal_usage: u64,
70 pub sst_usage: u64,
71 pub manifest_usage: u64,
72}
73
74impl RegionUsage {
75 pub fn disk_usage(&self) -> u64 {
76 self.wal_usage + self.sst_usage + self.manifest_usage
77 }
78}
79
/// Fine-grained state of a region that currently acts as a leader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegionLeaderState {
    /// The region accepts writes; most other leader states are entered from here.
    Writable,
    /// The region is in staging mode.
    Staging,
    /// The region is on its way into staging mode.
    EnteringStaging,
    /// The region is being altered.
    Altering,
    /// The region is being dropped.
    Dropping,
    /// The region is being truncated.
    Truncating,
    /// The region is being edited.
    Editing,
    /// The region is being downgraded.
    Downgrading,
}

/// Role of a region together with its leader sub-state, if any.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegionRoleState {
    /// The region is a leader in the given leader state.
    Leader(RegionLeaderState),
    /// The region is a follower.
    Follower,
}

impl RegionRoleState {
    /// Returns the inner leader state, or `None` when the region is a follower.
    pub fn into_leader_state(self) -> Option<RegionLeaderState> {
        if let RegionRoleState::Leader(leader_state) = self {
            Some(leader_state)
        } else {
            None
        }
    }
}
115
/// In-memory handle of a single region: its versions, manifest context and bookkeeping counters.
#[derive(Debug)]
pub struct MitoRegion {
    /// Id of this region.
    pub(crate) region_id: RegionId,

    /// Version control of the region; source of the current version and committed sequence.
    pub(crate) version_control: VersionControlRef,
    /// Access layer of the region; provides the table directory and path type.
    pub(crate) access_layer: AccessLayerRef,
    /// Shared context holding the manifest manager and the region role state.
    pub(crate) manifest_ctx: ManifestContextRef,
    /// File purger handed to the version control when edits are applied.
    pub(crate) file_purger: FilePurgerRef,
    /// Log store provider of the region.
    // NOTE(review): presumably identifies the WAL backend for this region — confirm.
    pub(crate) provider: Provider,
    /// Timestamp (milliseconds, from `time_provider`) recorded by `update_flush_millis`.
    last_flush_millis: AtomicI64,
    /// Timestamp (milliseconds, from `time_provider`) recorded by `update_compaction_millis`.
    last_compaction_millis: AtomicI64,
    /// Clock used to stamp the flush and compaction times.
    time_provider: TimeProviderRef,
    /// Latest entry id of the topic; reported in the region statistics.
    pub(crate) topic_latest_entry_id: AtomicU64,
    /// Written-bytes counter; reported in the region statistics.
    pub(crate) written_bytes: Arc<AtomicU64>,
    /// Manifest statistics (total size, version, removed file count).
    stats: ManifestStats,
}

/// Shared reference to a [`MitoRegion`].
pub type MitoRegionRef = Arc<MitoRegion>;
164
165#[derive(Debug, Clone)]
166pub(crate) struct StagingPartitionInfo {
167 pub(crate) partition_directive: StagingPartitionDirective,
168 pub(crate) partition_rule_version: u64,
169}
170
171impl StagingPartitionInfo {
172 pub(crate) fn partition_expr(&self) -> Option<&str> {
174 self.partition_directive.partition_expr()
175 }
176
177 pub(crate) fn from_partition_directive(partition_directive: StagingPartitionDirective) -> Self {
179 let partition_rule_version = match &partition_directive {
180 StagingPartitionDirective::UpdatePartitionExpr(expr) => {
181 partition_expr_version(Some(expr))
182 }
183 StagingPartitionDirective::RejectAllWrites => 0,
184 };
185 Self {
186 partition_directive,
187 partition_rule_version,
188 }
189 }
190}
191
192impl MitoRegion {
193 pub(crate) async fn stop(&self) {
195 self.manifest_ctx
196 .manifest_manager
197 .write()
198 .await
199 .stop()
200 .await;
201
202 info!(
203 "Stopped region manifest manager, region_id: {}",
204 self.region_id
205 );
206 }
207
208 pub fn metadata(&self) -> RegionMetadataRef {
210 let version_data = self.version_control.current();
211 version_data.version.metadata.clone()
212 }
213
214 pub(crate) fn primary_key_encoding(&self) -> PrimaryKeyEncoding {
216 let version_data = self.version_control.current();
217 version_data.version.metadata.primary_key_encoding
218 }
219
220 pub(crate) fn version(&self) -> VersionRef {
222 let version_data = self.version_control.current();
223 version_data.version
224 }
225
226 pub(crate) fn last_flush_millis(&self) -> i64 {
228 self.last_flush_millis.load(Ordering::Relaxed)
229 }
230
231 pub(crate) fn update_flush_millis(&self) {
233 let now = self.time_provider.current_time_millis();
234 self.last_flush_millis.store(now, Ordering::Relaxed);
235 }
236
237 pub(crate) fn last_compaction_millis(&self) -> i64 {
239 self.last_compaction_millis.load(Ordering::Relaxed)
240 }
241
242 pub(crate) fn update_compaction_millis(&self) {
244 let now = self.time_provider.current_time_millis();
245 self.last_compaction_millis.store(now, Ordering::Relaxed);
246 }
247
248 pub(crate) fn table_dir(&self) -> &str {
250 self.access_layer.table_dir()
251 }
252
253 pub(crate) fn path_type(&self) -> PathType {
255 self.access_layer.path_type()
256 }
257
258 pub(crate) fn is_writable(&self) -> bool {
260 matches!(
261 self.manifest_ctx.state.load(),
262 RegionRoleState::Leader(RegionLeaderState::Writable)
263 | RegionRoleState::Leader(RegionLeaderState::Staging)
264 )
265 }
266
267 pub(crate) fn is_flushable(&self) -> bool {
269 matches!(
270 self.manifest_ctx.state.load(),
271 RegionRoleState::Leader(RegionLeaderState::Writable)
272 | RegionRoleState::Leader(RegionLeaderState::Staging)
273 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
274 )
275 }
276
277 pub(crate) fn should_abort_index(&self) -> bool {
279 matches!(
280 self.manifest_ctx.state.load(),
281 RegionRoleState::Follower
282 | RegionRoleState::Leader(RegionLeaderState::Dropping)
283 | RegionRoleState::Leader(RegionLeaderState::Truncating)
284 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
285 | RegionRoleState::Leader(RegionLeaderState::Staging)
286 )
287 }
288
289 pub(crate) fn is_downgrading(&self) -> bool {
291 matches!(
292 self.manifest_ctx.state.load(),
293 RegionRoleState::Leader(RegionLeaderState::Downgrading)
294 )
295 }
296
297 pub(crate) fn is_staging(&self) -> bool {
299 self.manifest_ctx.state.load() == RegionRoleState::Leader(RegionLeaderState::Staging)
300 }
301
302 pub(crate) fn is_enter_staging(&self) -> bool {
304 self.manifest_ctx.state.load()
305 == RegionRoleState::Leader(RegionLeaderState::EnteringStaging)
306 }
307
308 pub fn region_id(&self) -> RegionId {
309 self.region_id
310 }
311
312 pub fn find_committed_sequence(&self) -> SequenceNumber {
313 self.version_control.committed_sequence()
314 }
315
316 pub fn flushed_sequence(&self) -> SequenceNumber {
322 self.version_control.current().version.flushed_sequence
323 }
324
325 pub fn is_follower(&self) -> bool {
327 self.manifest_ctx.state.load() == RegionRoleState::Follower
328 }
329
330 pub(crate) fn state(&self) -> RegionRoleState {
332 self.manifest_ctx.state.load()
333 }
334
335 pub(crate) fn set_role(&self, next_role: RegionRole) {
337 self.manifest_ctx.set_role(next_role, self.region_id);
338 }
339
340 pub(crate) fn region_role(&self) -> RegionRole {
341 match self.state() {
342 RegionRoleState::Follower => RegionRole::Follower,
343 RegionRoleState::Leader(RegionLeaderState::Staging) => RegionRole::StagingLeader,
344 RegionRoleState::Leader(RegionLeaderState::Downgrading) => {
345 RegionRole::DowngradingLeader
346 }
347 RegionRoleState::Leader(_) => RegionRole::Leader,
348 }
349 }
350
351 pub(crate) fn set_altering(&self) -> Result<()> {
354 self.compare_exchange_state(
355 RegionLeaderState::Writable,
356 RegionRoleState::Leader(RegionLeaderState::Altering),
357 )
358 }
359
360 pub(crate) fn set_dropping(&self, expect: RegionLeaderState) -> Result<()> {
363 self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Dropping))
364 }
365
366 pub(crate) fn set_truncating(&self) -> Result<()> {
369 self.compare_exchange_state(
370 RegionLeaderState::Writable,
371 RegionRoleState::Leader(RegionLeaderState::Truncating),
372 )
373 }
374
375 pub(crate) fn set_editing(&self, expect: RegionLeaderState) -> Result<()> {
378 self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Editing))
379 }
380
381 pub(crate) async fn set_staging(
387 &self,
388 manager: &mut RwLockWriteGuard<'_, RegionManifestManager>,
389 ) -> Result<()> {
390 manager.store().clear_staging_manifests().await?;
391
392 self.compare_exchange_state(
393 RegionLeaderState::Writable,
394 RegionRoleState::Leader(RegionLeaderState::Staging),
395 )
396 }
397
398 pub(crate) fn set_entering_staging(&self) -> Result<()> {
400 self.compare_exchange_state(
401 RegionLeaderState::Writable,
402 RegionRoleState::Leader(RegionLeaderState::EnteringStaging),
403 )
404 }
405
406 pub fn exit_staging(&self) -> Result<()> {
411 self.manifest_ctx.exit_staging(
412 self.region_id,
413 RegionRoleState::Leader(RegionLeaderState::Writable),
414 )
415 }
416
417 pub(crate) async fn set_role_state_gracefully(
419 &self,
420 state: SettableRegionRoleState,
421 ) -> Result<()> {
422 let mut manager: RwLockWriteGuard<'_, RegionManifestManager> =
423 self.manifest_ctx.manifest_manager.write().await;
424 let current_state = self.state();
425
426 match state {
427 SettableRegionRoleState::Leader => {
428 match current_state {
431 RegionRoleState::Leader(RegionLeaderState::Staging) => {
432 info!("Exiting staging mode for region {}", self.region_id);
433 self.exit_staging_on_success(&mut manager).await?;
435 }
436 RegionRoleState::Leader(RegionLeaderState::Writable) => {
437 info!("Region {} already in normal leader mode", self.region_id);
439 }
440 _ => {
441 return Err(RegionStateSnafu {
443 region_id: self.region_id,
444 state: current_state,
445 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
446 }
447 .build());
448 }
449 }
450 }
451
452 SettableRegionRoleState::StagingLeader => {
453 match current_state {
456 RegionRoleState::Leader(RegionLeaderState::Writable) => {
457 info!("Entering staging mode for region {}", self.region_id);
458 self.set_staging(&mut manager).await?;
459 }
460 RegionRoleState::Leader(RegionLeaderState::Staging) => {
461 info!("Region {} already in staging mode", self.region_id);
463 }
464 _ => {
465 return Err(RegionStateSnafu {
466 region_id: self.region_id,
467 state: current_state,
468 expect: RegionRoleState::Leader(RegionLeaderState::Writable),
469 }
470 .build());
471 }
472 }
473 }
474
475 SettableRegionRoleState::Follower => {
476 match current_state {
478 RegionRoleState::Leader(RegionLeaderState::Staging) => {
479 info!(
480 "Exiting staging and demoting region {} to follower",
481 self.region_id
482 );
483 self.exit_staging()?;
484 self.set_role(RegionRole::Follower);
485 }
486 RegionRoleState::Leader(_) => {
487 info!("Demoting region {} from leader to follower", self.region_id);
488 self.set_role(RegionRole::Follower);
489 }
490 RegionRoleState::Follower => {
491 info!("Region {} already in follower mode", self.region_id);
493 }
494 }
495 }
496
497 SettableRegionRoleState::DowngradingLeader => {
498 match current_state {
500 RegionRoleState::Leader(RegionLeaderState::Staging) => {
501 info!(
502 "Exiting staging and entering downgrade for region {}",
503 self.region_id
504 );
505 self.exit_staging()?;
506 self.set_role(RegionRole::DowngradingLeader);
507 }
508 RegionRoleState::Leader(RegionLeaderState::Writable) => {
509 info!("Starting downgrade for region {}", self.region_id);
510 self.set_role(RegionRole::DowngradingLeader);
511 }
512 RegionRoleState::Leader(RegionLeaderState::Downgrading) => {
513 info!("Region {} already in downgrading mode", self.region_id);
515 }
516 _ => {
517 warn!(
518 "Cannot start downgrade for region {} from state {:?}",
519 self.region_id, current_state
520 );
521 }
522 }
523 }
524 }
525
526 if self.state() == RegionRoleState::Leader(RegionLeaderState::Writable) {
528 let manifest_meta = &manager.manifest().metadata;
530 let current_version = self.version();
531 let current_meta = ¤t_version.metadata;
532 if manifest_meta.partition_expr.is_none() && current_meta.partition_expr.is_some() {
533 let action = RegionMetaAction::Change(RegionChange {
534 metadata: current_meta.clone(),
535 sst_format: current_version.options.sst_format.unwrap_or_default(),
536 append_mode: None,
537 });
538 let result = manager
539 .update(RegionMetaActionList::with_action(action), false)
540 .await;
541
542 match result {
543 Ok(version) => {
544 info!(
545 "Successfully persisted backfilled metadata for region {}, version: {}",
546 self.region_id, version
547 );
548 }
549 Err(e) => {
550 warn!(e; "Failed to persist backfilled metadata for region {}", self.region_id);
551 }
552 }
553 }
554 }
555
556 drop(manager);
557
558 Ok(())
559 }
560
561 pub(crate) fn switch_state_to_writable(&self, expect: RegionLeaderState) {
564 if let Err(e) = self
565 .compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Writable))
566 {
567 error!(e; "failed to switch region state to writable, expect state is {:?}", expect);
568 }
569 }
570
571 pub(crate) fn switch_state_to_staging(&self, expect: RegionLeaderState) {
574 if let Err(e) =
575 self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Staging))
576 {
577 error!(e; "failed to switch region state to staging, expect state is {:?}", expect);
578 }
579 }
580
581 pub(crate) fn region_statistic(&self) -> RegionStatistic {
583 let version = self.version();
584 let memtables = &version.memtables;
585 let memtable_usage = (memtables.mutable_usage() + memtables.immutables_usage()) as u64;
586
587 let sst_usage = version.ssts.sst_usage();
588 let index_usage = version.ssts.index_usage();
589 let flushed_entry_id = version.flushed_entry_id;
590
591 let wal_usage = self.estimated_wal_usage(memtable_usage);
592 let manifest_usage = self.stats.total_manifest_size();
593 let num_rows = version.ssts.num_rows() + version.memtables.num_rows();
594 let num_files = version.ssts.num_files();
595 let manifest_version = self.stats.manifest_version();
596 let file_removed_cnt = self.stats.file_removed_cnt();
597
598 let topic_latest_entry_id = self.topic_latest_entry_id.load(Ordering::Relaxed);
599 let written_bytes = self.written_bytes.load(Ordering::Relaxed);
600
601 RegionStatistic {
602 num_rows,
603 memtable_size: memtable_usage,
604 wal_size: wal_usage,
605 manifest_size: manifest_usage,
606 sst_size: sst_usage,
607 sst_num: num_files,
608 index_size: index_usage,
609 manifest: RegionManifestInfo::Mito {
610 manifest_version,
611 flushed_entry_id,
612 file_removed_cnt,
613 },
614 data_topic_latest_entry_id: topic_latest_entry_id,
615 metadata_topic_latest_entry_id: topic_latest_entry_id,
616 written_bytes,
617 }
618 }
619
620 fn estimated_wal_usage(&self, memtable_usage: u64) -> u64 {
623 ((memtable_usage as f32) * ESTIMATED_WAL_FACTOR) as u64
624 }
625
626 fn compare_exchange_state(
629 &self,
630 expect: RegionLeaderState,
631 state: RegionRoleState,
632 ) -> Result<()> {
633 self.manifest_ctx
634 .state
635 .compare_exchange(RegionRoleState::Leader(expect), state)
636 .map_err(|actual| {
637 RegionStateSnafu {
638 region_id: self.region_id,
639 state: actual,
640 expect: RegionRoleState::Leader(expect),
641 }
642 .build()
643 })?;
644 Ok(())
645 }
646
647 pub fn access_layer(&self) -> AccessLayerRef {
648 self.access_layer.clone()
649 }
650
651 pub async fn manifest_sst_entries(&self) -> Vec<ManifestSstEntry> {
653 let table_dir = self.table_dir();
654 let path_type = self.access_layer.path_type();
655
656 let visible_ssts = self
657 .version()
658 .ssts
659 .levels()
660 .iter()
661 .flat_map(|level| level.files().map(|file| file.file_id().file_id()))
662 .collect::<HashSet<_>>();
663
664 let manifest_files = self.manifest_ctx.manifest().await.files.clone();
665 let staging_files = self
666 .manifest_ctx
667 .staging_manifest()
668 .await
669 .map(|m| m.files.clone())
670 .unwrap_or_default();
671 let files = manifest_files
672 .into_iter()
673 .chain(staging_files)
674 .collect::<HashMap<_, _>>();
675
676 files
677 .values()
678 .map(|meta| {
679 let region_id = self.region_id;
680 let origin_region_id = meta.region_id;
681 let (index_version, index_file_path, index_file_size) = if meta.index_file_size > 0
682 {
683 let index_file_path = index_file_path(table_dir, meta.index_id(), path_type);
684 (
685 meta.index_version,
686 Some(index_file_path),
687 Some(meta.index_file_size),
688 )
689 } else {
690 (0, None, None)
691 };
692 let visible = visible_ssts.contains(&meta.file_id);
693 ManifestSstEntry {
694 table_dir: table_dir.to_string(),
695 region_id,
696 table_id: region_id.table_id(),
697 region_number: region_id.region_number(),
698 region_group: region_id.region_group(),
699 region_sequence: region_id.region_sequence(),
700 file_id: meta.file_id.to_string(),
701 index_version,
702 level: meta.level,
703 file_path: sst_file_path(table_dir, meta.file_id(), path_type),
704 file_size: meta.file_size,
705 index_file_path,
706 index_file_size,
707 num_rows: meta.num_rows,
708 num_row_groups: meta.num_row_groups,
709 num_series: Some(meta.num_series),
710 min_ts: meta.time_range.0,
711 max_ts: meta.time_range.1,
712 sequence: meta.sequence.map(|s| s.get()),
713 origin_region_id,
714 node_id: None,
715 visible,
716 }
717 })
718 .collect()
719 }
720
721 pub async fn file_metas(&self, file_ids: &[FileId]) -> Vec<Option<FileMeta>> {
723 let manifest_files = self.manifest_ctx.manifest().await.files.clone();
724
725 file_ids
726 .iter()
727 .map(|file_id| manifest_files.get(file_id).cloned())
728 .collect::<Vec<_>>()
729 }
730
731 pub(crate) async fn exit_staging_on_success(
733 &self,
734 manager: &mut RwLockWriteGuard<'_, RegionManifestManager>,
735 ) -> Result<()> {
736 let current_state = self.manifest_ctx.current_state();
737 ensure!(
738 current_state == RegionRoleState::Leader(RegionLeaderState::Staging),
739 RegionStateSnafu {
740 region_id: self.region_id,
741 state: current_state,
742 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
743 }
744 );
745
746 let merged_actions = match manager.merge_staged_actions(current_state).await? {
748 Some(actions) => actions,
749 None => {
750 info!(
751 "No staged manifests to merge for region {}, exiting staging mode without changes",
752 self.region_id
753 );
754 self.exit_staging()?;
756 return Ok(());
757 }
758 };
759 let expect_change = merged_actions.actions.iter().any(|a| a.is_change());
760 let expect_partition_expr_change = merged_actions
761 .actions
762 .iter()
763 .any(|a| a.is_partition_expr_change());
764 let expect_edit = merged_actions.actions.iter().any(|a| a.is_edit());
765 ensure!(
766 !(expect_change && expect_partition_expr_change),
767 UnexpectedSnafu {
768 reason: "unexpected both change and partition expr change actions in merged actions"
769 }
770 );
771 ensure!(
772 expect_change || expect_partition_expr_change,
773 UnexpectedSnafu {
774 reason: "expect a change or partition expr change action in merged actions"
775 }
776 );
777 ensure!(
778 expect_edit,
779 UnexpectedSnafu {
780 reason: "expect an edit action in merged actions"
781 }
782 );
783
784 let (merged_partition_expr_change, merged_change, merged_edit) =
785 merged_actions.clone().split_region_change_and_edit();
786 if let Some(change) = &merged_change {
787 let current_column_metadatas = &self.version().metadata.column_metadatas;
791 ensure!(
792 change.metadata.column_metadatas == *current_column_metadatas,
793 UnexpectedSnafu {
794 reason: "change action alters column metadata in staging exit"
795 }
796 );
797 }
798
799 let new_version = manager.update(merged_actions, false).await?;
802 info!(
803 "Successfully submitted merged staged manifests for region {}, new version: {}",
804 self.region_id, new_version
805 );
806
807 if let Some(change) = merged_partition_expr_change {
809 let mut new_metadata = self.version().metadata.as_ref().clone();
810 new_metadata.set_partition_expr(change.partition_expr);
811 self.version_control.alter_metadata(new_metadata.into());
812 }
813 if let Some(change) = merged_change {
814 self.version_control.alter_metadata(change.metadata);
815 }
816 self.version_control
817 .apply_edit(Some(merged_edit), &[], self.file_purger.clone());
818
819 if let Err(e) = manager.clear_staging_manifest_and_dir().await {
821 error!(e; "Failed to clear staging manifest dir for region {}", self.region_id);
822 }
823 self.exit_staging()?;
824
825 Ok(())
826 }
827
828 pub fn maybe_staging_partition_expr_str(&self) -> Option<String> {
834 let is_staging = self.is_staging();
835 if is_staging {
836 let staging_partition_info = self.manifest_ctx.staging_partition_info();
837 if staging_partition_info.is_none() {
838 warn!(
839 "Staging partition expr is none for region {} in staging state",
840 self.region_id
841 );
842 }
843 staging_partition_info
844 .as_ref()
845 .and_then(|info| info.partition_expr().map(ToString::to_string))
846 } else {
847 let version = self.version();
848 version.metadata.partition_expr.clone()
849 }
850 }
851
852 pub fn expected_partition_expr_version(&self) -> u64 {
853 if self.is_staging() {
854 self.manifest_ctx
855 .staging_partition_info()
856 .as_ref()
857 .map(|info| info.partition_rule_version)
858 .unwrap_or_default()
859 } else {
860 self.version().metadata.partition_expr_version
861 }
862 }
863
864 pub(crate) fn reject_all_writes_in_staging(&self) -> bool {
866 if !self.is_staging() {
867 return false;
868 }
869 self.manifest_ctx
870 .staging_partition_info()
871 .as_ref()
872 .map(|info| {
873 matches!(
874 info.partition_directive,
875 StagingPartitionDirective::RejectAllWrites
876 )
877 })
878 .unwrap_or(false)
879 }
880}
881
/// Bundles the manifest manager with the region's role state and staging partition info.
#[derive(Debug)]
pub(crate) struct ManifestContext {
    /// Manifest manager of the region, guarded by an async read-write lock.
    pub(crate) manifest_manager: tokio::sync::RwLock<RegionManifestManager>,
    /// Current role state of the region.
    state: AtomicCell<RegionRoleState>,
    /// Partition info of the staging region; cleared whenever staging is exited.
    staging_partition_info: Mutex<Option<StagingPartitionInfo>>,
}
896
897impl ManifestContext {
898 pub(crate) fn new(manager: RegionManifestManager, state: RegionRoleState) -> Self {
899 ManifestContext {
900 manifest_manager: tokio::sync::RwLock::new(manager),
901 state: AtomicCell::new(state),
902 staging_partition_info: Mutex::new(None),
903 }
904 }
905
906 pub(crate) fn staging_partition_info(&self) -> Option<StagingPartitionInfo> {
907 self.staging_partition_info.lock().unwrap().clone()
908 }
909
910 pub(crate) fn set_staging_partition_info(&self, staging_partition_info: StagingPartitionInfo) {
911 let mut current = self.staging_partition_info.lock().unwrap();
912 debug_assert!(current.is_none());
913 *current = Some(staging_partition_info);
914 }
915
916 fn clear_staging_partition_info(&self) {
917 *self.staging_partition_info.lock().unwrap() = None;
918 }
919
920 pub(crate) fn exit_staging(
921 &self,
922 region_id: RegionId,
923 next_state: RegionRoleState,
924 ) -> Result<()> {
925 self.state
926 .compare_exchange(
927 RegionRoleState::Leader(RegionLeaderState::Staging),
928 next_state,
929 )
930 .map_err(|actual| {
931 RegionStateSnafu {
932 region_id,
933 state: actual,
934 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
935 }
936 .build()
937 })?;
938 self.clear_staging_partition_info();
939 Ok(())
940 }
941
942 pub(crate) async fn manifest_version(&self) -> ManifestVersion {
943 self.manifest_manager
944 .read()
945 .await
946 .manifest()
947 .manifest_version
948 }
949
950 pub(crate) async fn has_update(&self) -> Result<bool> {
951 self.manifest_manager.read().await.has_update().await
952 }
953
954 pub(crate) fn current_state(&self) -> RegionRoleState {
956 self.state.load()
957 }
958
959 pub(crate) async fn install_manifest_to(
965 &self,
966 version: ManifestVersion,
967 ) -> Result<Arc<RegionManifest>> {
968 let mut manager = self.manifest_manager.write().await;
969 manager.install_manifest_to(version).await?;
970
971 Ok(manager.manifest())
972 }
973
974 pub(crate) async fn update_manifest(
976 &self,
977 expect_state: RegionLeaderState,
978 action_list: RegionMetaActionList,
979 is_staging: bool,
980 ) -> Result<ManifestVersion> {
981 self.update_manifest_with_state_check(action_list, is_staging, |current_state, region_id| {
982 if expect_state != RegionLeaderState::Downgrading {
987 if current_state == RegionRoleState::Leader(RegionLeaderState::Downgrading) {
988 info!(
989 "Region {} is in downgrading leader state, updating manifest. Expect state is {:?}",
990 region_id, expect_state
991 );
992 }
993 ensure!(
994 current_state == RegionRoleState::Leader(expect_state)
995 || current_state == RegionRoleState::Leader(RegionLeaderState::Downgrading),
996 UpdateManifestSnafu {
997 region_id,
998 state: current_state,
999 }
1000 );
1001 } else {
1002 ensure!(
1003 current_state == RegionRoleState::Leader(expect_state),
1004 RegionStateSnafu {
1005 region_id,
1006 state: current_state,
1007 expect: RegionRoleState::Leader(expect_state),
1008 }
1009 );
1010 }
1011
1012 Ok(())
1013 })
1014 .await
1015 }
1016
1017 pub(crate) async fn update_manifest_for_compaction(
1034 &self,
1035 action_list: RegionMetaActionList,
1036 ) -> Result<ManifestVersion> {
1037 self.update_manifest_with_state_check(action_list, false, |current_state, region_id| {
1038 ensure!(
1039 matches!(
1040 current_state,
1041 RegionRoleState::Leader(RegionLeaderState::Writable)
1042 | RegionRoleState::Leader(RegionLeaderState::Editing)
1043 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
1044 ),
1045 UpdateManifestSnafu {
1046 region_id,
1047 state: current_state,
1048 }
1049 );
1050
1051 Ok(())
1052 })
1053 .await
1054 }
1055
1056 async fn update_manifest_with_state_check(
1057 &self,
1058 action_list: RegionMetaActionList,
1059 is_staging: bool,
1060 check_state: impl FnOnce(RegionRoleState, RegionId) -> Result<()>,
1061 ) -> Result<ManifestVersion> {
1062 let mut manager = self.manifest_manager.write().await;
1064 let manifest = manager.manifest();
1066 let current_state = self.state.load();
1069 check_state(current_state, manifest.metadata.region_id)?;
1070
1071 for action in &action_list.actions {
1072 let RegionMetaAction::Edit(edit) = &action else {
1074 continue;
1075 };
1076
1077 let Some(truncated_entry_id) = manifest.truncated_entry_id else {
1079 continue;
1080 };
1081
1082 if let Some(flushed_entry_id) = edit.flushed_entry_id {
1084 let is_newer_entry = truncated_entry_id < flushed_entry_id;
1094 let is_same_entry_with_newer_sequence = truncated_entry_id == flushed_entry_id
1095 && edit.flushed_sequence.is_some_and(|flushed_sequence| {
1096 manifest.flushed_sequence < flushed_sequence
1097 });
1098
1099 ensure!(
1100 is_newer_entry || is_same_entry_with_newer_sequence,
1101 RegionTruncatedSnafu {
1102 region_id: manifest.metadata.region_id,
1103 }
1104 );
1105 }
1106
1107 if !edit.files_to_remove.is_empty() {
1109 for file in &edit.files_to_remove {
1111 ensure!(
1112 manifest.files.contains_key(&file.file_id),
1113 RegionTruncatedSnafu {
1114 region_id: manifest.metadata.region_id,
1115 }
1116 );
1117 }
1118 }
1119 }
1120
1121 let version = manager.update(action_list, is_staging).await.inspect_err(
1123 |e| error!(e; "Failed to update manifest, region_id: {}", manifest.metadata.region_id),
1124 )?;
1125
1126 if self.state.load() == RegionRoleState::Follower {
1127 warn!(
1128 "Region {} becomes follower while updating manifest which may cause inconsistency, manifest version: {version}",
1129 manifest.metadata.region_id
1130 );
1131 }
1132
1133 Ok(version)
1134 }
1135
1136 pub(crate) fn set_role(&self, next_role: RegionRole, region_id: RegionId) {
1170 match next_role {
1171 RegionRole::Follower => {
1172 if self
1173 .exit_staging(region_id, RegionRoleState::Follower)
1174 .is_ok()
1175 {
1176 info!(
1177 "Convert region {} to follower, previous role state: {:?}",
1178 region_id,
1179 RegionRoleState::Leader(RegionLeaderState::Staging)
1180 );
1181 return;
1182 }
1183 match self.state.fetch_update(|state| {
1184 if !matches!(state, RegionRoleState::Follower) {
1185 Some(RegionRoleState::Follower)
1186 } else {
1187 None
1188 }
1189 }) {
1190 Ok(state) => info!(
1191 "Convert region {} to follower, previous role state: {:?}",
1192 region_id, state
1193 ),
1194 Err(state) => {
1195 if state != RegionRoleState::Follower {
1196 warn!(
1197 "Failed to convert region {} to follower, current role state: {:?}",
1198 region_id, state
1199 )
1200 }
1201 }
1202 }
1203 }
1204 RegionRole::Leader => {
1205 if self
1206 .exit_staging(
1207 region_id,
1208 RegionRoleState::Leader(RegionLeaderState::Writable),
1209 )
1210 .is_ok()
1211 {
1212 info!(
1213 "Convert region {} to leader, previous role state: {:?}",
1214 region_id,
1215 RegionRoleState::Leader(RegionLeaderState::Staging)
1216 );
1217 return;
1218 }
1219 match self.state.fetch_update(|state| {
1220 if matches!(
1221 state,
1222 RegionRoleState::Follower
1223 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
1224 ) {
1225 Some(RegionRoleState::Leader(RegionLeaderState::Writable))
1226 } else {
1227 None
1228 }
1229 }) {
1230 Ok(state) => info!(
1231 "Convert region {} to leader, previous role state: {:?}",
1232 region_id, state
1233 ),
1234 Err(state) => {
1235 if state != RegionRoleState::Leader(RegionLeaderState::Writable) {
1236 warn!(
1237 "Failed to convert region {} to leader, current role state: {:?}",
1238 region_id, state
1239 )
1240 }
1241 }
1242 }
1243 }
1244 RegionRole::StagingLeader => {
1245 info!(
1246 "Ignore direct conversion of region {} to staging leader; staging requires the dedicated workflow",
1247 region_id
1248 );
1249 }
1250 RegionRole::DowngradingLeader => {
1251 if self
1252 .exit_staging(
1253 region_id,
1254 RegionRoleState::Leader(RegionLeaderState::Downgrading),
1255 )
1256 .is_ok()
1257 {
1258 info!(
1259 "Convert region {} to downgrading region, previous role state: {:?}",
1260 region_id,
1261 RegionRoleState::Leader(RegionLeaderState::Staging)
1262 );
1263 return;
1264 }
1265 match self.state.compare_exchange(
1266 RegionRoleState::Leader(RegionLeaderState::Writable),
1267 RegionRoleState::Leader(RegionLeaderState::Downgrading),
1268 ) {
1269 Ok(state) => info!(
1270 "Convert region {} to downgrading region, previous role state: {:?}",
1271 region_id, state
1272 ),
1273 Err(state) => {
1274 if state != RegionRoleState::Leader(RegionLeaderState::Downgrading) {
1275 warn!(
1276 "Failed to convert region {} to downgrading leader, current role state: {:?}",
1277 region_id, state
1278 )
1279 }
1280 }
1281 }
1282 }
1283 }
1284 }
1285
1286 pub(crate) async fn manifest(&self) -> Arc<crate::manifest::action::RegionManifest> {
1288 self.manifest_manager.read().await.manifest()
1289 }
1290
1291 pub(crate) async fn staging_manifest(
1293 &self,
1294 ) -> Option<Arc<crate::manifest::action::RegionManifest>> {
1295 self.manifest_manager.read().await.staging_manifest()
1296 }
1297}
1298
/// Shared reference to a [`ManifestContext`].
pub(crate) type ManifestContextRef = Arc<ManifestContext>;
1300
/// Registry of regions keyed by region id.
#[derive(Debug, Default)]
pub(crate) struct RegionMap {
    /// Regions by id, guarded by a read-write lock.
    regions: RwLock<HashMap<RegionId, MitoRegionRef>>,
}
1306
1307impl RegionMap {
1308 pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1310 let regions = self.regions.read().unwrap();
1311 regions.contains_key(®ion_id)
1312 }
1313
1314 pub(crate) fn insert_region(&self, region: MitoRegionRef) {
1316 let mut regions = self.regions.write().unwrap();
1317 regions.insert(region.region_id, region);
1318 }
1319
1320 pub(crate) fn get_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
1322 let regions = self.regions.read().unwrap();
1323 regions.get(®ion_id).cloned()
1324 }
1325
1326 pub(crate) fn writable_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1330 let region = self
1331 .get_region(region_id)
1332 .context(RegionNotFoundSnafu { region_id })?;
1333 ensure!(
1334 region.is_writable(),
1335 RegionStateSnafu {
1336 region_id,
1337 state: region.state(),
1338 expect: RegionRoleState::Leader(RegionLeaderState::Writable),
1339 }
1340 );
1341 Ok(region)
1342 }
1343
1344 pub(crate) fn follower_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1348 let region = self
1349 .get_region(region_id)
1350 .context(RegionNotFoundSnafu { region_id })?;
1351 ensure!(
1352 region.is_follower(),
1353 RegionStateSnafu {
1354 region_id,
1355 state: region.state(),
1356 expect: RegionRoleState::Follower,
1357 }
1358 );
1359
1360 Ok(region)
1361 }
1362
1363 pub(crate) fn get_region_or<F: OnFailure>(
1367 &self,
1368 region_id: RegionId,
1369 cb: &mut F,
1370 ) -> Option<MitoRegionRef> {
1371 match self
1372 .get_region(region_id)
1373 .context(RegionNotFoundSnafu { region_id })
1374 {
1375 Ok(region) => Some(region),
1376 Err(e) => {
1377 cb.on_failure(e);
1378 None
1379 }
1380 }
1381 }
1382
1383 pub(crate) fn writable_region_or<F: OnFailure>(
1387 &self,
1388 region_id: RegionId,
1389 cb: &mut F,
1390 ) -> Option<MitoRegionRef> {
1391 match self.writable_region(region_id) {
1392 Ok(region) => Some(region),
1393 Err(e) => {
1394 cb.on_failure(e);
1395 None
1396 }
1397 }
1398 }
1399
1400 pub(crate) fn writable_non_staging_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1404 let region = self.writable_region(region_id)?;
1405 if region.is_staging() {
1406 return Err(crate::error::RegionStateSnafu {
1407 region_id,
1408 state: region.state(),
1409 expect: RegionRoleState::Leader(RegionLeaderState::Writable),
1410 }
1411 .build());
1412 }
1413 Ok(region)
1414 }
1415
1416 pub(crate) fn staging_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1420 let region = self
1421 .get_region(region_id)
1422 .context(RegionNotFoundSnafu { region_id })?;
1423 ensure!(
1424 region.is_staging(),
1425 RegionStateSnafu {
1426 region_id,
1427 state: region.state(),
1428 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
1429 }
1430 );
1431 Ok(region)
1432 }
1433
1434 fn flushable_region(&self, region_id: RegionId) -> Result<Option<MitoRegionRef>> {
1439 let region = self
1440 .get_region(region_id)
1441 .context(RegionNotFoundSnafu { region_id })?;
1442 if region.is_flushable() {
1443 Ok(Some(region))
1444 } else {
1445 Ok(None)
1446 }
1447 }
1448
1449 pub(crate) fn flushable_region_or<F: OnFailure>(
1454 &self,
1455 region_id: RegionId,
1456 cb: &mut F,
1457 ) -> Option<MitoRegionRef> {
1458 match self.flushable_region(region_id) {
1459 Ok(region) => region,
1460 Err(e) => {
1461 cb.on_failure(e);
1462 None
1463 }
1464 }
1465 }
1466
1467 pub(crate) fn remove_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
1469 let mut regions = self.regions.write().unwrap();
1470 regions.remove(®ion_id)
1471 }
1472
1473 pub(crate) fn list_regions(&self) -> Vec<MitoRegionRef> {
1475 let regions = self.regions.read().unwrap();
1476 regions.values().cloned().collect()
1477 }
1478
1479 pub(crate) fn clear(&self) {
1481 self.regions.write().unwrap().clear();
1482 }
1483}
1484
/// Shared, reference-counted handle to a [`RegionMap`].
pub(crate) type RegionMapRef = Arc<RegionMap>;
1486
#[derive(Debug, Default)]
/// Per-region lists of [`OptionOutputTx`] senders, keyed by [`RegionId`].
///
/// The map lives behind a `std::sync::RwLock`, so it can be used through a
/// shared reference.
pub(crate) struct OpeningRegions {
    /// Region id to the senders registered for that region.
    regions: RwLock<HashMap<RegionId, Vec<OptionOutputTx>>>,
}
1492
1493impl OpeningRegions {
1494 pub(crate) fn wait_for_opening_region(
1496 &self,
1497 region_id: RegionId,
1498 sender: OptionOutputTx,
1499 ) -> Option<OptionOutputTx> {
1500 let mut regions = self.regions.write().unwrap();
1501 match regions.entry(region_id) {
1502 Entry::Occupied(mut senders) => {
1503 senders.get_mut().push(sender);
1504 None
1505 }
1506 Entry::Vacant(_) => Some(sender),
1507 }
1508 }
1509
1510 pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1512 let regions = self.regions.read().unwrap();
1513 regions.contains_key(®ion_id)
1514 }
1515
1516 pub(crate) fn insert_sender(&self, region: RegionId, sender: OptionOutputTx) {
1518 let mut regions = self.regions.write().unwrap();
1519 regions.insert(region, vec![sender]);
1520 }
1521
1522 pub(crate) fn remove_sender(&self, region_id: RegionId) -> Vec<OptionOutputTx> {
1524 let mut regions = self.regions.write().unwrap();
1525 regions.remove(®ion_id).unwrap_or_default()
1526 }
1527
1528 #[cfg(test)]
1529 pub(crate) fn sender_len(&self, region_id: RegionId) -> usize {
1530 let regions = self.regions.read().unwrap();
1531 if let Some(senders) = regions.get(®ion_id) {
1532 senders.len()
1533 } else {
1534 0
1535 }
1536 }
1537}
1538
/// Shared, reference-counted handle to an [`OpeningRegions`].
pub(crate) type OpeningRegionsRef = Arc<OpeningRegions>;
1540
#[derive(Debug, Default)]
/// A set of [`RegionId`]s guarded by a `std::sync::RwLock`.
///
/// NOTE(review): presumably the regions currently performing catchup, judging
/// by the type name — confirm against callers.
pub(crate) struct CatchupRegions {
    /// Ids of the tracked regions.
    regions: RwLock<HashSet<RegionId>>,
}
1546
1547impl CatchupRegions {
1548 pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1550 let regions = self.regions.read().unwrap();
1551 regions.contains(®ion_id)
1552 }
1553
1554 pub(crate) fn insert_region(&self, region_id: RegionId) {
1556 let mut regions = self.regions.write().unwrap();
1557 regions.insert(region_id);
1558 }
1559
1560 pub(crate) fn remove_region(&self, region_id: RegionId) {
1562 let mut regions = self.regions.write().unwrap();
1563 regions.remove(®ion_id);
1564 }
1565}
1566
/// Shared, reference-counted handle to a [`CatchupRegions`].
pub(crate) type CatchupRegionsRef = Arc<CatchupRegions>;
1568
#[derive(Default, Debug, Clone)]
/// Atomic counters describing a region's manifest.
///
/// Each counter sits behind an `Arc`, so cloning a `ManifestStats` yields a
/// handle to the same underlying counters rather than a snapshot.
pub struct ManifestStats {
    /// Total manifest size counter (presumably in bytes — confirm at the writer).
    pub(crate) total_manifest_size: Arc<AtomicU64>,
    /// Manifest version counter.
    pub(crate) manifest_version: Arc<AtomicU64>,
    /// Counter of removed files.
    pub(crate) file_removed_cnt: Arc<AtomicU64>,
}
1576
1577impl ManifestStats {
1578 fn total_manifest_size(&self) -> u64 {
1579 self.total_manifest_size.load(Ordering::Relaxed)
1580 }
1581
1582 fn manifest_version(&self) -> u64 {
1583 self.manifest_version.load(Ordering::Relaxed)
1584 }
1585
1586 fn file_removed_cnt(&self) -> u64 {
1587 self.file_removed_cnt.load(Ordering::Relaxed)
1588 }
1589}
1590
1591pub fn parse_partition_expr(partition_expr_str: Option<&str>) -> Result<Option<PartitionExpr>> {
1593 match partition_expr_str {
1594 None => Ok(None),
1595 Some("") => Ok(None),
1596 Some(json_str) => {
1597 let expr = partition::expr::PartitionExpr::from_json_str(json_str)
1598 .with_context(|_| InvalidPartitionExprSnafu { expr: json_str })?;
1599 Ok(expr)
1600 }
1601 }
1602}
1603
1604#[cfg(test)]
1605mod tests {
1606 use std::sync::Arc;
1607 use std::sync::atomic::AtomicU64;
1608
1609 use common_datasource::compression::CompressionType;
1610 use common_test_util::temp_dir::create_temp_dir;
1611 use crossbeam_utils::atomic::AtomicCell;
1612 use object_store::ObjectStore;
1613 use object_store::services::Fs;
1614 use store_api::logstore::provider::Provider;
1615 use store_api::region_engine::RegionRole;
1616 use store_api::region_request::PathType;
1617 use store_api::storage::{FileId, RegionId};
1618
1619 use crate::access_layer::AccessLayer;
1620 use crate::error::Error;
1621 use crate::manifest::action::{
1622 RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionPartitionExprChange,
1623 };
1624 use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
1625 use crate::region::{
1626 ManifestContext, ManifestStats, MitoRegion, RegionLeaderState, RegionRoleState,
1627 };
1628 use crate::sst::FormatType;
1629 use crate::sst::index::intermediate::IntermediateManager;
1630 use crate::sst::index::puffin_manager::PuffinManagerFactory;
1631 use crate::test_util::scheduler_util::SchedulerEnv;
1632 use crate::test_util::version_util::VersionControlBuilder;
1633 use crate::time_provider::StdTimeProvider;
1634
1635 #[test]
1636 fn test_region_state_lock_free() {
1637 assert!(AtomicCell::<RegionRoleState>::is_lock_free());
1638 }
1639
1640 async fn build_test_region(env: &SchedulerEnv) -> MitoRegion {
1641 let builder = VersionControlBuilder::new();
1642 let version_control = Arc::new(builder.build());
1643 let metadata = version_control.current().version.metadata.clone();
1644
1645 let manager = RegionManifestManager::new(
1646 metadata.clone(),
1647 0,
1648 RegionManifestOptions {
1649 manifest_dir: "".to_string(),
1650 object_store: env.access_layer.object_store().clone(),
1651 compress_type: CompressionType::Uncompressed,
1652 checkpoint_distance: 10,
1653 remove_file_options: Default::default(),
1654 manifest_cache: None,
1655 },
1656 FormatType::PrimaryKey,
1657 &Default::default(),
1658 )
1659 .await
1660 .unwrap();
1661
1662 let manifest_ctx = Arc::new(ManifestContext::new(
1663 manager,
1664 RegionRoleState::Leader(RegionLeaderState::Writable),
1665 ));
1666
1667 MitoRegion {
1668 region_id: metadata.region_id,
1669 version_control,
1670 access_layer: env.access_layer.clone(),
1671 manifest_ctx,
1672 file_purger: crate::test_util::new_noop_file_purger(),
1673 provider: Provider::noop_provider(),
1674 last_flush_millis: Default::default(),
1675 last_compaction_millis: Default::default(),
1676 time_provider: Arc::new(StdTimeProvider),
1677 topic_latest_entry_id: Default::default(),
1678 written_bytes: Arc::new(AtomicU64::new(0)),
1679 stats: ManifestStats::default(),
1680 }
1681 }
1682
1683 fn empty_edit() -> RegionEdit {
1684 RegionEdit {
1685 files_to_add: Vec::new(),
1686 files_to_remove: Vec::new(),
1687 timestamp_ms: None,
1688 compaction_time_window: None,
1689 flushed_entry_id: None,
1690 flushed_sequence: None,
1691 committed_sequence: None,
1692 }
1693 }
1694
1695 #[tokio::test]
1696 async fn test_compaction_update_manifest_allows_editing_state() {
1697 let env = SchedulerEnv::new().await;
1698 let region = build_test_region(&env).await;
1699 region.set_editing(RegionLeaderState::Writable).unwrap();
1700
1701 let file_id = FileId::random();
1702 let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(RegionEdit {
1703 files_to_add: vec![crate::sst::file::FileMeta {
1704 region_id: region.region_id,
1705 file_id,
1706 level: 1,
1707 ..Default::default()
1708 }],
1709 files_to_remove: Vec::new(),
1710 timestamp_ms: None,
1711 compaction_time_window: None,
1712 flushed_entry_id: None,
1713 flushed_sequence: None,
1714 committed_sequence: None,
1715 }));
1716
1717 region
1718 .manifest_ctx
1719 .update_manifest_for_compaction(action_list)
1720 .await
1721 .unwrap();
1722
1723 assert!(
1724 region
1725 .manifest_ctx
1726 .manifest()
1727 .await
1728 .files
1729 .contains_key(&file_id)
1730 );
1731 }
1732
1733 #[tokio::test]
1734 async fn test_exit_staging_partition_expr_change_and_edit_success() {
1735 let env = SchedulerEnv::new().await;
1736 let region = build_test_region(&env).await;
1737
1738 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1739 region.set_staging(&mut manager).await.unwrap();
1740 manager
1741 .update(
1742 RegionMetaActionList::new(vec![
1743 RegionMetaAction::PartitionExprChange(RegionPartitionExprChange {
1744 partition_expr: Some("expr_a".to_string()),
1745 }),
1746 RegionMetaAction::Edit(empty_edit()),
1747 ]),
1748 true,
1749 )
1750 .await
1751 .unwrap();
1752
1753 region.exit_staging_on_success(&mut manager).await.unwrap();
1754 drop(manager);
1755
1756 assert_eq!(
1757 region.version().metadata.partition_expr.as_deref(),
1758 Some("expr_a")
1759 );
1760 assert_eq!(
1761 region.state(),
1762 RegionRoleState::Leader(RegionLeaderState::Writable)
1763 );
1764 }
1765
1766 #[tokio::test]
1767 async fn test_exit_staging_change_with_same_columns_success() {
1768 let env = SchedulerEnv::new().await;
1769 let region = build_test_region(&env).await;
1770
1771 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1772 region.set_staging(&mut manager).await.unwrap();
1773
1774 let mut changed_metadata = region.version().metadata.as_ref().clone();
1775 changed_metadata.set_partition_expr(Some("expr_b".to_string()));
1776
1777 manager
1778 .update(
1779 RegionMetaActionList::new(vec![
1780 RegionMetaAction::Change(RegionChange {
1781 metadata: Arc::new(changed_metadata),
1782 sst_format: FormatType::PrimaryKey,
1783 append_mode: None,
1784 }),
1785 RegionMetaAction::Edit(empty_edit()),
1786 ]),
1787 true,
1788 )
1789 .await
1790 .unwrap();
1791
1792 region.exit_staging_on_success(&mut manager).await.unwrap();
1793 drop(manager);
1794
1795 assert_eq!(
1796 region.version().metadata.partition_expr.as_deref(),
1797 Some("expr_b")
1798 );
1799 assert_eq!(
1800 region.state(),
1801 RegionRoleState::Leader(RegionLeaderState::Writable)
1802 );
1803 }
1804
1805 #[tokio::test]
1806 async fn test_exit_staging_change_with_different_columns_fails() {
1807 let env = SchedulerEnv::new().await;
1808 let region = build_test_region(&env).await;
1809
1810 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1811 region.set_staging(&mut manager).await.unwrap();
1812
1813 let mut changed_metadata = region.version().metadata.as_ref().clone();
1814 changed_metadata.column_metadatas.rotate_left(1);
1815
1816 manager
1817 .update(
1818 RegionMetaActionList::new(vec![
1819 RegionMetaAction::Change(RegionChange {
1820 metadata: Arc::new(changed_metadata),
1821 sst_format: FormatType::PrimaryKey,
1822 append_mode: None,
1823 }),
1824 RegionMetaAction::Edit(empty_edit()),
1825 ]),
1826 true,
1827 )
1828 .await
1829 .unwrap();
1830
1831 let result = region.exit_staging_on_success(&mut manager).await;
1832 assert!(matches!(result, Err(Error::Unexpected { .. })));
1833 }
1834
1835 #[tokio::test]
1836 async fn test_exit_staging_partition_expr_change_and_change_conflict_fails() {
1837 let env = SchedulerEnv::new().await;
1838 let region = build_test_region(&env).await;
1839
1840 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1841 region.set_staging(&mut manager).await.unwrap();
1842
1843 let mut changed_metadata = region.version().metadata.as_ref().clone();
1844 changed_metadata.set_partition_expr(Some("expr_c".to_string()));
1845
1846 manager
1847 .update(
1848 RegionMetaActionList::new(vec![
1849 RegionMetaAction::PartitionExprChange(RegionPartitionExprChange {
1850 partition_expr: Some("expr_c".to_string()),
1851 }),
1852 RegionMetaAction::Change(RegionChange {
1853 metadata: Arc::new(changed_metadata),
1854 sst_format: FormatType::PrimaryKey,
1855 append_mode: None,
1856 }),
1857 RegionMetaAction::Edit(empty_edit()),
1858 ]),
1859 true,
1860 )
1861 .await
1862 .unwrap();
1863
1864 let result = region.exit_staging_on_success(&mut manager).await;
1865 assert!(matches!(result, Err(Error::Unexpected { .. })));
1866 }
1867
1868 #[tokio::test]
1869 async fn test_set_region_state() {
1870 let env = SchedulerEnv::new().await;
1871 let builder = VersionControlBuilder::new();
1872 let version_control = Arc::new(builder.build());
1873 let manifest_ctx = env
1874 .mock_manifest_context(version_control.current().version.metadata.clone())
1875 .await;
1876
1877 let region_id = RegionId::new(1024, 0);
1878 manifest_ctx.set_role(RegionRole::Follower, region_id);
1880 assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1881
1882 manifest_ctx.set_role(RegionRole::Leader, region_id);
1884 assert_eq!(
1885 manifest_ctx.state.load(),
1886 RegionRoleState::Leader(RegionLeaderState::Writable)
1887 );
1888
1889 manifest_ctx.set_role(RegionRole::StagingLeader, region_id);
1891 assert_eq!(
1892 manifest_ctx.state.load(),
1893 RegionRoleState::Leader(RegionLeaderState::Writable)
1894 );
1895
1896 manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1898 assert_eq!(
1899 manifest_ctx.state.load(),
1900 RegionRoleState::Leader(RegionLeaderState::Downgrading)
1901 );
1902
1903 manifest_ctx.set_role(RegionRole::Follower, region_id);
1905 assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1906
1907 manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1909 assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1910
1911 manifest_ctx.set_role(RegionRole::Leader, region_id);
1913 manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1914 assert_eq!(
1915 manifest_ctx.state.load(),
1916 RegionRoleState::Leader(RegionLeaderState::Downgrading)
1917 );
1918
1919 manifest_ctx.set_role(RegionRole::Leader, region_id);
1921 assert_eq!(
1922 manifest_ctx.state.load(),
1923 RegionRoleState::Leader(RegionLeaderState::Writable)
1924 );
1925 }
1926
1927 #[tokio::test]
1928 async fn test_staging_state_validation() {
1929 let env = SchedulerEnv::new().await;
1930 let builder = VersionControlBuilder::new();
1931 let version_control = Arc::new(builder.build());
1932
1933 let staging_ctx = {
1935 let manager = RegionManifestManager::new(
1936 version_control.current().version.metadata.clone(),
1937 0,
1938 RegionManifestOptions {
1939 manifest_dir: "".to_string(),
1940 object_store: env.access_layer.object_store().clone(),
1941 compress_type: CompressionType::Uncompressed,
1942 checkpoint_distance: 10,
1943 remove_file_options: Default::default(),
1944 manifest_cache: None,
1945 },
1946 FormatType::PrimaryKey,
1947 &Default::default(),
1948 )
1949 .await
1950 .unwrap();
1951 Arc::new(ManifestContext::new(
1952 manager,
1953 RegionRoleState::Leader(RegionLeaderState::Staging),
1954 ))
1955 };
1956
1957 assert_eq!(
1959 staging_ctx.current_state(),
1960 RegionRoleState::Leader(RegionLeaderState::Staging)
1961 );
1962
1963 let writable_ctx = env
1965 .mock_manifest_context(version_control.current().version.metadata.clone())
1966 .await;
1967
1968 assert_eq!(
1969 writable_ctx.current_state(),
1970 RegionRoleState::Leader(RegionLeaderState::Writable)
1971 );
1972 }
1973
1974 #[tokio::test]
1975 async fn test_staging_state_transitions() {
1976 let builder = VersionControlBuilder::new();
1977 let version_control = Arc::new(builder.build());
1978 let metadata = version_control.current().version.metadata.clone();
1979
1980 let temp_dir = create_temp_dir("");
1982 let path_str = temp_dir.path().display().to_string();
1983 let fs_builder = Fs::default().root(&path_str);
1984 let object_store = ObjectStore::new(fs_builder).unwrap().finish();
1985
1986 let index_aux_path = temp_dir.path().join("index_aux");
1987 let puffin_mgr = PuffinManagerFactory::new(&index_aux_path, 4096, None, None)
1988 .await
1989 .unwrap();
1990 let intm_mgr = IntermediateManager::init_fs(index_aux_path.to_str().unwrap())
1991 .await
1992 .unwrap();
1993
1994 let access_layer = Arc::new(AccessLayer::new(
1995 "",
1996 PathType::Bare,
1997 object_store,
1998 puffin_mgr,
1999 intm_mgr,
2000 ));
2001
2002 let manager = RegionManifestManager::new(
2003 metadata.clone(),
2004 0,
2005 RegionManifestOptions {
2006 manifest_dir: "".to_string(),
2007 object_store: access_layer.object_store().clone(),
2008 compress_type: CompressionType::Uncompressed,
2009 checkpoint_distance: 10,
2010 remove_file_options: Default::default(),
2011 manifest_cache: None,
2012 },
2013 FormatType::PrimaryKey,
2014 &Default::default(),
2015 )
2016 .await
2017 .unwrap();
2018
2019 let manifest_ctx = Arc::new(ManifestContext::new(
2020 manager,
2021 RegionRoleState::Leader(RegionLeaderState::Writable),
2022 ));
2023
2024 let region = MitoRegion {
2025 region_id: metadata.region_id,
2026 version_control,
2027 access_layer,
2028 manifest_ctx: manifest_ctx.clone(),
2029 file_purger: crate::test_util::new_noop_file_purger(),
2030 provider: Provider::noop_provider(),
2031 last_flush_millis: Default::default(),
2032 last_compaction_millis: Default::default(),
2033 time_provider: Arc::new(StdTimeProvider),
2034 topic_latest_entry_id: Default::default(),
2035 written_bytes: Arc::new(AtomicU64::new(0)),
2036 stats: ManifestStats::default(),
2037 };
2038
2039 assert_eq!(
2041 region.state(),
2042 RegionRoleState::Leader(RegionLeaderState::Writable)
2043 );
2044 assert!(!region.is_staging());
2045
2046 let mut manager = manifest_ctx.manifest_manager.write().await;
2048 region.set_staging(&mut manager).await.unwrap();
2049 drop(manager);
2050 assert_eq!(
2051 region.state(),
2052 RegionRoleState::Leader(RegionLeaderState::Staging)
2053 );
2054 assert!(region.is_staging());
2055
2056 region.exit_staging().unwrap();
2058 assert_eq!(
2059 region.state(),
2060 RegionRoleState::Leader(RegionLeaderState::Writable)
2061 );
2062 assert!(!region.is_staging());
2063
2064 {
2066 let manager = manifest_ctx.manifest_manager.write().await;
2068 let dummy_actions = RegionMetaActionList::new(vec![]);
2069 let dummy_bytes = dummy_actions.encode().unwrap();
2070
2071 manager.store().save(100, &dummy_bytes, true).await.unwrap();
2073 manager.store().save(101, &dummy_bytes, true).await.unwrap();
2074 drop(manager);
2075
2076 let manager = manifest_ctx.manifest_manager.read().await;
2078 let dirty_manifests = manager.store().fetch_staging_manifests().await.unwrap();
2079 assert_eq!(
2080 dirty_manifests.len(),
2081 2,
2082 "Should have 2 dirty staging files"
2083 );
2084 drop(manager);
2085
2086 let mut manager = manifest_ctx.manifest_manager.write().await;
2088 region.set_staging(&mut manager).await.unwrap();
2089 drop(manager);
2090
2091 let manager = manifest_ctx.manifest_manager.read().await;
2093 let cleaned_manifests = manager.store().fetch_staging_manifests().await.unwrap();
2094 assert_eq!(
2095 cleaned_manifests.len(),
2096 0,
2097 "Dirty staging files should be cleaned up"
2098 );
2099 drop(manager);
2100
2101 region.exit_staging().unwrap();
2103 }
2104
2105 let mut manager = manifest_ctx.manifest_manager.write().await;
2107 assert!(region.set_staging(&mut manager).await.is_ok()); drop(manager);
2109 let mut manager = manifest_ctx.manifest_manager.write().await;
2110 assert!(region.set_staging(&mut manager).await.is_err()); drop(manager);
2112 assert!(region.exit_staging().is_ok()); assert!(region.exit_staging().is_err()); }
2115}