1pub mod catchup;
18pub mod opener;
19pub mod options;
20pub mod utils;
21pub(crate) mod version;
22
23use std::collections::hash_map::Entry;
24use std::collections::{HashMap, HashSet};
25use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};
26use std::sync::{Arc, Mutex, RwLock};
27
28use common_base::hash::partition_expr_version;
29use common_telemetry::{error, info, warn};
30use crossbeam_utils::atomic::AtomicCell;
31use partition::expr::PartitionExpr;
32use snafu::{OptionExt, ResultExt, ensure};
33use store_api::ManifestVersion;
34use store_api::codec::PrimaryKeyEncoding;
35use store_api::logstore::provider::Provider;
36use store_api::metadata::RegionMetadataRef;
37use store_api::region_engine::{
38 RegionManifestInfo, RegionRole, RegionStatistic, SettableRegionRoleState,
39};
40use store_api::region_info::RegionInfoEntry;
41use store_api::region_request::{PathType, StagingPartitionDirective};
42use store_api::sst_entry::ManifestSstEntry;
43use store_api::storage::{FileId, RegionId, SequenceNumber};
44use tokio::sync::RwLockWriteGuard;
45pub use utils::*;
46
47use crate::access_layer::AccessLayerRef;
48use crate::error::{
49 FlushableRegionStateSnafu, InvalidPartitionExprSnafu, RegionNotFoundSnafu, RegionStateSnafu,
50 RegionTruncatedSnafu, Result, UnexpectedSnafu, UpdateManifestSnafu,
51};
52use crate::manifest::action::{
53 RegionChange, RegionManifest, RegionMetaAction, RegionMetaActionList,
54};
55use crate::manifest::manager::RegionManifestManager;
56use crate::region::version::{VersionControlRef, VersionRef};
57use crate::request::{OnFailure, OptionOutputTx};
58use crate::sst::file::FileMeta;
59use crate::sst::file_purger::FilePurgerRef;
60use crate::sst::location::{index_file_path, sst_file_path};
61use crate::time_provider::TimeProviderRef;
62
/// Factor applied to the memtable usage (see `MitoRegion::estimated_wal_usage`)
/// to obtain an approximate WAL usage.
const ESTIMATED_WAL_FACTOR: f32 = 0.42825;
65
/// Storage usage of a single region, broken down by component.
///
/// NOTE(review): the unit is not visible in this file; presumably bytes — confirm.
#[derive(Debug)]
pub struct RegionUsage {
    /// Id of the region the figures belong to.
    pub region_id: RegionId,
    /// Usage attributed to the write-ahead log.
    pub wal_usage: u64,
    /// Usage attributed to SST files.
    pub sst_usage: u64,
    /// Usage attributed to manifest files.
    pub manifest_usage: u64,
}
74
75impl RegionUsage {
76 pub fn disk_usage(&self) -> u64 {
77 self.wal_usage + self.sst_usage + self.manifest_usage
78 }
79}
80
/// State of a region while it holds the leader role.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegionLeaderState {
    /// Regular leader state; most transitions in this file start from it and
    /// `exit_staging` returns to it.
    Writable,
    /// Staging mode, entered from `Writable` via `MitoRegion::set_staging`.
    /// The region still reports `is_writable()` in this state.
    Staging,
    /// Set from `Writable` by `MitoRegion::set_entering_staging`;
    /// presumably a transitional state on the way to `Staging` — confirm with callers.
    EnteringStaging,
    /// Set from `Writable` by `MitoRegion::set_altering`.
    Altering,
    /// Set by `MitoRegion::set_dropping`.
    Dropping,
    /// Set from `Writable` by `MitoRegion::set_truncating`.
    Truncating,
    /// Set by `MitoRegion::set_editing`.
    Editing,
    /// Entered when the role is set to `RegionRole::DowngradingLeader`.
    /// The region is still flushable but no longer reports `is_writable()`.
    Downgrading,
}
100
/// Role of a region together with its fine-grained leader state, if any.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RegionRoleState {
    /// The region is a leader in the given [`RegionLeaderState`].
    Leader(RegionLeaderState),
    /// The region is a follower.
    Follower,
}
106
107impl RegionRoleState {
108 pub fn into_leader_state(self) -> Option<RegionLeaderState> {
110 match self {
111 RegionRoleState::Leader(leader_state) => Some(leader_state),
112 RegionRoleState::Follower => None,
113 }
114 }
115
116 pub(crate) fn as_str(&self) -> &'static str {
117 match self {
118 RegionRoleState::Follower => "Follower",
119 RegionRoleState::Leader(RegionLeaderState::Writable) => "Leader(Writable)",
120 RegionRoleState::Leader(RegionLeaderState::Staging) => "Leader(Staging)",
121 RegionRoleState::Leader(RegionLeaderState::EnteringStaging) => {
122 "Leader(EnteringStaging)"
123 }
124 RegionRoleState::Leader(RegionLeaderState::Altering) => "Leader(Altering)",
125 RegionRoleState::Leader(RegionLeaderState::Dropping) => "Leader(Dropping)",
126 RegionRoleState::Leader(RegionLeaderState::Truncating) => "Leader(Truncating)",
127 RegionRoleState::Leader(RegionLeaderState::Editing) => "Leader(Editing)",
128 RegionRoleState::Leader(RegionLeaderState::Downgrading) => "Leader(Downgrading)",
129 }
130 }
131}
132
/// In-memory handle of a region: its versions, storage access, manifest
/// context and a few runtime counters.
#[derive(Debug)]
pub struct MitoRegion {
    /// Id of this region.
    pub(crate) region_id: RegionId,

    /// Version control; source of the current version, metadata and sequences.
    pub(crate) version_control: VersionControlRef,
    /// Access layer; provides the table directory and path type of the region.
    pub(crate) access_layer: AccessLayerRef,
    /// Manifest manager together with the role state of the region.
    pub(crate) manifest_ctx: ManifestContextRef,
    /// File purger handed to version control when edits are applied.
    pub(crate) file_purger: FilePurgerRef,
    /// Log store provider of the region.
    pub(crate) provider: Provider,
    /// Timestamp (milliseconds) recorded by `update_flush_millis`.
    last_flush_millis: AtomicI64,
    /// Timestamp (milliseconds) recorded by `update_schedule_compaction_millis`.
    last_schedule_compaction_millis: AtomicI64,
    /// Source of the current time for the timestamps above.
    time_provider: TimeProviderRef,
    /// Latest entry id of the topic; reported by `region_statistic` for both
    /// the data and the metadata topic.
    pub(crate) topic_latest_entry_id: AtomicU64,
    /// Shared counter of written bytes; reported by `region_statistic`.
    pub(crate) written_bytes: Arc<AtomicU64>,
    /// Manifest statistics (total size, version, removed file count).
    stats: ManifestStats,
}
179
/// Shared, reference-counted handle to a [`MitoRegion`].
pub type MitoRegionRef = Arc<MitoRegion>;
181
/// Partition information kept for a region while it is in staging mode.
#[derive(Debug, Clone)]
pub(crate) struct StagingPartitionInfo {
    /// Directive that was supplied for staging.
    pub(crate) partition_directive: StagingPartitionDirective,
    /// Version derived from the directive: the partition expr version for
    /// `UpdatePartitionExpr`, `0` for `RejectAllWrites`.
    pub(crate) partition_rule_version: u64,
}
187
188impl StagingPartitionInfo {
189 pub(crate) fn partition_expr(&self) -> Option<&str> {
191 self.partition_directive.partition_expr()
192 }
193
194 pub(crate) fn from_partition_directive(partition_directive: StagingPartitionDirective) -> Self {
196 let partition_rule_version = match &partition_directive {
197 StagingPartitionDirective::UpdatePartitionExpr(expr) => {
198 partition_expr_version(Some(expr))
199 }
200 StagingPartitionDirective::RejectAllWrites => 0,
201 };
202 Self {
203 partition_directive,
204 partition_rule_version,
205 }
206 }
207}
208
209impl MitoRegion {
210 pub(crate) async fn stop(&self) {
212 self.manifest_ctx
213 .manifest_manager
214 .write()
215 .await
216 .stop()
217 .await;
218
219 info!(
220 "Stopped region manifest manager, region_id: {}",
221 self.region_id
222 );
223 }
224
225 pub fn metadata(&self) -> RegionMetadataRef {
227 let version_data = self.version_control.current();
228 version_data.version.metadata.clone()
229 }
230
231 pub(crate) fn primary_key_encoding(&self) -> PrimaryKeyEncoding {
233 let version_data = self.version_control.current();
234 version_data.version.metadata.primary_key_encoding
235 }
236
237 pub(crate) fn version(&self) -> VersionRef {
239 let version_data = self.version_control.current();
240 version_data.version
241 }
242
243 pub(crate) fn last_flush_millis(&self) -> i64 {
245 self.last_flush_millis.load(Ordering::Relaxed)
246 }
247
248 pub(crate) fn update_flush_millis(&self) {
250 let now = self.time_provider.current_time_millis();
251 self.last_flush_millis.store(now, Ordering::Relaxed);
252 }
253
254 pub(crate) fn last_schedule_compaction_millis(&self) -> i64 {
256 self.last_schedule_compaction_millis.load(Ordering::Relaxed)
257 }
258
259 pub(crate) fn update_schedule_compaction_millis(&self) {
261 let now = self.time_provider.current_time_millis();
262 self.last_schedule_compaction_millis
263 .store(now, Ordering::Relaxed);
264 }
265
266 pub(crate) fn table_dir(&self) -> &str {
268 self.access_layer.table_dir()
269 }
270
271 pub(crate) fn path_type(&self) -> PathType {
273 self.access_layer.path_type()
274 }
275
276 pub(crate) fn is_writable(&self) -> bool {
278 matches!(
279 self.manifest_ctx.state.load(),
280 RegionRoleState::Leader(RegionLeaderState::Writable)
281 | RegionRoleState::Leader(RegionLeaderState::Staging)
282 )
283 }
284
285 pub(crate) fn is_flushable(&self) -> bool {
287 matches!(
288 self.manifest_ctx.state.load(),
289 RegionRoleState::Leader(RegionLeaderState::Writable)
290 | RegionRoleState::Leader(RegionLeaderState::Staging)
291 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
292 )
293 }
294
295 pub(crate) fn should_abort_index(&self) -> bool {
297 matches!(
298 self.manifest_ctx.state.load(),
299 RegionRoleState::Follower
300 | RegionRoleState::Leader(RegionLeaderState::Dropping)
301 | RegionRoleState::Leader(RegionLeaderState::Truncating)
302 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
303 | RegionRoleState::Leader(RegionLeaderState::Staging)
304 )
305 }
306
307 pub(crate) fn is_downgrading(&self) -> bool {
309 matches!(
310 self.manifest_ctx.state.load(),
311 RegionRoleState::Leader(RegionLeaderState::Downgrading)
312 )
313 }
314
315 pub(crate) fn is_staging(&self) -> bool {
317 self.manifest_ctx.state.load() == RegionRoleState::Leader(RegionLeaderState::Staging)
318 }
319
320 pub(crate) fn is_enter_staging(&self) -> bool {
322 self.manifest_ctx.state.load()
323 == RegionRoleState::Leader(RegionLeaderState::EnteringStaging)
324 }
325
326 pub fn region_id(&self) -> RegionId {
327 self.region_id
328 }
329
330 pub fn find_committed_sequence(&self) -> SequenceNumber {
331 self.version_control.committed_sequence()
332 }
333
334 pub fn flushed_sequence(&self) -> SequenceNumber {
340 self.version_control.current().version.flushed_sequence
341 }
342
343 pub fn is_follower(&self) -> bool {
345 self.manifest_ctx.state.load() == RegionRoleState::Follower
346 }
347
348 pub(crate) fn state(&self) -> RegionRoleState {
350 self.manifest_ctx.state.load()
351 }
352
353 pub(crate) fn set_role(&self, next_role: RegionRole) {
355 self.manifest_ctx.set_role(next_role, self.region_id);
356 }
357
358 pub(crate) fn region_role(&self) -> RegionRole {
359 match self.state() {
360 RegionRoleState::Follower => RegionRole::Follower,
361 RegionRoleState::Leader(RegionLeaderState::Staging) => RegionRole::StagingLeader,
362 RegionRoleState::Leader(RegionLeaderState::Downgrading) => {
363 RegionRole::DowngradingLeader
364 }
365 RegionRoleState::Leader(_) => RegionRole::Leader,
366 }
367 }
368
369 pub(crate) fn set_altering(&self) -> Result<()> {
372 self.compare_exchange_state(
373 RegionLeaderState::Writable,
374 RegionRoleState::Leader(RegionLeaderState::Altering),
375 )
376 }
377
378 pub(crate) fn set_dropping(&self, expect: RegionLeaderState) -> Result<()> {
381 self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Dropping))
382 }
383
384 pub(crate) fn set_truncating(&self) -> Result<()> {
387 self.compare_exchange_state(
388 RegionLeaderState::Writable,
389 RegionRoleState::Leader(RegionLeaderState::Truncating),
390 )
391 }
392
393 pub(crate) fn set_editing(&self, expect: RegionLeaderState) -> Result<()> {
396 self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Editing))
397 }
398
399 pub(crate) async fn set_staging(
405 &self,
406 manager: &mut RwLockWriteGuard<'_, RegionManifestManager>,
407 ) -> Result<()> {
408 manager.store().clear_staging_manifests().await?;
409
410 self.compare_exchange_state(
411 RegionLeaderState::Writable,
412 RegionRoleState::Leader(RegionLeaderState::Staging),
413 )
414 }
415
416 pub(crate) fn set_entering_staging(&self) -> Result<()> {
418 self.compare_exchange_state(
419 RegionLeaderState::Writable,
420 RegionRoleState::Leader(RegionLeaderState::EnteringStaging),
421 )
422 }
423
424 pub fn exit_staging(&self) -> Result<()> {
429 self.manifest_ctx.exit_staging(
430 self.region_id,
431 RegionRoleState::Leader(RegionLeaderState::Writable),
432 )
433 }
434
435 pub(crate) async fn set_role_state_gracefully(
437 &self,
438 state: SettableRegionRoleState,
439 ) -> Result<()> {
440 let mut manager: RwLockWriteGuard<'_, RegionManifestManager> =
441 self.manifest_ctx.manifest_manager.write().await;
442 let current_state = self.state();
443
444 match state {
445 SettableRegionRoleState::Leader => {
446 match current_state {
449 RegionRoleState::Leader(RegionLeaderState::Staging) => {
450 info!("Exiting staging mode for region {}", self.region_id);
451 self.exit_staging_on_success(&mut manager).await?;
453 }
454 RegionRoleState::Leader(RegionLeaderState::Writable) => {
455 info!("Region {} already in normal leader mode", self.region_id);
457 }
458 _ => {
459 return Err(RegionStateSnafu {
461 region_id: self.region_id,
462 state: current_state,
463 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
464 }
465 .build());
466 }
467 }
468 }
469
470 SettableRegionRoleState::StagingLeader => {
471 match current_state {
474 RegionRoleState::Leader(RegionLeaderState::Writable) => {
475 info!("Entering staging mode for region {}", self.region_id);
476 self.set_staging(&mut manager).await?;
477 }
478 RegionRoleState::Leader(RegionLeaderState::Staging) => {
479 info!("Region {} already in staging mode", self.region_id);
481 }
482 _ => {
483 return Err(RegionStateSnafu {
484 region_id: self.region_id,
485 state: current_state,
486 expect: RegionRoleState::Leader(RegionLeaderState::Writable),
487 }
488 .build());
489 }
490 }
491 }
492
493 SettableRegionRoleState::Follower => {
494 match current_state {
496 RegionRoleState::Leader(RegionLeaderState::Staging) => {
497 info!(
498 "Exiting staging and demoting region {} to follower",
499 self.region_id
500 );
501 self.exit_staging()?;
502 self.set_role(RegionRole::Follower);
503 }
504 RegionRoleState::Leader(_) => {
505 info!("Demoting region {} from leader to follower", self.region_id);
506 self.set_role(RegionRole::Follower);
507 }
508 RegionRoleState::Follower => {
509 info!("Region {} already in follower mode", self.region_id);
511 }
512 }
513 }
514
515 SettableRegionRoleState::DowngradingLeader => {
516 match current_state {
518 RegionRoleState::Leader(RegionLeaderState::Staging) => {
519 info!(
520 "Exiting staging and entering downgrade for region {}",
521 self.region_id
522 );
523 self.exit_staging()?;
524 self.set_role(RegionRole::DowngradingLeader);
525 }
526 RegionRoleState::Leader(RegionLeaderState::Writable) => {
527 info!("Starting downgrade for region {}", self.region_id);
528 self.set_role(RegionRole::DowngradingLeader);
529 }
530 RegionRoleState::Leader(RegionLeaderState::Downgrading) => {
531 info!("Region {} already in downgrading mode", self.region_id);
533 }
534 _ => {
535 warn!(
536 "Cannot start downgrade for region {} from state {:?}",
537 self.region_id, current_state
538 );
539 }
540 }
541 }
542 }
543
544 if self.state() == RegionRoleState::Leader(RegionLeaderState::Writable) {
546 let manifest_meta = &manager.manifest().metadata;
548 let current_version = self.version();
549 let current_meta = ¤t_version.metadata;
550 if manifest_meta.partition_expr.is_none() && current_meta.partition_expr.is_some() {
551 let action = RegionMetaAction::Change(RegionChange {
552 metadata: current_meta.clone(),
553 sst_format: current_version.options.sst_format.unwrap_or_default(),
554 append_mode: None,
555 });
556 let result = manager
557 .update(RegionMetaActionList::with_action(action), false)
558 .await;
559
560 match result {
561 Ok(version) => {
562 info!(
563 "Successfully persisted backfilled metadata for region {}, version: {}",
564 self.region_id, version
565 );
566 }
567 Err(e) => {
568 warn!(e; "Failed to persist backfilled metadata for region {}", self.region_id);
569 }
570 }
571 }
572 }
573
574 drop(manager);
575
576 Ok(())
577 }
578
579 pub(crate) fn switch_state_to_writable(&self, expect: RegionLeaderState) {
582 if let Err(e) = self
583 .compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Writable))
584 {
585 error!(e; "failed to switch region state to writable, expect state is {:?}", expect);
586 }
587 }
588
589 pub(crate) fn switch_state_to_staging(&self, expect: RegionLeaderState) {
592 if let Err(e) =
593 self.compare_exchange_state(expect, RegionRoleState::Leader(RegionLeaderState::Staging))
594 {
595 error!(e; "failed to switch region state to staging, expect state is {:?}", expect);
596 }
597 }
598
599 pub(crate) fn region_statistic(&self) -> RegionStatistic {
601 let version = self.version();
602 let memtables = &version.memtables;
603 let memtable_usage = (memtables.mutable_usage() + memtables.immutables_usage()) as u64;
604
605 let sst_usage = version.ssts.owned_sst_usage(self.region_id);
606 let index_usage = version.ssts.owned_index_usage(self.region_id);
607 let flushed_entry_id = version.flushed_entry_id;
608
609 let wal_usage = self.estimated_wal_usage(memtable_usage);
610 let manifest_usage = self.stats.total_manifest_size();
611 let num_rows = version.ssts.owned_num_rows(self.region_id) + version.memtables.num_rows();
612 let num_files = version.ssts.owned_num_files(self.region_id);
613 let manifest_version = self.stats.manifest_version();
614 let file_removed_cnt = self.stats.file_removed_cnt();
615
616 let topic_latest_entry_id = self.topic_latest_entry_id.load(Ordering::Relaxed);
617 let written_bytes = self.written_bytes.load(Ordering::Relaxed);
618
619 RegionStatistic {
620 num_rows,
621 memtable_size: memtable_usage,
622 wal_size: wal_usage,
623 manifest_size: manifest_usage,
624 sst_size: sst_usage,
625 sst_num: num_files,
626 index_size: index_usage,
627 manifest: RegionManifestInfo::Mito {
628 manifest_version,
629 flushed_entry_id,
630 file_removed_cnt,
631 },
632 data_topic_latest_entry_id: topic_latest_entry_id,
633 metadata_topic_latest_entry_id: topic_latest_entry_id,
634 written_bytes,
635 }
636 }
637
638 fn estimated_wal_usage(&self, memtable_usage: u64) -> u64 {
641 ((memtable_usage as f32) * ESTIMATED_WAL_FACTOR) as u64
642 }
643
644 fn compare_exchange_state(
647 &self,
648 expect: RegionLeaderState,
649 state: RegionRoleState,
650 ) -> Result<()> {
651 self.manifest_ctx
652 .state
653 .compare_exchange(RegionRoleState::Leader(expect), state)
654 .map_err(|actual| {
655 RegionStateSnafu {
656 region_id: self.region_id,
657 state: actual,
658 expect: RegionRoleState::Leader(expect),
659 }
660 .build()
661 })?;
662 Ok(())
663 }
664
665 pub fn access_layer(&self) -> AccessLayerRef {
666 self.access_layer.clone()
667 }
668
669 pub(crate) fn region_info_entry(&self, node_id: Option<u64>) -> RegionInfoEntry {
671 let region_id = self.region_id;
672 let version = self.version();
673 let state = self.state();
674 let role = self.region_role();
675 let region_options = serde_json::to_string(&version.options)
676 .unwrap_or_else(|err| serde_json::json!({ "error": err.to_string() }).to_string());
677 let sst_format = match version.options.sst_format.unwrap_or_default() {
678 crate::sst::FormatType::PrimaryKey => "primary_key",
679 crate::sst::FormatType::Flat => "flat",
680 }
681 .to_string();
682
683 RegionInfoEntry {
684 region_id,
685 table_id: region_id.table_id(),
686 region_number: region_id.region_number(),
687 region_group: region_id.region_group(),
688 region_sequence: region_id.region_sequence(),
689 state: state.as_str().to_string(),
690 role: role.to_string(),
691 writable: self.is_writable(),
692 committed_sequence: self.find_committed_sequence(),
693 flushed_sequence: Some(self.flushed_sequence()).filter(|sequence| *sequence > 0),
694 manifest_version: self.stats.manifest_version(),
695 compaction_time_window: version
696 .compaction_time_window
697 .map(|duration| humantime::format_duration(duration).to_string()),
698 region_options,
699 sst_format,
700 node_id,
701 }
702 }
703
704 pub async fn manifest_sst_entries(&self) -> Vec<ManifestSstEntry> {
706 let table_dir = self.table_dir();
707 let path_type = self.access_layer.path_type();
708
709 let visible_ssts = self
710 .version()
711 .ssts
712 .levels()
713 .iter()
714 .flat_map(|level| level.files().map(|file| file.file_id().file_id()))
715 .collect::<HashSet<_>>();
716
717 let manifest_files = self.manifest_ctx.manifest().await.files.clone();
718 let staging_files = self
719 .manifest_ctx
720 .staging_manifest()
721 .await
722 .map(|m| m.files.clone())
723 .unwrap_or_default();
724 let files = manifest_files
725 .into_iter()
726 .chain(staging_files)
727 .collect::<HashMap<_, _>>();
728
729 files
730 .values()
731 .map(|meta| {
732 let region_id = self.region_id;
733 let origin_region_id = meta.region_id;
734 let (index_version, index_file_path, index_file_size) = if meta.index_file_size > 0
735 {
736 let index_file_path = index_file_path(table_dir, meta.index_id(), path_type);
737 (
738 meta.index_version,
739 Some(index_file_path),
740 Some(meta.index_file_size),
741 )
742 } else {
743 (0, None, None)
744 };
745 let visible = visible_ssts.contains(&meta.file_id);
746 ManifestSstEntry {
747 table_dir: table_dir.to_string(),
748 region_id,
749 table_id: region_id.table_id(),
750 region_number: region_id.region_number(),
751 region_group: region_id.region_group(),
752 region_sequence: region_id.region_sequence(),
753 file_id: meta.file_id.to_string(),
754 index_version,
755 level: meta.level,
756 file_path: sst_file_path(table_dir, meta.file_id(), path_type),
757 file_size: meta.file_size,
758 index_file_path,
759 index_file_size,
760 num_rows: meta.num_rows,
761 num_row_groups: meta.num_row_groups,
762 num_series: Some(meta.num_series),
763 min_ts: meta.time_range.0,
764 max_ts: meta.time_range.1,
765 sequence: meta.sequence.map(|s| s.get()),
766 origin_region_id,
767 node_id: None,
768 visible,
769 primary_key_min: meta.primary_key_min.clone(),
770 primary_key_max: meta.primary_key_max.clone(),
771 }
772 })
773 .collect()
774 }
775
776 pub async fn file_metas(&self, file_ids: &[FileId]) -> Vec<Option<FileMeta>> {
778 let manifest_files = self.manifest_ctx.manifest().await.files.clone();
779
780 file_ids
781 .iter()
782 .map(|file_id| manifest_files.get(file_id).cloned())
783 .collect::<Vec<_>>()
784 }
785
786 pub(crate) async fn exit_staging_on_success(
788 &self,
789 manager: &mut RwLockWriteGuard<'_, RegionManifestManager>,
790 ) -> Result<()> {
791 let current_state = self.manifest_ctx.current_state();
792 ensure!(
793 current_state == RegionRoleState::Leader(RegionLeaderState::Staging),
794 RegionStateSnafu {
795 region_id: self.region_id,
796 state: current_state,
797 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
798 }
799 );
800
801 let merged_actions = match manager.merge_staged_actions(current_state).await? {
803 Some(actions) => actions,
804 None => {
805 info!(
806 "No staged manifests to merge for region {}, exiting staging mode without changes",
807 self.region_id
808 );
809 self.exit_staging()?;
811 return Ok(());
812 }
813 };
814 let expect_change = merged_actions.actions.iter().any(|a| a.is_change());
815 let expect_partition_expr_change = merged_actions
816 .actions
817 .iter()
818 .any(|a| a.is_partition_expr_change());
819 let expect_edit = merged_actions.actions.iter().any(|a| a.is_edit());
820 ensure!(
821 !(expect_change && expect_partition_expr_change),
822 UnexpectedSnafu {
823 reason: "unexpected both change and partition expr change actions in merged actions"
824 }
825 );
826 ensure!(
827 expect_change || expect_partition_expr_change,
828 UnexpectedSnafu {
829 reason: "expect a change or partition expr change action in merged actions"
830 }
831 );
832 ensure!(
833 expect_edit,
834 UnexpectedSnafu {
835 reason: "expect an edit action in merged actions"
836 }
837 );
838
839 let (merged_partition_expr_change, merged_change, merged_edit) =
840 merged_actions.clone().split_region_change_and_edit();
841 if let Some(change) = &merged_change {
842 let current_column_metadatas = &self.version().metadata.column_metadatas;
846 ensure!(
847 change.metadata.column_metadatas == *current_column_metadatas,
848 UnexpectedSnafu {
849 reason: "change action alters column metadata in staging exit"
850 }
851 );
852 }
853
854 let new_version = manager.update(merged_actions, false).await?;
857 info!(
858 "Successfully submitted merged staged manifests for region {}, new version: {}",
859 self.region_id, new_version
860 );
861
862 if let Some(change) = merged_partition_expr_change {
864 let mut new_metadata = self.version().metadata.as_ref().clone();
865 new_metadata.set_partition_expr(change.partition_expr);
866 self.version_control.alter_metadata(new_metadata.into());
867 }
868 if let Some(change) = merged_change {
869 self.version_control.alter_metadata(change.metadata);
870 }
871 self.version_control
872 .apply_edit(Some(merged_edit), &[], self.file_purger.clone());
873
874 if let Err(e) = manager.clear_staging_manifest_and_dir().await {
876 error!(e; "Failed to clear staging manifest dir for region {}", self.region_id);
877 }
878 self.exit_staging()?;
879
880 Ok(())
881 }
882
883 pub fn maybe_staging_partition_expr_str(&self) -> Option<String> {
889 let is_staging = self.is_staging();
890 if is_staging {
891 let staging_partition_info = self.manifest_ctx.staging_partition_info();
892 if staging_partition_info.is_none() {
893 warn!(
894 "Staging partition expr is none for region {} in staging state",
895 self.region_id
896 );
897 }
898 staging_partition_info
899 .as_ref()
900 .and_then(|info| info.partition_expr().map(ToString::to_string))
901 } else {
902 let version = self.version();
903 version.metadata.partition_expr.clone()
904 }
905 }
906
907 pub fn expected_partition_expr_version(&self) -> u64 {
908 if self.is_staging() {
909 self.manifest_ctx
910 .staging_partition_info()
911 .as_ref()
912 .map(|info| info.partition_rule_version)
913 .unwrap_or_default()
914 } else {
915 self.version().metadata.partition_expr_version
916 }
917 }
918
919 pub(crate) fn reject_all_writes_in_staging(&self) -> bool {
921 if !self.is_staging() {
922 return false;
923 }
924 self.manifest_ctx
925 .staging_partition_info()
926 .as_ref()
927 .map(|info| {
928 matches!(
929 info.partition_directive,
930 StagingPartitionDirective::RejectAllWrites
931 )
932 })
933 .unwrap_or(false)
934 }
935}
936
/// Manifest manager of a region together with the region's role state.
#[derive(Debug)]
pub(crate) struct ManifestContext {
    /// Manager of the region manifest, guarded by an async read-write lock.
    pub(crate) manifest_manager: tokio::sync::RwLock<RegionManifestManager>,
    /// Current role state of the region, read and updated atomically.
    state: AtomicCell<RegionRoleState>,
    /// Partition info recorded for staging mode; cleared when staging is exited.
    staging_partition_info: Mutex<Option<StagingPartitionInfo>>,
}
951
952impl ManifestContext {
953 pub(crate) fn new(manager: RegionManifestManager, state: RegionRoleState) -> Self {
954 ManifestContext {
955 manifest_manager: tokio::sync::RwLock::new(manager),
956 state: AtomicCell::new(state),
957 staging_partition_info: Mutex::new(None),
958 }
959 }
960
961 pub(crate) fn staging_partition_info(&self) -> Option<StagingPartitionInfo> {
962 self.staging_partition_info.lock().unwrap().clone()
963 }
964
965 pub(crate) fn set_staging_partition_info(&self, staging_partition_info: StagingPartitionInfo) {
966 let mut current = self.staging_partition_info.lock().unwrap();
967 debug_assert!(current.is_none());
968 *current = Some(staging_partition_info);
969 }
970
971 fn clear_staging_partition_info(&self) {
972 *self.staging_partition_info.lock().unwrap() = None;
973 }
974
975 pub(crate) fn exit_staging(
976 &self,
977 region_id: RegionId,
978 next_state: RegionRoleState,
979 ) -> Result<()> {
980 self.state
981 .compare_exchange(
982 RegionRoleState::Leader(RegionLeaderState::Staging),
983 next_state,
984 )
985 .map_err(|actual| {
986 RegionStateSnafu {
987 region_id,
988 state: actual,
989 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
990 }
991 .build()
992 })?;
993 self.clear_staging_partition_info();
994 Ok(())
995 }
996
997 pub(crate) async fn manifest_version(&self) -> ManifestVersion {
998 self.manifest_manager
999 .read()
1000 .await
1001 .manifest()
1002 .manifest_version
1003 }
1004
1005 pub(crate) async fn has_update(&self) -> Result<bool> {
1006 self.manifest_manager.read().await.has_update().await
1007 }
1008
1009 pub(crate) fn current_state(&self) -> RegionRoleState {
1011 self.state.load()
1012 }
1013
1014 pub(crate) async fn install_manifest_to(
1020 &self,
1021 version: ManifestVersion,
1022 ) -> Result<Arc<RegionManifest>> {
1023 let mut manager = self.manifest_manager.write().await;
1024 manager.install_manifest_to(version).await?;
1025
1026 Ok(manager.manifest())
1027 }
1028
1029 pub(crate) async fn update_manifest(
1031 &self,
1032 expect_state: RegionLeaderState,
1033 action_list: RegionMetaActionList,
1034 is_staging: bool,
1035 ) -> Result<ManifestVersion> {
1036 self.update_manifest_with_state_check(action_list, is_staging, |current_state, region_id| {
1037 if expect_state != RegionLeaderState::Downgrading {
1042 if current_state == RegionRoleState::Leader(RegionLeaderState::Downgrading) {
1043 info!(
1044 "Region {} is in downgrading leader state, updating manifest. Expect state is {:?}",
1045 region_id, expect_state
1046 );
1047 }
1048 ensure!(
1049 current_state == RegionRoleState::Leader(expect_state)
1050 || current_state == RegionRoleState::Leader(RegionLeaderState::Downgrading),
1051 UpdateManifestSnafu {
1052 region_id,
1053 state: current_state,
1054 }
1055 );
1056 } else {
1057 ensure!(
1058 current_state == RegionRoleState::Leader(expect_state),
1059 RegionStateSnafu {
1060 region_id,
1061 state: current_state,
1062 expect: RegionRoleState::Leader(expect_state),
1063 }
1064 );
1065 }
1066
1067 Ok(())
1068 })
1069 .await
1070 }
1071
1072 pub(crate) async fn update_manifest_for_compaction(
1089 &self,
1090 action_list: RegionMetaActionList,
1091 ) -> Result<ManifestVersion> {
1092 self.update_manifest_with_state_check(action_list, false, |current_state, region_id| {
1093 ensure!(
1094 matches!(
1095 current_state,
1096 RegionRoleState::Leader(RegionLeaderState::Writable)
1097 | RegionRoleState::Leader(RegionLeaderState::Editing)
1098 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
1099 ),
1100 UpdateManifestSnafu {
1101 region_id,
1102 state: current_state,
1103 }
1104 );
1105
1106 Ok(())
1107 })
1108 .await
1109 }
1110
1111 async fn update_manifest_with_state_check(
1112 &self,
1113 action_list: RegionMetaActionList,
1114 is_staging: bool,
1115 check_state: impl FnOnce(RegionRoleState, RegionId) -> Result<()>,
1116 ) -> Result<ManifestVersion> {
1117 let mut manager = self.manifest_manager.write().await;
1119 let manifest = manager.manifest();
1121 let current_state = self.state.load();
1124 check_state(current_state, manifest.metadata.region_id)?;
1125
1126 for action in &action_list.actions {
1127 let RegionMetaAction::Edit(edit) = &action else {
1129 continue;
1130 };
1131
1132 let Some(truncated_entry_id) = manifest.truncated_entry_id else {
1134 continue;
1135 };
1136
1137 if let Some(flushed_entry_id) = edit.flushed_entry_id {
1139 let is_newer_entry = truncated_entry_id < flushed_entry_id;
1149 let is_same_entry_with_newer_sequence = truncated_entry_id == flushed_entry_id
1150 && edit.flushed_sequence.is_some_and(|flushed_sequence| {
1151 manifest.flushed_sequence < flushed_sequence
1152 });
1153
1154 ensure!(
1155 is_newer_entry || is_same_entry_with_newer_sequence,
1156 RegionTruncatedSnafu {
1157 region_id: manifest.metadata.region_id,
1158 }
1159 );
1160 }
1161
1162 if !edit.files_to_remove.is_empty() {
1164 for file in &edit.files_to_remove {
1166 ensure!(
1167 manifest.files.contains_key(&file.file_id),
1168 RegionTruncatedSnafu {
1169 region_id: manifest.metadata.region_id,
1170 }
1171 );
1172 }
1173 }
1174 }
1175
1176 let version = manager.update(action_list, is_staging).await.inspect_err(
1178 |e| error!(e; "Failed to update manifest, region_id: {}", manifest.metadata.region_id),
1179 )?;
1180
1181 if self.state.load() == RegionRoleState::Follower {
1182 warn!(
1183 "Region {} becomes follower while updating manifest which may cause inconsistency, manifest version: {version}",
1184 manifest.metadata.region_id
1185 );
1186 }
1187
1188 Ok(version)
1189 }
1190
1191 pub(crate) fn set_role(&self, next_role: RegionRole, region_id: RegionId) {
1225 match next_role {
1226 RegionRole::Follower => {
1227 if self
1228 .exit_staging(region_id, RegionRoleState::Follower)
1229 .is_ok()
1230 {
1231 info!(
1232 "Convert region {} to follower, previous role state: {:?}",
1233 region_id,
1234 RegionRoleState::Leader(RegionLeaderState::Staging)
1235 );
1236 return;
1237 }
1238 match self.state.fetch_update(|state| {
1239 if !matches!(state, RegionRoleState::Follower) {
1240 Some(RegionRoleState::Follower)
1241 } else {
1242 None
1243 }
1244 }) {
1245 Ok(state) => info!(
1246 "Convert region {} to follower, previous role state: {:?}",
1247 region_id, state
1248 ),
1249 Err(state) => {
1250 if state != RegionRoleState::Follower {
1251 warn!(
1252 "Failed to convert region {} to follower, current role state: {:?}",
1253 region_id, state
1254 )
1255 }
1256 }
1257 }
1258 }
1259 RegionRole::Leader => {
1260 if self
1261 .exit_staging(
1262 region_id,
1263 RegionRoleState::Leader(RegionLeaderState::Writable),
1264 )
1265 .is_ok()
1266 {
1267 info!(
1268 "Convert region {} to leader, previous role state: {:?}",
1269 region_id,
1270 RegionRoleState::Leader(RegionLeaderState::Staging)
1271 );
1272 return;
1273 }
1274 match self.state.fetch_update(|state| {
1275 if matches!(
1276 state,
1277 RegionRoleState::Follower
1278 | RegionRoleState::Leader(RegionLeaderState::Downgrading)
1279 ) {
1280 Some(RegionRoleState::Leader(RegionLeaderState::Writable))
1281 } else {
1282 None
1283 }
1284 }) {
1285 Ok(state) => info!(
1286 "Convert region {} to leader, previous role state: {:?}",
1287 region_id, state
1288 ),
1289 Err(state) => {
1290 if state != RegionRoleState::Leader(RegionLeaderState::Writable) {
1291 warn!(
1292 "Failed to convert region {} to leader, current role state: {:?}",
1293 region_id, state
1294 )
1295 }
1296 }
1297 }
1298 }
1299 RegionRole::StagingLeader => {
1300 info!(
1301 "Ignore direct conversion of region {} to staging leader; staging requires the dedicated workflow",
1302 region_id
1303 );
1304 }
1305 RegionRole::DowngradingLeader => {
1306 if self
1307 .exit_staging(
1308 region_id,
1309 RegionRoleState::Leader(RegionLeaderState::Downgrading),
1310 )
1311 .is_ok()
1312 {
1313 info!(
1314 "Convert region {} to downgrading region, previous role state: {:?}",
1315 region_id,
1316 RegionRoleState::Leader(RegionLeaderState::Staging)
1317 );
1318 return;
1319 }
1320 match self.state.compare_exchange(
1321 RegionRoleState::Leader(RegionLeaderState::Writable),
1322 RegionRoleState::Leader(RegionLeaderState::Downgrading),
1323 ) {
1324 Ok(state) => info!(
1325 "Convert region {} to downgrading region, previous role state: {:?}",
1326 region_id, state
1327 ),
1328 Err(state) => {
1329 if state != RegionRoleState::Leader(RegionLeaderState::Downgrading) {
1330 warn!(
1331 "Failed to convert region {} to downgrading leader, current role state: {:?}",
1332 region_id, state
1333 )
1334 }
1335 }
1336 }
1337 }
1338 }
1339 }
1340
1341 pub(crate) async fn manifest(&self) -> Arc<crate::manifest::action::RegionManifest> {
1343 self.manifest_manager.read().await.manifest()
1344 }
1345
1346 pub(crate) async fn staging_manifest(
1348 &self,
1349 ) -> Option<Arc<crate::manifest::action::RegionManifest>> {
1350 self.manifest_manager.read().await.staging_manifest()
1351 }
1352}
1353
/// Shared, reference-counted handle to a [`ManifestContext`].
pub(crate) type ManifestContextRef = Arc<ManifestContext>;
1355
1356#[derive(Debug, Default)]
1358pub(crate) struct RegionMap {
1359 regions: RwLock<HashMap<RegionId, MitoRegionRef>>,
1360}
1361
1362impl RegionMap {
1363 pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1365 let regions = self.regions.read().unwrap();
1366 regions.contains_key(®ion_id)
1367 }
1368
1369 pub(crate) fn insert_region(&self, region: MitoRegionRef) {
1371 let mut regions = self.regions.write().unwrap();
1372 regions.insert(region.region_id, region);
1373 }
1374
1375 pub(crate) fn get_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
1377 let regions = self.regions.read().unwrap();
1378 regions.get(®ion_id).cloned()
1379 }
1380
1381 pub(crate) fn writable_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1385 let region = self
1386 .get_region(region_id)
1387 .context(RegionNotFoundSnafu { region_id })?;
1388 ensure!(
1389 region.is_writable(),
1390 RegionStateSnafu {
1391 region_id,
1392 state: region.state(),
1393 expect: RegionRoleState::Leader(RegionLeaderState::Writable),
1394 }
1395 );
1396 Ok(region)
1397 }
1398
1399 pub(crate) fn follower_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1403 let region = self
1404 .get_region(region_id)
1405 .context(RegionNotFoundSnafu { region_id })?;
1406 ensure!(
1407 region.is_follower(),
1408 RegionStateSnafu {
1409 region_id,
1410 state: region.state(),
1411 expect: RegionRoleState::Follower,
1412 }
1413 );
1414
1415 Ok(region)
1416 }
1417
1418 pub(crate) fn get_region_or<F: OnFailure>(
1422 &self,
1423 region_id: RegionId,
1424 cb: &mut F,
1425 ) -> Option<MitoRegionRef> {
1426 match self
1427 .get_region(region_id)
1428 .context(RegionNotFoundSnafu { region_id })
1429 {
1430 Ok(region) => Some(region),
1431 Err(e) => {
1432 cb.on_failure(e);
1433 None
1434 }
1435 }
1436 }
1437
1438 pub(crate) fn writable_region_or<F: OnFailure>(
1442 &self,
1443 region_id: RegionId,
1444 cb: &mut F,
1445 ) -> Option<MitoRegionRef> {
1446 match self.writable_region(region_id) {
1447 Ok(region) => Some(region),
1448 Err(e) => {
1449 cb.on_failure(e);
1450 None
1451 }
1452 }
1453 }
1454
1455 pub(crate) fn writable_non_staging_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1459 let region = self.writable_region(region_id)?;
1460 if region.is_staging() {
1461 return Err(crate::error::RegionStateSnafu {
1462 region_id,
1463 state: region.state(),
1464 expect: RegionRoleState::Leader(RegionLeaderState::Writable),
1465 }
1466 .build());
1467 }
1468 Ok(region)
1469 }
1470
1471 pub(crate) fn staging_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1475 let region = self
1476 .get_region(region_id)
1477 .context(RegionNotFoundSnafu { region_id })?;
1478 ensure!(
1479 region.is_staging(),
1480 RegionStateSnafu {
1481 region_id,
1482 state: region.state(),
1483 expect: RegionRoleState::Leader(RegionLeaderState::Staging),
1484 }
1485 );
1486 Ok(region)
1487 }
1488
1489 pub(crate) fn flushable_region(&self, region_id: RegionId) -> Result<MitoRegionRef> {
1493 let region = self
1494 .get_region(region_id)
1495 .context(RegionNotFoundSnafu { region_id })?;
1496 ensure!(
1497 region.is_flushable(),
1498 FlushableRegionStateSnafu {
1499 region_id,
1500 state: region.state(),
1501 }
1502 );
1503 Ok(region)
1504 }
1505
1506 pub(crate) fn remove_region(&self, region_id: RegionId) -> Option<MitoRegionRef> {
1508 let mut regions = self.regions.write().unwrap();
1509 regions.remove(®ion_id)
1510 }
1511
1512 pub(crate) fn list_regions(&self) -> Vec<MitoRegionRef> {
1514 let regions = self.regions.read().unwrap();
1515 regions.values().cloned().collect()
1516 }
1517
1518 pub(crate) fn clear(&self) {
1520 self.regions.write().unwrap().clear();
1521 }
1522}
1523
1524pub(crate) type RegionMapRef = Arc<RegionMap>;
1525
1526#[derive(Debug, Default)]
1528pub(crate) struct OpeningRegions {
1529 regions: RwLock<HashMap<RegionId, Vec<OptionOutputTx>>>,
1530}
1531
1532impl OpeningRegions {
1533 pub(crate) fn wait_for_opening_region(
1535 &self,
1536 region_id: RegionId,
1537 sender: OptionOutputTx,
1538 ) -> Option<OptionOutputTx> {
1539 let mut regions = self.regions.write().unwrap();
1540 match regions.entry(region_id) {
1541 Entry::Occupied(mut senders) => {
1542 senders.get_mut().push(sender);
1543 None
1544 }
1545 Entry::Vacant(_) => Some(sender),
1546 }
1547 }
1548
1549 pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1551 let regions = self.regions.read().unwrap();
1552 regions.contains_key(®ion_id)
1553 }
1554
1555 pub(crate) fn insert_sender(&self, region: RegionId, sender: OptionOutputTx) {
1557 let mut regions = self.regions.write().unwrap();
1558 regions.insert(region, vec![sender]);
1559 }
1560
1561 pub(crate) fn remove_sender(&self, region_id: RegionId) -> Vec<OptionOutputTx> {
1563 let mut regions = self.regions.write().unwrap();
1564 regions.remove(®ion_id).unwrap_or_default()
1565 }
1566
1567 #[cfg(test)]
1568 pub(crate) fn sender_len(&self, region_id: RegionId) -> usize {
1569 let regions = self.regions.read().unwrap();
1570 if let Some(senders) = regions.get(®ion_id) {
1571 senders.len()
1572 } else {
1573 0
1574 }
1575 }
1576}
1577
1578pub(crate) type OpeningRegionsRef = Arc<OpeningRegions>;
1579
1580#[derive(Debug, Default)]
1582pub(crate) struct CatchupRegions {
1583 regions: RwLock<HashSet<RegionId>>,
1584}
1585
1586impl CatchupRegions {
1587 pub(crate) fn is_region_exists(&self, region_id: RegionId) -> bool {
1589 let regions = self.regions.read().unwrap();
1590 regions.contains(®ion_id)
1591 }
1592
1593 pub(crate) fn insert_region(&self, region_id: RegionId) {
1595 let mut regions = self.regions.write().unwrap();
1596 regions.insert(region_id);
1597 }
1598
1599 pub(crate) fn remove_region(&self, region_id: RegionId) {
1601 let mut regions = self.regions.write().unwrap();
1602 regions.remove(®ion_id);
1603 }
1604}
1605
1606pub(crate) type CatchupRegionsRef = Arc<CatchupRegions>;
1607
/// Atomic counters describing a region's manifest.
///
/// Each counter lives behind an `Arc`, so clones of a `ManifestStats` read and update
/// the same underlying values.
#[derive(Default, Debug, Clone)]
pub struct ManifestStats {
    pub(crate) total_manifest_size: Arc<AtomicU64>,
    pub(crate) manifest_version: Arc<AtomicU64>,
    pub(crate) file_removed_cnt: Arc<AtomicU64>,
}

impl ManifestStats {
    /// Reads the `total_manifest_size` counter with relaxed ordering.
    fn total_manifest_size(&self) -> u64 {
        AtomicU64::load(&self.total_manifest_size, Ordering::Relaxed)
    }

    /// Reads the `manifest_version` counter with relaxed ordering.
    fn manifest_version(&self) -> u64 {
        AtomicU64::load(&self.manifest_version, Ordering::Relaxed)
    }

    /// Reads the `file_removed_cnt` counter with relaxed ordering.
    fn file_removed_cnt(&self) -> u64 {
        AtomicU64::load(&self.file_removed_cnt, Ordering::Relaxed)
    }
}
1629
1630pub fn parse_partition_expr(partition_expr_str: Option<&str>) -> Result<Option<PartitionExpr>> {
1632 match partition_expr_str {
1633 None => Ok(None),
1634 Some("") => Ok(None),
1635 Some(json_str) => {
1636 let expr = partition::expr::PartitionExpr::from_json_str(json_str)
1637 .with_context(|_| InvalidPartitionExprSnafu { expr: json_str })?;
1638 Ok(expr)
1639 }
1640 }
1641}
1642
1643#[cfg(test)]
1644mod tests {
1645 use std::sync::Arc;
1646 use std::sync::atomic::AtomicU64;
1647
1648 use common_datasource::compression::CompressionType;
1649 use common_test_util::temp_dir::create_temp_dir;
1650 use crossbeam_utils::atomic::AtomicCell;
1651 use object_store::ObjectStore;
1652 use object_store::services::Fs;
1653 use store_api::logstore::provider::Provider;
1654 use store_api::region_engine::RegionRole;
1655 use store_api::region_request::PathType;
1656 use store_api::storage::{FileId, RegionId};
1657
1658 use crate::access_layer::AccessLayer;
1659 use crate::error::Error;
1660 use crate::manifest::action::{
1661 RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionPartitionExprChange,
1662 };
1663 use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions};
1664 use crate::region::{
1665 ManifestContext, ManifestStats, MitoRegion, RegionLeaderState, RegionRoleState,
1666 };
1667 use crate::sst::FormatType;
1668 use crate::sst::index::intermediate::IntermediateManager;
1669 use crate::sst::index::puffin_manager::PuffinManagerFactory;
1670 use crate::test_util::scheduler_util::SchedulerEnv;
1671 use crate::test_util::version_util::VersionControlBuilder;
1672 use crate::time_provider::StdTimeProvider;
1673
1674 #[test]
1675 fn test_region_state_lock_free() {
1676 assert!(AtomicCell::<RegionRoleState>::is_lock_free());
1677 }
1678
1679 #[test]
1680 fn test_region_role_state_as_str() {
1681 assert_eq!("Follower", RegionRoleState::Follower.as_str());
1682 assert_eq!(
1683 "Leader(Writable)",
1684 RegionRoleState::Leader(RegionLeaderState::Writable).as_str()
1685 );
1686 assert_eq!(
1687 "Leader(Staging)",
1688 RegionRoleState::Leader(RegionLeaderState::Staging).as_str()
1689 );
1690 assert_eq!(
1691 "Leader(Downgrading)",
1692 RegionRoleState::Leader(RegionLeaderState::Downgrading).as_str()
1693 );
1694 }
1695
1696 async fn build_test_region(env: &SchedulerEnv) -> MitoRegion {
1697 let builder = VersionControlBuilder::new();
1698 let version_control = Arc::new(builder.build());
1699 let metadata = version_control.current().version.metadata.clone();
1700
1701 let manager = RegionManifestManager::new(
1702 metadata.clone(),
1703 0,
1704 RegionManifestOptions {
1705 manifest_dir: "".to_string(),
1706 object_store: env.access_layer.object_store().clone(),
1707 compress_type: CompressionType::Uncompressed,
1708 checkpoint_distance: 10,
1709 remove_file_options: Default::default(),
1710 manifest_cache: None,
1711 },
1712 FormatType::PrimaryKey,
1713 &Default::default(),
1714 )
1715 .await
1716 .unwrap();
1717
1718 let manifest_ctx = Arc::new(ManifestContext::new(
1719 manager,
1720 RegionRoleState::Leader(RegionLeaderState::Writable),
1721 ));
1722
1723 MitoRegion {
1724 region_id: metadata.region_id,
1725 version_control,
1726 access_layer: env.access_layer.clone(),
1727 manifest_ctx,
1728 file_purger: crate::test_util::new_noop_file_purger(),
1729 provider: Provider::noop_provider(),
1730 last_flush_millis: Default::default(),
1731 last_schedule_compaction_millis: Default::default(),
1732 time_provider: Arc::new(StdTimeProvider),
1733 topic_latest_entry_id: Default::default(),
1734 written_bytes: Arc::new(AtomicU64::new(0)),
1735 stats: ManifestStats::default(),
1736 }
1737 }
1738
1739 fn empty_edit() -> RegionEdit {
1740 RegionEdit {
1741 files_to_add: Vec::new(),
1742 files_to_remove: Vec::new(),
1743 timestamp_ms: None,
1744 compaction_time_window: None,
1745 flushed_entry_id: None,
1746 flushed_sequence: None,
1747 committed_sequence: None,
1748 }
1749 }
1750
1751 #[tokio::test]
1752 async fn test_compaction_update_manifest_allows_editing_state() {
1753 let env = SchedulerEnv::new().await;
1754 let region = build_test_region(&env).await;
1755 region.set_editing(RegionLeaderState::Writable).unwrap();
1756
1757 let file_id = FileId::random();
1758 let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(RegionEdit {
1759 files_to_add: vec![crate::sst::file::FileMeta {
1760 region_id: region.region_id,
1761 file_id,
1762 level: 1,
1763 ..Default::default()
1764 }],
1765 files_to_remove: Vec::new(),
1766 timestamp_ms: None,
1767 compaction_time_window: None,
1768 flushed_entry_id: None,
1769 flushed_sequence: None,
1770 committed_sequence: None,
1771 }));
1772
1773 region
1774 .manifest_ctx
1775 .update_manifest_for_compaction(action_list)
1776 .await
1777 .unwrap();
1778
1779 assert!(
1780 region
1781 .manifest_ctx
1782 .manifest()
1783 .await
1784 .files
1785 .contains_key(&file_id)
1786 );
1787 }
1788
1789 #[tokio::test]
1790 async fn test_exit_staging_partition_expr_change_and_edit_success() {
1791 let env = SchedulerEnv::new().await;
1792 let region = build_test_region(&env).await;
1793
1794 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1795 region.set_staging(&mut manager).await.unwrap();
1796 manager
1797 .update(
1798 RegionMetaActionList::new(vec![
1799 RegionMetaAction::PartitionExprChange(RegionPartitionExprChange {
1800 partition_expr: Some("expr_a".to_string()),
1801 }),
1802 RegionMetaAction::Edit(empty_edit()),
1803 ]),
1804 true,
1805 )
1806 .await
1807 .unwrap();
1808
1809 region.exit_staging_on_success(&mut manager).await.unwrap();
1810 drop(manager);
1811
1812 assert_eq!(
1813 region.version().metadata.partition_expr.as_deref(),
1814 Some("expr_a")
1815 );
1816 assert_eq!(
1817 region.state(),
1818 RegionRoleState::Leader(RegionLeaderState::Writable)
1819 );
1820 }
1821
1822 #[tokio::test]
1823 async fn test_exit_staging_change_with_same_columns_success() {
1824 let env = SchedulerEnv::new().await;
1825 let region = build_test_region(&env).await;
1826
1827 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1828 region.set_staging(&mut manager).await.unwrap();
1829
1830 let mut changed_metadata = region.version().metadata.as_ref().clone();
1831 changed_metadata.set_partition_expr(Some("expr_b".to_string()));
1832
1833 manager
1834 .update(
1835 RegionMetaActionList::new(vec![
1836 RegionMetaAction::Change(RegionChange {
1837 metadata: Arc::new(changed_metadata),
1838 sst_format: FormatType::PrimaryKey,
1839 append_mode: None,
1840 }),
1841 RegionMetaAction::Edit(empty_edit()),
1842 ]),
1843 true,
1844 )
1845 .await
1846 .unwrap();
1847
1848 region.exit_staging_on_success(&mut manager).await.unwrap();
1849 drop(manager);
1850
1851 assert_eq!(
1852 region.version().metadata.partition_expr.as_deref(),
1853 Some("expr_b")
1854 );
1855 assert_eq!(
1856 region.state(),
1857 RegionRoleState::Leader(RegionLeaderState::Writable)
1858 );
1859 }
1860
1861 #[tokio::test]
1862 async fn test_exit_staging_change_with_different_columns_fails() {
1863 let env = SchedulerEnv::new().await;
1864 let region = build_test_region(&env).await;
1865
1866 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1867 region.set_staging(&mut manager).await.unwrap();
1868
1869 let mut changed_metadata = region.version().metadata.as_ref().clone();
1870 changed_metadata.column_metadatas.rotate_left(1);
1871
1872 manager
1873 .update(
1874 RegionMetaActionList::new(vec![
1875 RegionMetaAction::Change(RegionChange {
1876 metadata: Arc::new(changed_metadata),
1877 sst_format: FormatType::PrimaryKey,
1878 append_mode: None,
1879 }),
1880 RegionMetaAction::Edit(empty_edit()),
1881 ]),
1882 true,
1883 )
1884 .await
1885 .unwrap();
1886
1887 let result = region.exit_staging_on_success(&mut manager).await;
1888 assert!(matches!(result, Err(Error::Unexpected { .. })));
1889 }
1890
1891 #[tokio::test]
1892 async fn test_exit_staging_partition_expr_change_and_change_conflict_fails() {
1893 let env = SchedulerEnv::new().await;
1894 let region = build_test_region(&env).await;
1895
1896 let mut manager = region.manifest_ctx.manifest_manager.write().await;
1897 region.set_staging(&mut manager).await.unwrap();
1898
1899 let mut changed_metadata = region.version().metadata.as_ref().clone();
1900 changed_metadata.set_partition_expr(Some("expr_c".to_string()));
1901
1902 manager
1903 .update(
1904 RegionMetaActionList::new(vec![
1905 RegionMetaAction::PartitionExprChange(RegionPartitionExprChange {
1906 partition_expr: Some("expr_c".to_string()),
1907 }),
1908 RegionMetaAction::Change(RegionChange {
1909 metadata: Arc::new(changed_metadata),
1910 sst_format: FormatType::PrimaryKey,
1911 append_mode: None,
1912 }),
1913 RegionMetaAction::Edit(empty_edit()),
1914 ]),
1915 true,
1916 )
1917 .await
1918 .unwrap();
1919
1920 let result = region.exit_staging_on_success(&mut manager).await;
1921 assert!(matches!(result, Err(Error::Unexpected { .. })));
1922 }
1923
1924 #[tokio::test]
1925 async fn test_set_region_state() {
1926 let env = SchedulerEnv::new().await;
1927 let builder = VersionControlBuilder::new();
1928 let version_control = Arc::new(builder.build());
1929 let manifest_ctx = env
1930 .mock_manifest_context(version_control.current().version.metadata.clone())
1931 .await;
1932
1933 let region_id = RegionId::new(1024, 0);
1934 manifest_ctx.set_role(RegionRole::Follower, region_id);
1936 assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1937
1938 manifest_ctx.set_role(RegionRole::Leader, region_id);
1940 assert_eq!(
1941 manifest_ctx.state.load(),
1942 RegionRoleState::Leader(RegionLeaderState::Writable)
1943 );
1944
1945 manifest_ctx.set_role(RegionRole::StagingLeader, region_id);
1947 assert_eq!(
1948 manifest_ctx.state.load(),
1949 RegionRoleState::Leader(RegionLeaderState::Writable)
1950 );
1951
1952 manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1954 assert_eq!(
1955 manifest_ctx.state.load(),
1956 RegionRoleState::Leader(RegionLeaderState::Downgrading)
1957 );
1958
1959 manifest_ctx.set_role(RegionRole::Follower, region_id);
1961 assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1962
1963 manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1965 assert_eq!(manifest_ctx.state.load(), RegionRoleState::Follower);
1966
1967 manifest_ctx.set_role(RegionRole::Leader, region_id);
1969 manifest_ctx.set_role(RegionRole::DowngradingLeader, region_id);
1970 assert_eq!(
1971 manifest_ctx.state.load(),
1972 RegionRoleState::Leader(RegionLeaderState::Downgrading)
1973 );
1974
1975 manifest_ctx.set_role(RegionRole::Leader, region_id);
1977 assert_eq!(
1978 manifest_ctx.state.load(),
1979 RegionRoleState::Leader(RegionLeaderState::Writable)
1980 );
1981 }
1982
1983 #[tokio::test]
1984 async fn test_staging_state_validation() {
1985 let env = SchedulerEnv::new().await;
1986 let builder = VersionControlBuilder::new();
1987 let version_control = Arc::new(builder.build());
1988
1989 let staging_ctx = {
1991 let manager = RegionManifestManager::new(
1992 version_control.current().version.metadata.clone(),
1993 0,
1994 RegionManifestOptions {
1995 manifest_dir: "".to_string(),
1996 object_store: env.access_layer.object_store().clone(),
1997 compress_type: CompressionType::Uncompressed,
1998 checkpoint_distance: 10,
1999 remove_file_options: Default::default(),
2000 manifest_cache: None,
2001 },
2002 FormatType::PrimaryKey,
2003 &Default::default(),
2004 )
2005 .await
2006 .unwrap();
2007 Arc::new(ManifestContext::new(
2008 manager,
2009 RegionRoleState::Leader(RegionLeaderState::Staging),
2010 ))
2011 };
2012
2013 assert_eq!(
2015 staging_ctx.current_state(),
2016 RegionRoleState::Leader(RegionLeaderState::Staging)
2017 );
2018
2019 let writable_ctx = env
2021 .mock_manifest_context(version_control.current().version.metadata.clone())
2022 .await;
2023
2024 assert_eq!(
2025 writable_ctx.current_state(),
2026 RegionRoleState::Leader(RegionLeaderState::Writable)
2027 );
2028 }
2029
2030 #[tokio::test]
2031 async fn test_staging_state_transitions() {
2032 let builder = VersionControlBuilder::new();
2033 let version_control = Arc::new(builder.build());
2034 let metadata = version_control.current().version.metadata.clone();
2035
2036 let temp_dir = create_temp_dir("");
2038 let path_str = temp_dir.path().display().to_string();
2039 let fs_builder = Fs::default().root(&path_str);
2040 let object_store = ObjectStore::new(fs_builder).unwrap().finish();
2041
2042 let index_aux_path = temp_dir.path().join("index_aux");
2043 let puffin_mgr = PuffinManagerFactory::new(&index_aux_path, 4096, None, None)
2044 .await
2045 .unwrap();
2046 let intm_mgr = IntermediateManager::init_fs(index_aux_path.to_str().unwrap())
2047 .await
2048 .unwrap();
2049
2050 let access_layer = Arc::new(AccessLayer::new(
2051 "",
2052 PathType::Bare,
2053 object_store,
2054 puffin_mgr,
2055 intm_mgr,
2056 ));
2057
2058 let manager = RegionManifestManager::new(
2059 metadata.clone(),
2060 0,
2061 RegionManifestOptions {
2062 manifest_dir: "".to_string(),
2063 object_store: access_layer.object_store().clone(),
2064 compress_type: CompressionType::Uncompressed,
2065 checkpoint_distance: 10,
2066 remove_file_options: Default::default(),
2067 manifest_cache: None,
2068 },
2069 FormatType::PrimaryKey,
2070 &Default::default(),
2071 )
2072 .await
2073 .unwrap();
2074
2075 let manifest_ctx = Arc::new(ManifestContext::new(
2076 manager,
2077 RegionRoleState::Leader(RegionLeaderState::Writable),
2078 ));
2079
2080 let region = MitoRegion {
2081 region_id: metadata.region_id,
2082 version_control,
2083 access_layer,
2084 manifest_ctx: manifest_ctx.clone(),
2085 file_purger: crate::test_util::new_noop_file_purger(),
2086 provider: Provider::noop_provider(),
2087 last_flush_millis: Default::default(),
2088 last_schedule_compaction_millis: Default::default(),
2089 time_provider: Arc::new(StdTimeProvider),
2090 topic_latest_entry_id: Default::default(),
2091 written_bytes: Arc::new(AtomicU64::new(0)),
2092 stats: ManifestStats::default(),
2093 };
2094
2095 assert_eq!(
2097 region.state(),
2098 RegionRoleState::Leader(RegionLeaderState::Writable)
2099 );
2100 assert!(!region.is_staging());
2101
2102 let mut manager = manifest_ctx.manifest_manager.write().await;
2104 region.set_staging(&mut manager).await.unwrap();
2105 drop(manager);
2106 assert_eq!(
2107 region.state(),
2108 RegionRoleState::Leader(RegionLeaderState::Staging)
2109 );
2110 assert!(region.is_staging());
2111
2112 region.exit_staging().unwrap();
2114 assert_eq!(
2115 region.state(),
2116 RegionRoleState::Leader(RegionLeaderState::Writable)
2117 );
2118 assert!(!region.is_staging());
2119
2120 {
2122 let manager = manifest_ctx.manifest_manager.write().await;
2124 let dummy_actions = RegionMetaActionList::new(vec![]);
2125 let dummy_bytes = dummy_actions.encode().unwrap();
2126
2127 manager.store().save(100, &dummy_bytes, true).await.unwrap();
2129 manager.store().save(101, &dummy_bytes, true).await.unwrap();
2130 drop(manager);
2131
2132 let manager = manifest_ctx.manifest_manager.read().await;
2134 let dirty_manifests = manager.store().fetch_staging_manifests().await.unwrap();
2135 assert_eq!(
2136 dirty_manifests.len(),
2137 2,
2138 "Should have 2 dirty staging files"
2139 );
2140 drop(manager);
2141
2142 let mut manager = manifest_ctx.manifest_manager.write().await;
2144 region.set_staging(&mut manager).await.unwrap();
2145 drop(manager);
2146
2147 let manager = manifest_ctx.manifest_manager.read().await;
2149 let cleaned_manifests = manager.store().fetch_staging_manifests().await.unwrap();
2150 assert_eq!(
2151 cleaned_manifests.len(),
2152 0,
2153 "Dirty staging files should be cleaned up"
2154 );
2155 drop(manager);
2156
2157 region.exit_staging().unwrap();
2159 }
2160
2161 let mut manager = manifest_ctx.manifest_manager.write().await;
2163 assert!(region.set_staging(&mut manager).await.is_ok()); drop(manager);
2165 let mut manager = manifest_ctx.manifest_manager.write().await;
2166 assert!(region.set_staging(&mut manager).await.is_err()); drop(manager);
2168 assert!(region.exit_staging().is_ok()); assert!(region.exit_staging().is_err()); }
2171}