From 45a3e1121d1463d743482b144f8a9a9dfdce3c57 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Tue, 10 Feb 2026 16:26:25 +0800 Subject: [PATCH] fix(mito2): introduce `PartitionExprChange` in staging flow and keep memtables on metadata-only updates (#7695) * feat(mito2): add RegionMetaAction::PartitionExprChange Signed-off-by: WenyXu * refactor(mito2): apply partition-expr action in manifest builder and manager Signed-off-by: WenyXu * refactor(mito2): add partition-expr action merge rules and conflict guard Signed-off-by: WenyXu * refactor(mito2): use partition-rule action in enter staging Signed-off-by: WenyXu * fix(mito2): validate Change and route to metadata-only update on staging exit Signed-off-by: WenyXu * test(mito2): cover partition-expr action staging flow and conflict cases Signed-off-by: WenyXu * test(mito2): add apply-staging coverage for Change metadata validation Signed-off-by: WenyXu * test(mito2): add apply-staging coverage for Change metadata validation Signed-off-by: WenyXu * chore: fmt Signed-off-by: WenyXu * chore: remove unused error Signed-off-by: WenyXu * chore: apply suggestions Signed-off-by: WenyXu * chore: add warn Signed-off-by: WenyXu * chore: add comments Signed-off-by: WenyXu * test: preserves unflushed memtable Signed-off-by: WenyXu * chore: fmt Signed-off-by: WenyXu --------- Signed-off-by: WenyXu --- .../src/engine/apply_staging_manifest_test.rs | 238 +++++++++++++++++- src/mito2/src/engine/staging_test.rs | 149 ++++++++++- src/mito2/src/manifest/action.rs | 59 ++++- src/mito2/src/manifest/manager.rs | 9 + src/mito2/src/region.rs | 237 ++++++++++++++++- src/mito2/src/region/version.rs | 13 + src/mito2/src/worker/handle_enter_staging.rs | 14 +- 7 files changed, 695 insertions(+), 24 deletions(-) diff --git a/src/mito2/src/engine/apply_staging_manifest_test.rs b/src/mito2/src/engine/apply_staging_manifest_test.rs index 9e3ee82bf3..e7c8bcea80 100644 --- a/src/mito2/src/engine/apply_staging_manifest_test.rs +++ b/src/mito2/src/engine/apply_staging_manifest_test.rs @@ -14,8 +14,10 @@ use std::assert_matches::assert_matches; use std::fs; +use std::sync::Arc; use api::v1::Rows; +use common_function::utils::partition_rule_version; use common_recordbatch::RecordBatches; use datatypes::value::Value; use partition::expr::{PartitionExpr, col}; @@ -23,14 +25,18 @@ use store_api::region_engine::{ RegionEngine, RegionRole, RemapManifestsRequest, SettableRegionRoleState, }; use store_api::region_request::{ - ApplyStagingManifestRequest, EnterStagingRequest, RegionFlushRequest, RegionRequest, + ApplyStagingManifestRequest, EnterStagingRequest, RegionFlushRequest, RegionPutRequest, + RegionRequest, }; use store_api::storage::{FileId, RegionId}; use super::ScanRequest; use crate::config::MitoConfig; use crate::error::Error; -use crate::manifest::action::RegionManifest; +use crate::manifest::action::{ + RegionChange, RegionEdit, RegionManifest, RegionMetaAction, RegionMetaActionList, +}; +use crate::sst::FormatType; use crate::sst::file::FileMeta; use crate::test_util::{CreateRequestBuilder, TestEnv, build_rows, put_rows, rows_schema}; @@ -40,6 +46,12 @@ fn range_expr(col_name: &str, start: i64, end: i64) -> PartitionExpr { .and(col(col_name).lt(Value::Int64(end))) } +fn float_range_expr(col_name: &str, start: f64, end: f64) -> PartitionExpr { + col(col_name) + .gt_eq(Value::Float64(start.into())) + .and(col(col_name).lt(Value::Float64(end.into()))) +} + #[tokio::test] async fn test_apply_staging_manifest_invalid_region_state() { common_telemetry::init_default_ut_logging(); @@ -454,6 +466,228 @@ async fn test_apply_staging_manifest_invalid_files_to_add_with_format(flat_forma ); } +#[tokio::test] +async fn test_apply_staging_manifest_change_edit_different_columns_fails() { + test_apply_staging_manifest_change_edit_different_columns_fails_with_format(false).await; + test_apply_staging_manifest_change_edit_different_columns_fails_with_format(true).await; +} + +async fn test_apply_staging_manifest_change_edit_different_columns_fails_with_format( + flat_format: bool, +) { + let mut env = TestEnv::with_prefix("apply-change-edit-different-columns").await; + let engine = env + .create_engine(MitoConfig { + default_experimental_flat_format: flat_format, + ..Default::default() + }) + .await; + let region_id = RegionId::new(2, 2); + let request = CreateRequestBuilder::new().build(); + let column_schemas = rows_schema(&request); + engine + .handle_request(region_id, RegionRequest::Create(request)) + .await + .unwrap(); + + let rows_data = Rows { + schema: column_schemas, + rows: build_rows(0, 3), + }; + put_rows(&engine, region_id, rows_data).await; + engine + .handle_request( + region_id, + RegionRequest::Flush(RegionFlushRequest { + row_group_size: None, + }), + ) + .await + .unwrap(); + + let partition_expr = range_expr("tag_0", 0, 50).as_json_str().unwrap(); + engine + .handle_request( + region_id, + RegionRequest::EnterStaging(EnterStagingRequest { + partition_expr: partition_expr.clone(), + }), + ) + .await + .unwrap(); + + let remap_result = engine + .remap_manifests(RemapManifestsRequest { + region_id, + input_regions: vec![region_id], + region_mapping: [(region_id, vec![region_id])].into_iter().collect(), + new_partition_exprs: [(region_id, partition_expr.clone())].into_iter().collect(), + }) + .await + .unwrap(); + + let region = engine.get_region(region_id).unwrap(); + let mut manager = region.manifest_ctx.manifest_manager.write().await; + let manifest_storage = manager.store(); + let blob_store = manifest_storage.staging_storage().blob_storage(); + let remap_manifest_path = remap_result.manifest_paths[®ion_id].clone(); + let remap_manifest_bytes = blob_store.get(&remap_manifest_path).await.unwrap(); + manager.clear_staging_manifest_and_dir().await.unwrap(); + + let mut changed_metadata = region.version().metadata.as_ref().clone(); + changed_metadata.column_metadatas.rotate_left(1); + + manager + .update( + RegionMetaActionList::new(vec![ + RegionMetaAction::Change(RegionChange { + metadata: Arc::new(changed_metadata), + sst_format: FormatType::PrimaryKey, + }), + RegionMetaAction::Edit(RegionEdit { + files_to_add: Vec::new(), + files_to_remove: Vec::new(), + timestamp_ms: None, + compaction_time_window: None, + flushed_entry_id: None, + flushed_sequence: None, + committed_sequence: None, + }), + ]), + true, + ) + .await + .unwrap(); + blob_store + .put(&remap_manifest_path, remap_manifest_bytes) + .await + .unwrap(); + drop(manager); + + let err = engine + .handle_request( + region_id, + RegionRequest::ApplyStagingManifest(ApplyStagingManifestRequest { + partition_expr, + central_region_id: region_id, + manifest_path: remap_manifest_path, + }), + ) + .await + .unwrap_err(); + assert_matches!( + err.into_inner().as_any().downcast_ref::().unwrap(), + Error::Unexpected { .. } + ); +} + +#[tokio::test] +async fn test_apply_staging_manifest_preserves_unflushed_memtable() { + test_apply_staging_manifest_preserves_unflushed_memtable_with_format(false).await; + test_apply_staging_manifest_preserves_unflushed_memtable_with_format(true).await; +} + +async fn test_apply_staging_manifest_preserves_unflushed_memtable_with_format(flat_format: bool) { + let mut env = TestEnv::with_prefix("apply-preserve-memtable").await; + let engine = env + .create_engine(MitoConfig { + default_experimental_flat_format: flat_format, + ..Default::default() + }) + .await; + + let region_id = RegionId::new(3, 1); + let origin_partition_expr = float_range_expr("field_0", 0., 50.).as_json_str().unwrap(); + let request = CreateRequestBuilder::new() + .partition_expr_json(Some(origin_partition_expr)) + .build(); + let column_schemas = rows_schema(&request); + + engine + .handle_request(region_id, RegionRequest::Create(request)) + .await + .unwrap(); + + let base_rows = Rows { + schema: column_schemas.clone(), + rows: build_rows(0, 3), + }; + put_rows(&engine, region_id, base_rows).await; + engine + .handle_request( + region_id, + RegionRequest::Flush(RegionFlushRequest { + row_group_size: None, + }), + ) + .await + .unwrap(); + + let partition_expr = float_range_expr("field_0", 0., 100.).as_json_str().unwrap(); + engine + .handle_request( + region_id, + RegionRequest::EnterStaging(EnterStagingRequest { + partition_expr: partition_expr.clone(), + }), + ) + .await + .unwrap(); + + let expected_version = partition_rule_version(Some(&partition_expr)); + let unflushed_rows = Rows { + schema: column_schemas, + rows: build_rows(3, 6), + }; + engine + .handle_request( + region_id, + RegionRequest::Put(RegionPutRequest { + rows: unflushed_rows, + hint: None, + partition_rule_version: Some(expected_version), + }), + ) + .await + .unwrap(); + + let remap_result = engine + .remap_manifests(RemapManifestsRequest { + region_id, + input_regions: vec![region_id], + region_mapping: [(region_id, vec![region_id])].into_iter().collect(), + new_partition_exprs: [(region_id, partition_expr.clone())].into_iter().collect(), + }) + .await + .unwrap(); + + engine + .handle_request( + region_id, + RegionRequest::ApplyStagingManifest(ApplyStagingManifestRequest { + partition_expr, + central_region_id: region_id, + manifest_path: remap_result.manifest_paths[®ion_id].clone(), + }), + ) + .await + .unwrap(); + + let scanner = engine + .scanner(region_id, ScanRequest::default()) + .await + .unwrap(); + assert!( + scanner.num_memtables() > 0, + "unflushed memtable should be preserved after apply staging manifest" + ); + let batches = RecordBatches::try_collect(scanner.scan().await.unwrap()) + .await + .unwrap(); + let total_rows: usize = batches.iter().map(|rb| rb.num_rows()).sum(); + assert_eq!(total_rows, 6, "rows from memtable should remain readable"); +} + #[tokio::test] async fn test_split_repartition_causes_duplicate_data() { common_telemetry::init_default_ut_logging(); diff --git a/src/mito2/src/engine/staging_test.rs b/src/mito2/src/engine/staging_test.rs index 4db77406f7..ddc6ca3fb6 100644 --- a/src/mito2/src/engine/staging_test.rs +++ b/src/mito2/src/engine/staging_test.rs @@ -31,7 +31,9 @@ use object_store::layers::mock::{ Result as MockResult, Write, Writer, }; use partition::expr::{PartitionExpr, col}; -use store_api::region_engine::{RegionEngine, RemapManifestsRequest, SettableRegionRoleState}; +use store_api::region_engine::{ + RegionEngine, RemapManifestsRequest, SetRegionRoleStateResponse, SettableRegionRoleState, +}; use store_api::region_request::{ ApplyStagingManifestRequest, EnterStagingRequest, RegionAlterRequest, RegionFlushRequest, RegionPutRequest, RegionRequest, RegionTruncateRequest, @@ -41,8 +43,12 @@ use store_api::storage::{RegionId, ScanRequest}; use crate::config::MitoConfig; use crate::engine::listener::NotifyEnterStagingResultListener; use crate::error::Error; +use crate::manifest::action::{ + RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionPartitionExprChange, +}; use crate::region::{RegionLeaderState, RegionRoleState, parse_partition_expr}; use crate::request::WorkerRequest; +use crate::sst::FormatType; use crate::test_util::{CreateRequestBuilder, TestEnv, build_rows, put_rows, rows_schema}; fn range_expr(col_name: &str, start: i64, end: i64) -> PartitionExpr { @@ -810,6 +816,147 @@ async fn test_staging_exit_success_with_manifests_with_format(flat_format: bool) assert!(sst_entries.iter().all(|e| e.visible)); } +#[tokio::test] +async fn test_enter_staging_writes_partition_expr_change_action() { + test_enter_staging_writes_partition_expr_change_action_with_format(false).await; + test_enter_staging_writes_partition_expr_change_action_with_format(true).await; +} + +async fn test_enter_staging_writes_partition_expr_change_action_with_format(flat_format: bool) { + let mut env = TestEnv::new().await; + let engine = env + .create_engine(MitoConfig { + default_experimental_flat_format: flat_format, + ..Default::default() + }) + .await; + + let region_id = RegionId::new(2000, 1); + let request = CreateRequestBuilder::new().build(); + engine + .handle_request(region_id, RegionRequest::Create(request)) + .await + .unwrap(); + + let partition_expr = default_partition_expr(); + engine + .handle_request( + region_id, + RegionRequest::EnterStaging(EnterStagingRequest { + partition_expr: partition_expr.clone(), + }), + ) + .await + .unwrap(); + + let region = engine.get_region(region_id).unwrap(); + let manager = region.manifest_ctx.manifest_manager.read().await; + let staging_manifests = manager.store().fetch_staging_manifests().await.unwrap(); + assert!(!staging_manifests.is_empty()); + + let mut found_partition_expr_change = false; + let mut found_change = false; + for (_, raw_action_list) in staging_manifests { + let action_list = RegionMetaActionList::decode(&raw_action_list).unwrap(); + for action in action_list.actions { + match action { + RegionMetaAction::PartitionExprChange(change) => { + found_partition_expr_change = true; + assert_eq!(change.partition_expr, Some(partition_expr.clone())); + } + RegionMetaAction::Change(_) => { + found_change = true; + } + _ => {} + } + } + } + + assert!(found_partition_expr_change); + assert!(!found_change); +} + +#[tokio::test] +async fn test_staging_exit_conflict_partition_expr_change_and_change() { + test_staging_exit_conflict_partition_expr_change_and_change_with_format(false).await; + test_staging_exit_conflict_partition_expr_change_and_change_with_format(true).await; +} + +async fn test_staging_exit_conflict_partition_expr_change_and_change_with_format( + flat_format: bool, +) { + let mut env = TestEnv::new().await; + let engine = env + .create_engine(MitoConfig { + default_experimental_flat_format: flat_format, + ..Default::default() + }) + .await; + + let region_id = RegionId::new(2000, 2); + let request = CreateRequestBuilder::new().build(); + engine + .handle_request(region_id, RegionRequest::Create(request)) + .await + .unwrap(); + + let partition_expr = default_partition_expr(); + engine + .handle_request( + region_id, + RegionRequest::EnterStaging(EnterStagingRequest { + partition_expr: partition_expr.clone(), + }), + ) + .await + .unwrap(); + + let region = engine.get_region(region_id).unwrap(); + let mut changed_metadata = region.version().metadata.as_ref().clone(); + changed_metadata.set_partition_expr(Some(partition_expr.clone())); + + let mut manager = region.manifest_ctx.manifest_manager.write().await; + manager + .update( + RegionMetaActionList::new(vec![ + RegionMetaAction::PartitionExprChange(RegionPartitionExprChange { + partition_expr: Some(partition_expr), + }), + RegionMetaAction::Change(RegionChange { + metadata: Arc::new(changed_metadata), + sst_format: FormatType::PrimaryKey, + }), + RegionMetaAction::Edit(RegionEdit { + files_to_add: Vec::new(), + files_to_remove: Vec::new(), + timestamp_ms: None, + compaction_time_window: None, + flushed_entry_id: None, + flushed_sequence: None, + committed_sequence: None, + }), + ]), + true, + ) + .await + .unwrap(); + drop(manager); + + let response = engine + .set_region_role_state_gracefully(region_id, SettableRegionRoleState::Leader) + .await + .unwrap(); + match response { + SetRegionRoleStateResponse::InvalidTransition(err) => { + assert_matches!( + err.as_any().downcast_ref::().unwrap(), + Error::Unexpected { .. } + ); + } + _ => panic!("Expected InvalidTransition response, got: {response:?}"), + } +} + #[tokio::test(flavor = "multi_thread")] async fn test_write_stall_on_enter_staging() { test_write_stall_on_enter_staging_with_format(false).await; diff --git a/src/mito2/src/manifest/action.rs b/src/mito2/src/manifest/action.rs index 6ed34ca210..177bb86873 100644 --- a/src/mito2/src/manifest/action.rs +++ b/src/mito2/src/manifest/action.rs @@ -18,6 +18,7 @@ use std::collections::{HashMap, HashSet}; use std::time::Duration; use chrono::Utc; +use common_telemetry::warn; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; use store_api::ManifestVersion; @@ -37,6 +38,8 @@ use crate::wal::EntryId; pub enum RegionMetaAction { /// Change region's metadata for request like ALTER Change(RegionChange), + /// Change only region partition expression metadata. + PartitionExprChange(RegionPartitionExprChange), /// Edit region's state for changing options or file list. Edit(RegionEdit), /// Remove the region. @@ -45,6 +48,12 @@ pub enum RegionMetaAction { Truncate(RegionTruncate), } +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct RegionPartitionExprChange { + /// Partition expression serialized as JSON. + pub partition_expr: Option, +} + impl RegionMetaAction { /// Returns true if the action is a change action. pub fn is_change(&self) -> bool { @@ -55,6 +64,11 @@ impl RegionMetaAction { pub fn is_edit(&self) -> bool { matches!(self, RegionMetaAction::Edit(_)) } + + /// Returns true if the action is a partition expr change action. + pub fn is_partition_expr_change(&self) -> bool { + matches!(self, RegionMetaAction::PartitionExprChange(_)) + } } #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] @@ -203,6 +217,29 @@ impl RegionManifestBuilder { self.sst_format = change.sst_format; } + /// Applies a partition-expression-only metadata change. + /// + /// This path updates only `partition_expr` (and its derived + /// `partition_rule_version`) on current metadata and does not touch + /// `sst_format`. + pub fn apply_partition_expr_change( + &mut self, + manifest_version: ManifestVersion, + change: RegionPartitionExprChange, + ) { + if let Some(metadata) = &self.metadata { + let mut metadata = metadata.as_ref().clone(); + metadata.set_partition_expr(change.partition_expr); + self.metadata = Some(metadata.into()); + self.manifest_version = manifest_version; + } else { + warn!( + "metadata is not set in region manifest builder, ignore partition expr change: {:?}", + change + ); + } + } + pub fn apply_edit(&mut self, manifest_version: ManifestVersion, edit: RegionEdit) { self.manifest_version = manifest_version; @@ -485,8 +522,14 @@ impl RegionMetaActionList { Self { actions } } - /// Split the actions into a region change and an edit. - pub fn split_region_change_and_edit(self) -> (Option, RegionEdit) { + /// Split the actions into a partition expr change, a region change and an edit. + pub fn split_region_change_and_edit( + self, + ) -> ( + Option, + Option, + RegionEdit, + ) { let mut edit = RegionEdit { files_to_add: Vec::new(), files_to_remove: Vec::new(), @@ -496,9 +539,13 @@ impl RegionMetaActionList { flushed_sequence: None, committed_sequence: None, }; + let mut partition_expr_change = None; let mut region_change = None; for action in self.actions { match action { + RegionMetaAction::PartitionExprChange(change) => { + partition_expr_change = Some(change); + } RegionMetaAction::Change(change) => { region_change = Some(change); } @@ -528,7 +575,7 @@ impl RegionMetaActionList { } } - (region_change, edit) + (partition_expr_change, region_change, edit) } } @@ -599,6 +646,12 @@ mod tests { let region_remove = r#"{"region_id":42}"#; let _ = serde_json::from_str::(region_remove).unwrap(); + + let region_partition_expr_change = r#"{ + "partition_expr": "{\"expr\":\"x < 100\"}" + }"#; + let _ = serde_json::from_str::(region_partition_expr_change) + .unwrap(); } #[test] diff --git a/src/mito2/src/manifest/manager.rs b/src/mito2/src/manifest/manager.rs index 83e7d58b95..f16fd85e0b 100644 --- a/src/mito2/src/manifest/manager.rs +++ b/src/mito2/src/manifest/manager.rs @@ -300,6 +300,9 @@ impl RegionManifestManager { RegionMetaAction::Change(action) => { manifest_builder.apply_change(manifest_version, action); } + RegionMetaAction::PartitionExprChange(action) => { + manifest_builder.apply_partition_expr_change(manifest_version, action); + } RegionMetaAction::Edit(action) => { manifest_builder.apply_edit(manifest_version, action); } @@ -442,6 +445,9 @@ impl RegionManifestManager { RegionMetaAction::Change(action) => { manifest_builder.apply_change(manifest_version, action); } + RegionMetaAction::PartitionExprChange(action) => { + manifest_builder.apply_partition_expr_change(manifest_version, action); + } RegionMetaAction::Edit(action) => { manifest_builder.apply_edit(manifest_version, action); } @@ -548,6 +554,9 @@ impl RegionManifestManager { RegionMetaAction::Change(action) => { manifest_builder.apply_change(version, action); } + RegionMetaAction::PartitionExprChange(action) => { + manifest_builder.apply_partition_expr_change(version, action); + } RegionMetaAction::Edit(action) => { manifest_builder.apply_edit(version, action); } diff --git a/src/mito2/src/region.rs b/src/mito2/src/region.rs index 3d31d6c173..509462716d 100644 --- a/src/mito2/src/region.rs +++ b/src/mito2/src/region.rs @@ -715,11 +715,21 @@ impl MitoRegion { } }; let expect_change = merged_actions.actions.iter().any(|a| a.is_change()); + let expect_partition_expr_change = merged_actions + .actions + .iter() + .any(|a| a.is_partition_expr_change()); let expect_edit = merged_actions.actions.iter().any(|a| a.is_edit()); ensure!( - expect_change, + !(expect_change && expect_partition_expr_change), UnexpectedSnafu { - reason: "expect a change action in merged actions" + reason: "unexpected both change and partition expr change actions in merged actions" + } + ); + ensure!( + expect_change || expect_partition_expr_change, + UnexpectedSnafu { + reason: "expect a change or partition expr change action in merged actions" } ); ensure!( @@ -729,20 +739,38 @@ impl MitoRegion { } ); + let (merged_partition_expr_change, merged_change, merged_edit) = + merged_actions.clone().split_region_change_and_edit(); + if let Some(change) = &merged_change { + // In staging exit we only allow metadata-only updates. A `Change` + // action is accepted only when column definitions are unchanged; + // otherwise it is treated as a schema change and rejected. + let current_column_metadatas = &self.version().metadata.column_metadatas; + ensure!( + change.metadata.column_metadatas == *current_column_metadatas, + UnexpectedSnafu { + reason: "change action alters column metadata in staging exit" + } + ); + } + // Submit merged actions using the manifest manager's update method // Pass the `false` so it saves to normal directory, not staging - let new_version = manager.update(merged_actions.clone(), false).await?; - + let new_version = manager.update(merged_actions, false).await?; info!( "Successfully submitted merged staged manifests for region {}, new version: {}", self.region_id, new_version ); // Apply the merged changes to in-memory version control - let (merged_change, merged_edit) = merged_actions.split_region_change_and_edit(); - // Safety: we have already ensured that there is a change action in the merged actions. - let new_metadata = merged_change.as_ref().unwrap().metadata.clone(); - self.version_control.alter_schema(new_metadata); + if let Some(change) = merged_partition_expr_change { + let mut new_metadata = self.version().metadata.as_ref().clone(); + new_metadata.set_partition_expr(change.partition_expr); + self.version_control.alter_metadata(new_metadata.into()); + } + if let Some(change) = merged_change { + self.version_control.alter_metadata(change.metadata); + } self.version_control .apply_edit(Some(merged_edit), &[], self.file_purger.clone()); @@ -1364,7 +1392,10 @@ mod tests { use store_api::storage::RegionId; use crate::access_layer::AccessLayer; - use crate::manifest::action::RegionMetaActionList; + use crate::error::Error; + use crate::manifest::action::{ + RegionChange, RegionEdit, RegionMetaAction, RegionMetaActionList, RegionPartitionExprChange, + }; use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions}; use crate::region::{ ManifestContext, ManifestStats, MitoRegion, RegionLeaderState, RegionRoleState, @@ -1381,6 +1412,194 @@ mod tests { assert!(AtomicCell::::is_lock_free()); } + async fn build_test_region(env: &SchedulerEnv) -> MitoRegion { + let builder = VersionControlBuilder::new(); + let version_control = Arc::new(builder.build()); + let metadata = version_control.current().version.metadata.clone(); + + let manager = RegionManifestManager::new( + metadata.clone(), + 0, + RegionManifestOptions { + manifest_dir: "".to_string(), + object_store: env.access_layer.object_store().clone(), + compress_type: CompressionType::Uncompressed, + checkpoint_distance: 10, + remove_file_options: Default::default(), + manifest_cache: None, + }, + FormatType::PrimaryKey, + &Default::default(), + ) + .await + .unwrap(); + + let manifest_ctx = Arc::new(ManifestContext::new( + manager, + RegionRoleState::Leader(RegionLeaderState::Writable), + )); + + MitoRegion { + region_id: metadata.region_id, + version_control, + access_layer: env.access_layer.clone(), + manifest_ctx, + file_purger: crate::test_util::new_noop_file_purger(), + provider: Provider::noop_provider(), + last_flush_millis: Default::default(), + last_compaction_millis: Default::default(), + time_provider: Arc::new(StdTimeProvider), + topic_latest_entry_id: Default::default(), + written_bytes: Arc::new(AtomicU64::new(0)), + stats: ManifestStats::default(), + staging_partition_info: Mutex::new(None), + } + } + + fn empty_edit() -> RegionEdit { + RegionEdit { + files_to_add: Vec::new(), + files_to_remove: Vec::new(), + timestamp_ms: None, + compaction_time_window: None, + flushed_entry_id: None, + flushed_sequence: None, + committed_sequence: None, + } + } + + #[tokio::test] + async fn test_exit_staging_partition_expr_change_and_edit_success() { + let env = SchedulerEnv::new().await; + let region = build_test_region(&env).await; + + let mut manager = region.manifest_ctx.manifest_manager.write().await; + region.set_staging(&mut manager).await.unwrap(); + manager + .update( + RegionMetaActionList::new(vec![ + RegionMetaAction::PartitionExprChange(RegionPartitionExprChange { + partition_expr: Some("expr_a".to_string()), + }), + RegionMetaAction::Edit(empty_edit()), + ]), + true, + ) + .await + .unwrap(); + + region.exit_staging_on_success(&mut manager).await.unwrap(); + drop(manager); + + assert_eq!( + region.version().metadata.partition_expr.as_deref(), + Some("expr_a") + ); + assert_eq!( + region.state(), + RegionRoleState::Leader(RegionLeaderState::Writable) + ); + } + + #[tokio::test] + async fn test_exit_staging_change_with_same_columns_success() { + let env = SchedulerEnv::new().await; + let region = build_test_region(&env).await; + + let mut manager = region.manifest_ctx.manifest_manager.write().await; + region.set_staging(&mut manager).await.unwrap(); + + let mut changed_metadata = region.version().metadata.as_ref().clone(); + changed_metadata.set_partition_expr(Some("expr_b".to_string())); + + manager + .update( + RegionMetaActionList::new(vec![ + RegionMetaAction::Change(RegionChange { + metadata: Arc::new(changed_metadata), + sst_format: FormatType::PrimaryKey, + }), + RegionMetaAction::Edit(empty_edit()), + ]), + true, + ) + .await + .unwrap(); + + region.exit_staging_on_success(&mut manager).await.unwrap(); + drop(manager); + + assert_eq!( + region.version().metadata.partition_expr.as_deref(), + Some("expr_b") + ); + assert_eq!( + region.state(), + RegionRoleState::Leader(RegionLeaderState::Writable) + ); + } + + #[tokio::test] + async fn test_exit_staging_change_with_different_columns_fails() { + let env = SchedulerEnv::new().await; + let region = build_test_region(&env).await; + + let mut manager = region.manifest_ctx.manifest_manager.write().await; + region.set_staging(&mut manager).await.unwrap(); + + let mut changed_metadata = region.version().metadata.as_ref().clone(); + changed_metadata.column_metadatas.rotate_left(1); + + manager + .update( + RegionMetaActionList::new(vec![ + RegionMetaAction::Change(RegionChange { + metadata: Arc::new(changed_metadata), + sst_format: FormatType::PrimaryKey, + }), + RegionMetaAction::Edit(empty_edit()), + ]), + true, + ) + .await + .unwrap(); + + let result = region.exit_staging_on_success(&mut manager).await; + assert!(matches!(result, Err(Error::Unexpected { .. }))); + } + + #[tokio::test] + async fn test_exit_staging_partition_expr_change_and_change_conflict_fails() { + let env = SchedulerEnv::new().await; + let region = build_test_region(&env).await; + + let mut manager = region.manifest_ctx.manifest_manager.write().await; + region.set_staging(&mut manager).await.unwrap(); + + let mut changed_metadata = region.version().metadata.as_ref().clone(); + changed_metadata.set_partition_expr(Some("expr_c".to_string())); + + manager + .update( + RegionMetaActionList::new(vec![ + RegionMetaAction::PartitionExprChange(RegionPartitionExprChange { + partition_expr: Some("expr_c".to_string()), + }), + RegionMetaAction::Change(RegionChange { + metadata: Arc::new(changed_metadata), + sst_format: FormatType::PrimaryKey, + }), + RegionMetaAction::Edit(empty_edit()), + ]), + true, + ) + .await + .unwrap(); + + let result = region.exit_staging_on_success(&mut manager).await; + assert!(matches!(result, Err(Error::Unexpected { .. }))); + } + #[tokio::test] async fn test_set_region_state() { let env = SchedulerEnv::new().await; diff --git a/src/mito2/src/region/version.rs b/src/mito2/src/region/version.rs index 1e6b8386b3..006f684a23 100644 --- a/src/mito2/src/region/version.rs +++ b/src/mito2/src/region/version.rs @@ -198,6 +198,19 @@ impl VersionControl { version_data.version = new_version; } + /// Alter metadata of the region without rebuilding memtables. + pub(crate) fn alter_metadata(&self, metadata: RegionMetadataRef) { + let version = self.current().version; + let new_version = Arc::new( + VersionBuilder::from_version(version) + .metadata(metadata) + .build(), + ); + + let mut version_data = self.data.write().unwrap(); + version_data.version = new_version; + } + /// Alter schema and format of the region. /// /// It replaces existing mutable memtable with a memtable that uses the diff --git a/src/mito2/src/worker/handle_enter_staging.rs b/src/mito2/src/worker/handle_enter_staging.rs index 969869804b..79762c243b 100644 --- a/src/mito2/src/worker/handle_enter_staging.rs +++ b/src/mito2/src/worker/handle_enter_staging.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; use std::time::Instant; use common_base::hash::partition_rule_version; @@ -23,7 +22,7 @@ use store_api::storage::RegionId; use crate::error::{RegionNotFoundSnafu, Result, StagingPartitionExprMismatchSnafu}; use crate::flush::FlushReason; -use crate::manifest::action::{RegionChange, RegionMetaAction, RegionMetaActionList}; +use crate::manifest::action::{RegionMetaAction, RegionMetaActionList, RegionPartitionExprChange}; use crate::region::{MitoRegionRef, RegionLeaderState, StagingPartitionInfo}; use crate::request::{ BackgroundNotify, DdlRequest, EnterStagingResult, OptionOutputTx, SenderDdlRequest, @@ -125,14 +124,11 @@ impl RegionWorkerLoop { } // Second step: write new staging manifest. - let mut new_meta = (*region.metadata()).clone(); - new_meta.set_partition_expr(Some(partition_expr.clone())); - let sst_format = region.version().options.sst_format.unwrap_or_default(); - let change = RegionChange { - metadata: Arc::new(new_meta), - sst_format, + let change = RegionPartitionExprChange { + partition_expr: Some(partition_expr.clone()), }; - let action_list = RegionMetaActionList::with_action(RegionMetaAction::Change(change)); + let action_list = + RegionMetaActionList::with_action(RegionMetaAction::PartitionExprChange(change)); region .manifest_ctx .update_manifest(RegionLeaderState::EnteringStaging, action_list, true)