From dbe0aa653ac2d0c3ef0a8087b7ab8878d1e59c9a Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Fri, 26 Apr 2024 11:48:47 -0400 Subject: [PATCH] feat(pageserver): add aux-file-v2 flag on tenant level (#7505) Changing metadata format is not easy. This pull request adds a tenant-level flag on whether to enable aux file v2. As long as we don't roll this out to the user and guarantee our staging projects can persist tenant config correctly, we can test the aux file v2 change by setting this flag. Previous discussion at https://github.com/neondatabase/neon/pull/7424. Signed-off-by: Alex Chi Z --- control_plane/src/pageserver.rs | 10 ++++++++++ libs/pageserver_api/src/models.rs | 1 + pageserver/src/tenant.rs | 1 + pageserver/src/tenant/config.rs | 13 +++++++++++++ pageserver/src/tenant/timeline.rs | 9 +++++++++ test_runner/regress/test_attach_tenant_config.py | 1 + 6 files changed, 35 insertions(+) diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index adac7d7bb5..0699e47866 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -441,6 +441,11 @@ impl PageServerNode { .map(serde_json::from_str) .transpose() .context("parse `timeline_get_throttle` from json")?, + switch_to_aux_file_v2: settings + .remove("switch_to_aux_file_v2") + .map(|x| x.parse::<bool>()) + .transpose() + .context("Failed to parse 'switch_to_aux_file_v2' as bool")?, }; if !settings.is_empty() { bail!("Unrecognized tenant settings: {settings:?}") @@ -559,6 +564,11 @@ impl PageServerNode { .map(serde_json::from_str) .transpose() .context("parse `timeline_get_throttle` from json")?, + switch_to_aux_file_v2: settings + .remove("switch_to_aux_file_v2") + .map(|x| x.parse::<bool>()) + .transpose() + .context("Failed to parse 'switch_to_aux_file_v2' as bool")?, } }; diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 4ce1ecde26..e2acde6139 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs 
@@ -303,6 +303,7 @@ pub struct TenantConfig { pub lazy_slru_download: Option<bool>, pub timeline_get_throttle: Option<ThrottleConfig>, pub image_layer_creation_check_threshold: Option<u8>, + pub switch_to_aux_file_v2: Option<bool>, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index ff6194ab00..32c0606fc2 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3664,6 +3664,7 @@ pub(crate) mod harness { image_layer_creation_check_threshold: Some( tenant_conf.image_layer_creation_check_threshold, ), + switch_to_aux_file_v2: Some(tenant_conf.switch_to_aux_file_v2), } } } diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index a2bb479f63..9975c9edbc 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -369,6 +369,10 @@ pub struct TenantConf { // How much WAL must be ingested before checking again whether a new image layer is required. // Expresed in multiples of checkpoint distance. pub image_layer_creation_check_threshold: u8, + + /// Switch to aux file v2. Switching this flag requires the user has not written any aux file into + /// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions. 
+ pub switch_to_aux_file_v2: bool, } /// Same as TenantConf, but this struct preserves the information about @@ -464,6 +468,10 @@ pub struct TenantConfOpt { #[serde(skip_serializing_if = "Option::is_none")] pub image_layer_creation_check_threshold: Option<u8>, + + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default)] + pub switch_to_aux_file_v2: Option<bool>, } impl TenantConfOpt { @@ -521,6 +529,9 @@ impl TenantConfOpt { image_layer_creation_check_threshold: self .image_layer_creation_check_threshold .unwrap_or(global_conf.image_layer_creation_check_threshold), + switch_to_aux_file_v2: self + .switch_to_aux_file_v2 + .unwrap_or(global_conf.switch_to_aux_file_v2), } } } @@ -562,6 +573,7 @@ impl Default for TenantConf { lazy_slru_download: false, timeline_get_throttle: crate::tenant::throttle::Config::disabled(), image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD, + switch_to_aux_file_v2: false, } } } @@ -636,6 +648,7 @@ impl From<TenantConfOpt> for models::TenantConfig { lazy_slru_download: value.lazy_slru_download, timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from), image_layer_creation_check_threshold: value.image_layer_creation_check_threshold, + switch_to_aux_file_v2: value.switch_to_aux_file_v2, } } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index eb72ce9629..a05e0da260 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -1871,6 +1871,15 @@ const REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE: u64 = 10; // Private functions impl Timeline { + #[allow(dead_code)] + pub(crate) fn get_switch_to_aux_file_v2(&self) -> bool { + let tenant_conf = self.tenant_conf.load(); + tenant_conf + .tenant_conf + .switch_to_aux_file_v2 + .unwrap_or(self.conf.default_tenant_conf.switch_to_aux_file_v2) + } + pub(crate) fn get_lazy_slru_download(&self) -> bool { let tenant_conf = self.tenant_conf.load(); tenant_conf diff --git 
a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index 909d25980b..59461cc095 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -190,6 +190,7 @@ def test_fully_custom_config(positive_env: NeonEnv): "trace_read_requests": True, "walreceiver_connect_timeout": "13m", "image_layer_creation_check_threshold": 1, + "switch_to_aux_file_v2": True, } ps_http = env.pageserver.http_client()