From 6eb638f4b390270fa004cdea45e00ca63c21f773 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:31:38 -0400 Subject: [PATCH] feat(pageserver): warn on aux v1 tenants + default to v2 (#8625) part of https://github.com/neondatabase/neon/issues/8623 We want to discover potential aux v1 customers that we might have missed from the migrations. ## Summary of changes Log warnings on basebackup, load timeline, and the first put_file. --------- Signed-off-by: Alex Chi Z --- libs/pageserver_api/src/models.rs | 2 +- pageserver/src/pgdatadir_mapping.rs | 15 +++++++++++-- pageserver/src/tenant.rs | 14 ++++++------ pageserver/src/tenant/timeline.rs | 5 +++++ .../regress/test_logical_replication.py | 22 +++++-------------- 5 files changed, 32 insertions(+), 26 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index ab4adfbebe..d55c06b685 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -348,7 +348,7 @@ impl AuxFilePolicy { /// If a tenant writes aux files without setting `switch_aux_policy`, this value will be used. pub fn default_tenant_config() -> Self { - Self::V1 + Self::V2 } } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index d6e0b82e1d..b7110d69b6 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -726,7 +726,17 @@ impl Timeline { ) -> Result, PageReconstructError> { let current_policy = self.last_aux_file_policy.load(); match current_policy { - Some(AuxFilePolicy::V1) | None => self.list_aux_files_v1(lsn, ctx).await, + Some(AuxFilePolicy::V1) => { + warn!("this timeline is using deprecated aux file policy V1 (policy=V1)"); + self.list_aux_files_v1(lsn, ctx).await + } + None => { + let res = self.list_aux_files_v1(lsn, ctx).await?; + if !res.is_empty() { + warn!("this timeline is using deprecated aux file policy V1 (policy=None)"); + } + Ok(res) + } Some(AuxFilePolicy::V2) => self.list_aux_files_v2(lsn, ctx).await, Some(AuxFilePolicy::CrossValidation) => { let v1_result = self.list_aux_files_v1(lsn, ctx).await; @@ -1587,6 +1597,7 @@ impl<'a> DatadirModification<'a> { if aux_files_key_v1.is_empty() { None } else { + warn!("this timeline is using deprecated aux file policy V1"); self.tline.do_switch_aux_policy(AuxFilePolicy::V1)?; Some(AuxFilePolicy::V1) } @@ -2048,7 +2059,7 @@ mod tests { let (tenant, ctx) = harness.load().await; let tline = tenant - .create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx) + .create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) .await?; let tline = tline.raw_timeline().unwrap(); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 65a7504b74..2e19a46ac8 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5932,10 +5932,10 @@ mod tests { .await .unwrap(); - // the default aux file policy to switch is v1 if not set by the admins + // the default aux file policy to switch is v2 if not set by the admins assert_eq!( harness.tenant_conf.switch_aux_file_policy, - AuxFilePolicy::V1 + AuxFilePolicy::default_tenant_config() ); let (tenant, ctx) = harness.load().await; @@ -5979,8 +5979,8 @@ mod tests { ); assert_eq!( tline.last_aux_file_policy.load(), - Some(AuxFilePolicy::V1), - "aux file is written with switch_aux_file_policy unset (which is v1), so we should keep v1" + Some(AuxFilePolicy::V2), + "aux file is written with switch_aux_file_policy unset (which is v2), so we should use v2 there" ); // we can read everything from the storage @@ -6002,8 +6002,8 @@ mod tests { assert_eq!( tline.last_aux_file_policy.load(), - Some(AuxFilePolicy::V1), - "keep v1 storage format when new files are written" + Some(AuxFilePolicy::V2), + "keep v2 storage format when new files are written" ); let files = tline.list_aux_files(lsn, &ctx).await.unwrap(); @@ -6019,7 +6019,7 @@ mod tests { // child copies the last flag even if that is not on remote storage yet assert_eq!(child.get_switch_aux_file_policy(), AuxFilePolicy::V2); - assert_eq!(child.last_aux_file_policy.load(), Some(AuxFilePolicy::V1)); + assert_eq!(child.last_aux_file_policy.load(), Some(AuxFilePolicy::V2)); let files = child.list_aux_files(lsn, &ctx).await.unwrap(); assert_eq!(files.get("pg_logical/mappings/test1"), None); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index e90f65942f..dc9cddea43 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2234,6 +2234,11 @@ impl Timeline { handles: Default::default(), }; + + if aux_file_policy == Some(AuxFilePolicy::V1) { + warn!("this timeline is using deprecated aux file policy V1"); + } + result.repartition_threshold = result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; diff --git a/test_runner/regress/test_logical_replication.py b/test_runner/regress/test_logical_replication.py index 0d18aa43b7..f83a833dda 100644 --- a/test_runner/regress/test_logical_replication.py +++ b/test_runner/regress/test_logical_replication.py @@ -22,7 +22,7 @@ def random_string(n: int): @pytest.mark.parametrize( - "pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.V2, AuxFileStore.CrossValidation] + "pageserver_aux_file_policy", [AuxFileStore.V2, AuxFileStore.CrossValidation] ) def test_aux_file_v2_flag(neon_simple_env: NeonEnv, pageserver_aux_file_policy: AuxFileStore): env = neon_simple_env @@ -31,9 +31,7 @@ def test_aux_file_v2_flag(neon_simple_env: NeonEnv, pageserver_aux_file_policy: assert pageserver_aux_file_policy == tenant_config["switch_aux_file_policy"] -@pytest.mark.parametrize( - "pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation] -) +@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg): env = neon_simple_env @@ -175,9 +173,7 @@ COMMIT; # Test that neon.logical_replication_max_snap_files works -@pytest.mark.parametrize( - "pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation] -) +@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg): def slot_removed(ep): assert ( @@ -355,9 +351,7 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of # # Most pages start with a contrecord, so we don't do anything special # to ensure that. -@pytest.mark.parametrize( - "pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation] -) +@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg): env = neon_simple_env @@ -402,9 +396,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg): # logical replication bug as such, but without logical replication, # records passed ot the WAL redo process are never large enough to hit # the bug. -@pytest.mark.parametrize( - "pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation] -) +@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_large_records(neon_simple_env: NeonEnv, vanilla_pg): env = neon_simple_env @@ -476,9 +468,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv): ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')") -@pytest.mark.parametrize( - "pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation] -) +@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_replication_shutdown(neon_simple_env: NeonEnv): # Ensure Postgres can exit without stuck when a replication job is active + neon extension installed env = neon_simple_env