feat(pageserver): warn on aux v1 tenants + default to v2 (#8625)

part of https://github.com/neondatabase/neon/issues/8623

We want to discover potential aux v1 customers that we might have missed
from the migrations.

## Summary of changes

Log warnings on basebackup, load timeline, and the first put_file.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2024-08-22 17:31:38 -04:00
committed by GitHub
parent 7a485b599b
commit 6eb638f4b3
5 changed files with 32 additions and 26 deletions

View File

@@ -348,7 +348,7 @@ impl AuxFilePolicy {
/// If a tenant writes aux files without setting `switch_aux_policy`, this value will be used.
pub fn default_tenant_config() -> Self {
Self::V1
Self::V2
}
}

View File

@@ -726,7 +726,17 @@ impl Timeline {
) -> Result<HashMap<String, Bytes>, PageReconstructError> {
let current_policy = self.last_aux_file_policy.load();
match current_policy {
Some(AuxFilePolicy::V1) | None => self.list_aux_files_v1(lsn, ctx).await,
Some(AuxFilePolicy::V1) => {
warn!("this timeline is using deprecated aux file policy V1 (policy=V1)");
self.list_aux_files_v1(lsn, ctx).await
}
None => {
let res = self.list_aux_files_v1(lsn, ctx).await?;
if !res.is_empty() {
warn!("this timeline is using deprecated aux file policy V1 (policy=None)");
}
Ok(res)
}
Some(AuxFilePolicy::V2) => self.list_aux_files_v2(lsn, ctx).await,
Some(AuxFilePolicy::CrossValidation) => {
let v1_result = self.list_aux_files_v1(lsn, ctx).await;
@@ -1587,6 +1597,7 @@ impl<'a> DatadirModification<'a> {
if aux_files_key_v1.is_empty() {
None
} else {
warn!("this timeline is using deprecated aux file policy V1");
self.tline.do_switch_aux_policy(AuxFilePolicy::V1)?;
Some(AuxFilePolicy::V1)
}
@@ -2048,7 +2059,7 @@ mod tests {
let (tenant, ctx) = harness.load().await;
let tline = tenant
.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
.create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
.await?;
let tline = tline.raw_timeline().unwrap();

View File

@@ -5932,10 +5932,10 @@ mod tests {
.await
.unwrap();
// the default aux file policy to switch is v1 if not set by the admins
// the default aux file policy to switch is v2 if not set by the admins
assert_eq!(
harness.tenant_conf.switch_aux_file_policy,
AuxFilePolicy::V1
AuxFilePolicy::default_tenant_config()
);
let (tenant, ctx) = harness.load().await;
@@ -5979,8 +5979,8 @@ mod tests {
);
assert_eq!(
tline.last_aux_file_policy.load(),
Some(AuxFilePolicy::V1),
"aux file is written with switch_aux_file_policy unset (which is v1), so we should keep v1"
Some(AuxFilePolicy::V2),
"aux file is written with switch_aux_file_policy unset (which is v2), so we should use v2 there"
);
// we can read everything from the storage
@@ -6002,8 +6002,8 @@ mod tests {
assert_eq!(
tline.last_aux_file_policy.load(),
Some(AuxFilePolicy::V1),
"keep v1 storage format when new files are written"
Some(AuxFilePolicy::V2),
"keep v2 storage format when new files are written"
);
let files = tline.list_aux_files(lsn, &ctx).await.unwrap();
@@ -6019,7 +6019,7 @@ mod tests {
// child copies the last flag even if that is not on remote storage yet
assert_eq!(child.get_switch_aux_file_policy(), AuxFilePolicy::V2);
assert_eq!(child.last_aux_file_policy.load(), Some(AuxFilePolicy::V1));
assert_eq!(child.last_aux_file_policy.load(), Some(AuxFilePolicy::V2));
let files = child.list_aux_files(lsn, &ctx).await.unwrap();
assert_eq!(files.get("pg_logical/mappings/test1"), None);

View File

@@ -2234,6 +2234,11 @@ impl Timeline {
handles: Default::default(),
};
if aux_file_policy == Some(AuxFilePolicy::V1) {
warn!("this timeline is using deprecated aux file policy V1");
}
result.repartition_threshold =
result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE;

View File

@@ -22,7 +22,7 @@ def random_string(n: int):
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.V2, AuxFileStore.CrossValidation]
"pageserver_aux_file_policy", [AuxFileStore.V2, AuxFileStore.CrossValidation]
)
def test_aux_file_v2_flag(neon_simple_env: NeonEnv, pageserver_aux_file_policy: AuxFileStore):
env = neon_simple_env
@@ -31,9 +31,7 @@ def test_aux_file_v2_flag(neon_simple_env: NeonEnv, pageserver_aux_file_policy:
assert pageserver_aux_file_policy == tenant_config["switch_aux_file_policy"]
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation])
def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
@@ -175,9 +173,7 @@ COMMIT;
# Test that neon.logical_replication_max_snap_files works
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation])
def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):
def slot_removed(ep):
assert (
@@ -355,9 +351,7 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of
#
# Most pages start with a contrecord, so we don't do anything special
# to ensure that.
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation])
def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
@@ -402,9 +396,7 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg):
# logical replication bug as such, but without logical replication,
# records passed ot the WAL redo process are never large enough to hit
# the bug.
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation])
def test_large_records(neon_simple_env: NeonEnv, vanilla_pg):
env = neon_simple_env
@@ -476,9 +468,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
@pytest.mark.parametrize(
"pageserver_aux_file_policy", [AuxFileStore.V1, AuxFileStore.CrossValidation]
)
@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation])
def test_replication_shutdown(neon_simple_env: NeonEnv):
# Ensure Postgres can exit without stuck when a replication job is active + neon extension installed
env = neon_simple_env