fix(pageserver): allow sibling archived branch for detaching (#11383)

## Problem

close https://github.com/neondatabase/neon/issues/11379

## Summary of changes

Remove checks around archived branches for detach v2. I also updated the
comments `ancestor_retain_lsn`.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-03-31 12:32:55 -04:00
committed by GitHub
parent 00bcafe82e
commit 0ee5bfa2fc
2 changed files with 70 additions and 28 deletions

View File

@@ -235,7 +235,7 @@ pub(super) async fn prepare(
return Err(NoAncestor);
}
check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn, behavior)?;
if let DetachBehavior::MultiLevelAndNoReparent = behavior {
// If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline.
@@ -249,7 +249,13 @@ pub(super) async fn prepare(
ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable
ancestor = ancestor_of_ancestor;
// TODO: do we still need to check if we don't want to reparent?
check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
check_no_archived_children_of_ancestor(
tenant,
detached,
&ancestor,
ancestor_lsn,
behavior,
)?;
}
} else if ancestor.ancestor_timeline.is_some() {
// non-technical requirement; we could flatten N ancestors just as easily but we chose
@@ -1156,31 +1162,44 @@ fn check_no_archived_children_of_ancestor(
detached: &Arc<Timeline>,
ancestor: &Arc<Timeline>,
ancestor_lsn: Lsn,
detach_behavior: DetachBehavior,
) -> Result<(), Error> {
let timelines = tenant.timelines.lock().unwrap();
let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
for timeline in reparentable_timelines(timelines.values(), detached, ancestor, ancestor_lsn) {
if timeline.is_archived() == Some(true) {
return Err(Error::Archived(timeline.timeline_id));
}
}
for timeline_offloaded in timelines_offloaded.values() {
if timeline_offloaded.ancestor_timeline_id != Some(ancestor.timeline_id) {
continue;
}
// This forbids the detach ancestor feature if flattened timelines are present,
// even if the ancestor_lsn is from after the branchpoint of the detached timeline.
// But as per current design, we don't record the ancestor_lsn of flattened timelines.
// This is a bit unfortunate, but as of writing this we don't support flattening
// anyway. Maybe we can evolve the data model in the future.
if let Some(retain_lsn) = timeline_offloaded.ancestor_retain_lsn {
let is_earlier = retain_lsn <= ancestor_lsn;
if !is_earlier {
continue;
match detach_behavior {
DetachBehavior::NoAncestorAndReparent => {
let timelines = tenant.timelines.lock().unwrap();
let timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
for timeline in
reparentable_timelines(timelines.values(), detached, ancestor, ancestor_lsn)
{
if timeline.is_archived() == Some(true) {
return Err(Error::Archived(timeline.timeline_id));
}
}
for timeline_offloaded in timelines_offloaded.values() {
if timeline_offloaded.ancestor_timeline_id != Some(ancestor.timeline_id) {
continue;
}
// This forbids the detach ancestor feature if flattened timelines are present,
// even if the ancestor_lsn is from after the branchpoint of the detached timeline.
// But as per current design, we don't record the ancestor_lsn of flattened timelines.
// This is a bit unfortunate, but as of writing this we don't support flattening
// anyway. Maybe we can evolve the data model in the future.
if let Some(retain_lsn) = timeline_offloaded.ancestor_retain_lsn {
let is_earlier = retain_lsn <= ancestor_lsn;
if !is_earlier {
continue;
}
}
return Err(Error::Archived(timeline_offloaded.timeline_id));
}
}
return Err(Error::Archived(timeline_offloaded.timeline_id));
DetachBehavior::MultiLevelAndNoReparent => {
// We don't need to check anything if the user requested to not reparent.
}
}
Ok(())
}

View File

@@ -343,7 +343,8 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder):
wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline)
def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder):
@pytest.mark.parametrize("snapshots_archived", ["archived", "normal"])
def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder, snapshots_archived: str):
"""
Test the v2 behavior of ancestor detach.
@@ -385,6 +386,11 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder):
ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(0, 8191) g(i);")
branchpoint_y = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
client.timeline_checkpoint(env.initial_tenant, env.initial_timeline)
ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(0, 8191) g(i);")
branchpoint_x = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)
client.timeline_checkpoint(env.initial_tenant, env.initial_timeline)
@@ -395,6 +401,10 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder):
"earlier", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe
)
snapshot_branchpoint_old = env.create_branch(
"snapshot_branchpoint_old", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_y
)
snapshot_branchpoint = env.create_branch(
"snapshot_branchpoint", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_x
)
@@ -407,19 +417,32 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder):
after = env.create_branch("after", ancestor_branch_name="main", ancestor_start_lsn=None)
if snapshots_archived == "archived":
# archive the previous snapshot branchpoint
client.timeline_archival_config(
env.initial_tenant, snapshot_branchpoint_old, TimelineArchivalState.ARCHIVED
)
all_reparented = client.detach_ancestor(
env.initial_tenant, branch_to_detach, detach_behavior="v2"
)
assert set(all_reparented) == set()
if snapshots_archived == "archived":
# restore the branchpoint so that we can query from the endpoint
client.timeline_archival_config(
env.initial_tenant, snapshot_branchpoint_old, TimelineArchivalState.UNARCHIVED
)
env.pageserver.quiesce_tenants()
# checking the ancestor after is much faster than waiting for the endpoint not start
expected_result = [
("main", env.initial_timeline, None, 16384, 1),
("after", after, env.initial_timeline, 16384, 1),
("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 8192, 1),
("branch_to_detach", branch_to_detach, None, 8192, 1),
("main", env.initial_timeline, None, 24576, 1),
("after", after, env.initial_timeline, 24576, 1),
("snapshot_branchpoint_old", snapshot_branchpoint_old, env.initial_timeline, 8192, 1),
("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 16384, 1),
("branch_to_detach", branch_to_detach, None, 16384, 1),
("earlier", earlier, env.initial_timeline, 0, 1),
]