no reparenting for behavior v2

Signed-off-by: Alex Chi Z <chi@neon.tech>
2026-05-21 23:20:40 +00:00 · 2025-03-11 13:38:25 -04:00
parent b225b336e8
commit d29d506b63
6 changed files with 100 additions and 36 deletions
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -71,6 +71,7 @@ use crate::tenant::remote_timeline_client::{
 use crate::tenant::secondary::SecondaryController;
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName};
+use crate::tenant::timeline::detach_ancestor::DetachBehavior;
 use crate::tenant::timeline::offload::{OffloadError, offload_timeline};
 use crate::tenant::timeline::{
    CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout,
@@ -2489,8 +2490,29 @@ async fn timeline_download_remote_layers_handler_get(
    json_response(StatusCode::OK, info)
 }

-async fn timeline_detach_ancestor_handler(
+async fn timeline_detach_ancestor_handler_v1(
    request: Request<Body>,
+    cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    timeline_detach_ancestor_handler_common(request, DetachBehavior::NoAncestorAndReparent, cancel)
+        .await
+}
+
+async fn timeline_detach_ancestor_handler_v2(
+    request: Request<Body>,
+    cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    timeline_detach_ancestor_handler_common(
+        request,
+        DetachBehavior::MultipleLevelAndNoReparent,
+        cancel,
+    )
+    .await
+}
+
+async fn timeline_detach_ancestor_handler_common(
+    request: Request<Body>,
+    behavior: DetachBehavior,
    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    use pageserver_api::models::detach_ancestor::AncestorDetached;
@@ -2548,7 +2570,7 @@ async fn timeline_detach_ancestor_handler(
        let timeline = tenant.get_timeline(timeline_id, true)?;

        let progress = timeline
-            .prepare_to_detach_from_ancestor(&tenant, options, ctx)
+            .prepare_to_detach_from_ancestor(&tenant, options, behavior, ctx)
            .await?;

        // uncomment to allow early as possible Tenant::drop
@@ -2563,6 +2585,7 @@ async fn timeline_detach_ancestor_handler(
                        tenant_shard_id,
                        timeline_id,
                        prepared,
+                        behavior,
                        attempt,
                        ctx,
                    )
@@ -3742,7 +3765,11 @@ pub fn make_router(
        )
        .put(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/detach_ancestor",
-            |r| api_handler(r, timeline_detach_ancestor_handler),
+            |r| api_handler(r, timeline_detach_ancestor_handler_v1),
+        )
+        .put(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/detach_ancestor_v2",
+            |r| api_handler(r, timeline_detach_ancestor_handler_v2),
        )
        .delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
            api_handler(r, timeline_delete_handler)
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -1914,6 +1914,7 @@ impl TenantManager {
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
        prepared: PreparedTimelineDetach,
+        behavior: detach_ancestor::DetachBehavior,
        mut attempt: detach_ancestor::Attempt,
        ctx: &RequestContext,
    ) -> Result<HashSet<TimelineId>, detach_ancestor::Error> {
@@ -1962,6 +1963,7 @@ impl TenantManager {
                prepared,
                attempt.ancestor_timeline_id,
                attempt.ancestor_lsn,
+                behavior,
                ctx,
            )
            .await?;
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -5388,9 +5388,10 @@ impl Timeline {
        self: &Arc<Timeline>,
        tenant: &crate::tenant::Tenant,
        options: detach_ancestor::Options,
+        behavior: detach_ancestor::DetachBehavior,
        ctx: &RequestContext,
    ) -> Result<detach_ancestor::Progress, detach_ancestor::Error> {
-        detach_ancestor::prepare(self, tenant, options, ctx).await
+        detach_ancestor::prepare(self, tenant, behavior, options, ctx).await
    }

    /// Second step of detach from ancestor; detaches the `self` from it's current ancestor and
@@ -5408,6 +5409,7 @@ impl Timeline {
        prepared: detach_ancestor::PreparedTimelineDetach,
        ancestor_timeline_id: TimelineId,
        ancestor_lsn: Lsn,
+        behavior: detach_ancestor::DetachBehavior,
        ctx: &RequestContext,
    ) -> Result<detach_ancestor::DetachingAndReparenting, detach_ancestor::Error> {
        detach_ancestor::detach_and_reparent(
@@ -5416,6 +5418,7 @@ impl Timeline {
            prepared,
            ancestor_timeline_id,
            ancestor_lsn,
+            behavior,
            ctx,
        )
        .await
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -127,13 +127,23 @@ pub(crate) struct PreparedTimelineDetach {
    layers: Vec<Layer>,
 }

-/// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
+// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
 #[derive(Debug)]
 pub(crate) struct Options {
    pub(crate) rewrite_concurrency: std::num::NonZeroUsize,
    pub(crate) copy_concurrency: std::num::NonZeroUsize,
 }

+/// Controls the detach ancestor behavior.
+/// - When set to `NoAncestorAndReparent`, we will only detach a branch if it has only one level of ancestor. It will automatically reparent the children of the ancestor.
+/// - When set to `MultipleLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen for the children of the ancestor.
+/// - Detach ancstor will always reparent the children of the detached branch.
+#[derive(Debug, Clone, Copy)]
+pub enum DetachBehavior {
+    NoAncestorAndReparent,
+    MultipleLevelAndNoReparent,
+}
+
 impl Default for Options {
    fn default() -> Self {
        Self {
@@ -168,6 +178,7 @@ impl Attempt {
 pub(super) async fn prepare(
    detached: &Arc<Timeline>,
    tenant: &Tenant,
+    behavior: DetachBehavior,
    options: Options,
    ctx: &RequestContext,
 ) -> Result<Progress, Error> {
@@ -230,23 +241,27 @@ pub(super) async fn prepare(

    check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;

-    // If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline.
-    while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() {
-        if ancestor_lsn != ancestor.ancestor_lsn {
-            // non-technical requirement; we could flatten N ancestors just as easily but we chose
-            // not to, at least initially
-            return Err(TooManyAncestors);
+    if let DetachBehavior::MultipleLevelAndNoReparent = behavior {
+        // If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline.
+        while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() {
+            if ancestor_lsn != ancestor.ancestor_lsn {
+                // non-technical requirement; we could flatten N ancestors just as easily but we chose
+                // not to, at least initially
+                return Err(TooManyAncestors);
+            }
+            // Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same)
+            ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable
+            ancestor = ancestor_of_ancestor;
+            // TODO: do we still need to check if we don't want to reparent?
+            check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
        }
-        // Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same)
-        ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable
-        ancestor = ancestor_of_ancestor;
-        check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?;
    }

    tracing::info!(
-        "attempt to detach the timeline from the ancestor: {}@{}",
+        "attempt to detach the timeline from the ancestor: {}@{}, behavior={:?}",
        ancestor.timeline_id,
-        ancestor_lsn
+        ancestor_lsn,
+        behavior
    );

    let attempt = start_new_attempt(detached, tenant, ancestor.timeline_id, ancestor_lsn).await?;
@@ -833,6 +848,7 @@ pub(super) async fn detach_and_reparent(
    prepared: PreparedTimelineDetach,
    ancestor_timeline_id: TimelineId,
    ancestor_lsn: Lsn,
+    behavior: DetachBehavior,
    _ctx: &RequestContext,
 ) -> Result<DetachingAndReparenting, Error> {
    let PreparedTimelineDetach { layers } = prepared;
@@ -956,6 +972,11 @@ pub(super) async fn detach_and_reparent(
        Ancestor::Detached(ancestor, ancestor_lsn) => (ancestor, ancestor_lsn, false),
    };

+    if let DetachBehavior::MultipleLevelAndNoReparent = behavior {
+        // Do not reparent if the user requests to behave so.
+        return Ok(DetachingAndReparenting::Reparented(HashSet::new()));
+    }
+
    let mut tasks = tokio::task::JoinSet::new();

    // Returns a single permit semaphore which will be used to make one reparenting succeed,
@@ -1093,6 +1114,11 @@ pub(super) async fn complete(
 }

 /// Query against a locked `Tenant::timelines`.
+///
+/// A timeline is reparentable if:
+///
+/// - It is not the timeline being detached.
+/// - It has the same ancestor as the timeline being detached. Note that the ancestor might not be the direct ancestor.
 fn reparentable_timelines<'a, I>(
    timelines: I,
    detached: &'a Arc<Timeline>,
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -1070,13 +1070,15 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        tenant_id: TenantId | TenantShardId,
        timeline_id: TimelineId,
        batch_size: int | None = None,
+        behavior_v2: bool = False,
        **kwargs,
    ) -> set[TimelineId]:
        params = {}
        if batch_size is not None:
            params["batch_size"] = batch_size
+        endpoint = "detach_ancestor" if not behavior_v2 else "detach_ancestor_v2"
        res = self.put(
-            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor",
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/{endpoint}",
            params=params,
            **kwargs,
        )
--- a/test_runner/regress/test_timeline_detach_ancestor.py
+++ b/test_runner/regress/test_timeline_detach_ancestor.py
@@ -343,8 +343,10 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder):
    wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline)


-def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder):
+def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder):
    """
+    Test the v2 behavior of ancestor detach.
+
    old main -------|---------X--------->
                    |         |         |
                    |         |         +-> after
@@ -352,18 +354,19 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder):
                    |            |
                    |            +-> branch-to-detach
                    |
-                    +-> reparented
+                    +-> earlier

    Ends up as:

    old main -------|---------X--------->
-                                        |
-                                       +-> after
-                              
+                    |         |         |
+                    |         |         +-> after
+                    |         +--X empty snapshot branch
+                    |
+                    +-> earlier

-    new main -------|---------X----> branch-to-detach
-                    |         +----> empty snapshot branch
-                    +-> reparented
+
+    new main -------|---------|----> branch-to-detach
    """

    env = neon_env_builder.init_start()
@@ -388,8 +391,8 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder):
        ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(8192, 16383) g(i);")
        wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline)

-    reparented = env.create_branch(
-        "reparented", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe
+    earlier = env.create_branch(
+        "earlier", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe
    )

    snapshot_branchpoint = env.create_branch(
@@ -404,8 +407,8 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder):

    after = env.create_branch("after", ancestor_branch_name="main", ancestor_start_lsn=None)

-    all_reparented = client.detach_ancestor(env.initial_tenant, branch_to_detach)
-    assert set(all_reparented) == {reparented, snapshot_branchpoint}
+    all_reparented = client.detach_ancestor(env.initial_tenant, branch_to_detach, behavior_v2=True)
+    assert set(all_reparented) == set()

    env.pageserver.quiesce_tenants()

@@ -413,9 +416,9 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder):
    expected_result = [
        ("main", env.initial_timeline, None, 16384, 1),
        ("after", after, env.initial_timeline, 16384, 1),
-        ("snapshot_branchpoint", snapshot_branchpoint, branch_to_detach, 8192, 1),
+        ("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 8192, 1),
        ("branch_to_detach", branch_to_detach, None, 8192, 1),
-        ("reparented", reparented, branch_to_detach, 0, 1),
+        ("earlier", earlier, env.initial_timeline, 0, 1),
    ]

    assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
@@ -463,13 +466,14 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder):
            assert ep.safe_psql("SELECT count(*) FROM foo;")[0][0] == rows
            assert ep.safe_psql(f"SELECT count(*) FROM audit WHERE starts = {starts}")[0][0] == 1

-    # delete the timelines to confirm detach actually worked
+    # delete the new timeline to confirm it doesn't carry over the anything from the old timeline
+    client.timeline_delete(env.initial_tenant, branch_to_detach)
+    wait_timeline_detail_404(client, env.initial_tenant, branch_to_detach)
+
+    # delete the after timeline
    client.timeline_delete(env.initial_tenant, after)
    wait_timeline_detail_404(client, env.initial_tenant, after)

-    client.timeline_delete(env.initial_tenant, env.initial_timeline)
-    wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline)
-

 def test_detached_receives_flushes_while_being_detached(neon_env_builder: NeonEnvBuilder):
    """