From d29d506b63f1d13ec26d7c6eed3d939a69e473d7 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Tue, 11 Mar 2025 13:38:25 -0400 Subject: [PATCH] no reparenting for behavior v2 Signed-off-by: Alex Chi Z --- pageserver/src/http/routes.rs | 33 ++++++++++-- pageserver/src/tenant/mgr.rs | 2 + pageserver/src/tenant/timeline.rs | 5 +- .../src/tenant/timeline/detach_ancestor.rs | 52 ++++++++++++++----- test_runner/fixtures/pageserver/http.py | 4 +- .../regress/test_timeline_detach_ancestor.py | 40 +++++++------- 6 files changed, 100 insertions(+), 36 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 3c0c23a56d..ac162602db 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -71,6 +71,7 @@ use crate::tenant::remote_timeline_client::{ use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName}; +use crate::tenant::timeline::detach_ancestor::DetachBehavior; use crate::tenant::timeline::offload::{OffloadError, offload_timeline}; use crate::tenant::timeline::{ CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout, @@ -2489,8 +2490,29 @@ async fn timeline_download_remote_layers_handler_get( json_response(StatusCode::OK, info) } -async fn timeline_detach_ancestor_handler( +async fn timeline_detach_ancestor_handler_v1( request: Request, + cancel: CancellationToken, +) -> Result, ApiError> { + timeline_detach_ancestor_handler_common(request, DetachBehavior::NoAncestorAndReparent, cancel) + .await +} + +async fn timeline_detach_ancestor_handler_v2( + request: Request, + cancel: CancellationToken, +) -> Result, ApiError> { + timeline_detach_ancestor_handler_common( + request, + DetachBehavior::MultipleLevelAndNoReparent, + cancel, + ) + .await +} + +async fn timeline_detach_ancestor_handler_common( + request: Request, + behavior: DetachBehavior, _cancel: CancellationToken, ) -> Result, ApiError> { use pageserver_api::models::detach_ancestor::AncestorDetached; @@ -2548,7 +2570,7 @@ async fn timeline_detach_ancestor_handler( let timeline = tenant.get_timeline(timeline_id, true)?; let progress = timeline - .prepare_to_detach_from_ancestor(&tenant, options, ctx) + .prepare_to_detach_from_ancestor(&tenant, options, behavior, ctx) .await?; // uncomment to allow early as possible Tenant::drop @@ -2563,6 +2585,7 @@ async fn timeline_detach_ancestor_handler( tenant_shard_id, timeline_id, prepared, + behavior, attempt, ctx, ) @@ -3742,7 +3765,11 @@ pub fn make_router( ) .put( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/detach_ancestor", - |r| api_handler(r, timeline_detach_ancestor_handler), + |r| api_handler(r, timeline_detach_ancestor_handler_v1), + ) + .put( + "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/detach_ancestor_v2", + |r| api_handler(r, timeline_detach_ancestor_handler_v2), ) .delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| { api_handler(r, timeline_delete_handler) diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index c6536cf1ab..092bfdf6c1 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -1914,6 +1914,7 @@ impl TenantManager { tenant_shard_id: TenantShardId, timeline_id: TimelineId, prepared: PreparedTimelineDetach, + behavior: detach_ancestor::DetachBehavior, mut attempt: detach_ancestor::Attempt, ctx: &RequestContext, ) -> Result, detach_ancestor::Error> { @@ -1962,6 +1963,7 @@ impl TenantManager { prepared, attempt.ancestor_timeline_id, attempt.ancestor_lsn, + behavior, ctx, ) .await?; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2979408b97..0502e6a02f 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5388,9 +5388,10 @@ impl Timeline { self: &Arc, tenant: &crate::tenant::Tenant, options: detach_ancestor::Options, + behavior: detach_ancestor::DetachBehavior, ctx: &RequestContext, ) -> Result { - detach_ancestor::prepare(self, tenant, options, ctx).await + detach_ancestor::prepare(self, tenant, behavior, options, ctx).await } /// Second step of detach from ancestor; detaches the `self` from it's current ancestor and @@ -5408,6 +5409,7 @@ impl Timeline { prepared: detach_ancestor::PreparedTimelineDetach, ancestor_timeline_id: TimelineId, ancestor_lsn: Lsn, + behavior: detach_ancestor::DetachBehavior, ctx: &RequestContext, ) -> Result { detach_ancestor::detach_and_reparent( @@ -5416,6 +5418,7 @@ impl Timeline { prepared, ancestor_timeline_id, ancestor_lsn, + behavior, ctx, ) .await diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index 9d9d7b6a36..1c2c09b468 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -127,13 +127,23 @@ pub(crate) struct PreparedTimelineDetach { layers: Vec, } -/// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments. +// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments. #[derive(Debug)] pub(crate) struct Options { pub(crate) rewrite_concurrency: std::num::NonZeroUsize, pub(crate) copy_concurrency: std::num::NonZeroUsize, } +/// Controls the detach ancestor behavior. +/// - When set to `NoAncestorAndReparent`, we will only detach a branch if it has only one level of ancestor. It will automatically reparent the children of the ancestor. +/// - When set to `MultipleLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen for the children of the ancestor. +/// - Detach ancstor will always reparent the children of the detached branch. +#[derive(Debug, Clone, Copy)] +pub enum DetachBehavior { + NoAncestorAndReparent, + MultipleLevelAndNoReparent, +} + impl Default for Options { fn default() -> Self { Self { @@ -168,6 +178,7 @@ impl Attempt { pub(super) async fn prepare( detached: &Arc, tenant: &Tenant, + behavior: DetachBehavior, options: Options, ctx: &RequestContext, ) -> Result { @@ -230,23 +241,27 @@ pub(super) async fn prepare( check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?; - // If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline. - while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() { - if ancestor_lsn != ancestor.ancestor_lsn { - // non-technical requirement; we could flatten N ancestors just as easily but we chose - // not to, at least initially - return Err(TooManyAncestors); + if let DetachBehavior::MultipleLevelAndNoReparent = behavior { + // If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline. + while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() { + if ancestor_lsn != ancestor.ancestor_lsn { + // non-technical requirement; we could flatten N ancestors just as easily but we chose + // not to, at least initially + return Err(TooManyAncestors); + } + // Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same) + ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable + ancestor = ancestor_of_ancestor; + // TODO: do we still need to check if we don't want to reparent? + check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?; } - // Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same) - ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable - ancestor = ancestor_of_ancestor; - check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?; } tracing::info!( - "attempt to detach the timeline from the ancestor: {}@{}", + "attempt to detach the timeline from the ancestor: {}@{}, behavior={:?}", ancestor.timeline_id, - ancestor_lsn + ancestor_lsn, + behavior ); let attempt = start_new_attempt(detached, tenant, ancestor.timeline_id, ancestor_lsn).await?; @@ -833,6 +848,7 @@ pub(super) async fn detach_and_reparent( prepared: PreparedTimelineDetach, ancestor_timeline_id: TimelineId, ancestor_lsn: Lsn, + behavior: DetachBehavior, _ctx: &RequestContext, ) -> Result { let PreparedTimelineDetach { layers } = prepared; @@ -956,6 +972,11 @@ pub(super) async fn detach_and_reparent( Ancestor::Detached(ancestor, ancestor_lsn) => (ancestor, ancestor_lsn, false), }; + if let DetachBehavior::MultipleLevelAndNoReparent = behavior { + // Do not reparent if the user requests to behave so. + return Ok(DetachingAndReparenting::Reparented(HashSet::new())); + } + let mut tasks = tokio::task::JoinSet::new(); // Returns a single permit semaphore which will be used to make one reparenting succeed, @@ -1093,6 +1114,11 @@ pub(super) async fn complete( } /// Query against a locked `Tenant::timelines`. +/// +/// A timeline is reparentable if: +/// +/// - It is not the timeline being detached. +/// - It has the same ancestor as the timeline being detached. Note that the ancestor might not be the direct ancestor. fn reparentable_timelines<'a, I>( timelines: I, detached: &'a Arc, diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 0efe0b9575..4c164d8fad 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -1070,13 +1070,15 @@ class PageserverHttpClient(requests.Session, MetricsGetter): tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, batch_size: int | None = None, + behavior_v2: bool = False, **kwargs, ) -> set[TimelineId]: params = {} if batch_size is not None: params["batch_size"] = batch_size + endpoint = "detach_ancestor" if not behavior_v2 else "detach_ancestor_v2" res = self.put( - f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor", + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/{endpoint}", params=params, **kwargs, ) diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index adef1a8f84..79537ba83a 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -343,8 +343,10 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder): wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline) -def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder): +def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder): """ + Test the v2 behavior of ancestor detach. + old main -------|---------X---------> | | | | | +-> after @@ -352,18 +354,19 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder): | | | +-> branch-to-detach | - +-> reparented + +-> earlier Ends up as: old main -------|---------X---------> - | - +-> after - + | | | + | | +-> after + | +--X empty snapshot branch + | + +-> earlier - new main -------|---------X----> branch-to-detach - | +----> empty snapshot branch - +-> reparented + + new main -------|---------|----> branch-to-detach """ env = neon_env_builder.init_start() @@ -388,8 +391,8 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder): ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(8192, 16383) g(i);") wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) - reparented = env.create_branch( - "reparented", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe + earlier = env.create_branch( + "earlier", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe ) snapshot_branchpoint = env.create_branch( @@ -404,8 +407,8 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder): after = env.create_branch("after", ancestor_branch_name="main", ancestor_start_lsn=None) - all_reparented = client.detach_ancestor(env.initial_tenant, branch_to_detach) - assert set(all_reparented) == {reparented, snapshot_branchpoint} + all_reparented = client.detach_ancestor(env.initial_tenant, branch_to_detach, behavior_v2=True) + assert set(all_reparented) == set() env.pageserver.quiesce_tenants() @@ -413,9 +416,9 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder): expected_result = [ ("main", env.initial_timeline, None, 16384, 1), ("after", after, env.initial_timeline, 16384, 1), - ("snapshot_branchpoint", snapshot_branchpoint, branch_to_detach, 8192, 1), + ("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 8192, 1), ("branch_to_detach", branch_to_detach, None, 8192, 1), - ("reparented", reparented, branch_to_detach, 0, 1), + ("earlier", earlier, env.initial_timeline, 0, 1), ] assert isinstance(env.pageserver_remote_storage, LocalFsStorage) @@ -463,13 +466,14 @@ def test_ancestor_detach_two_level_ancestors(neon_env_builder: NeonEnvBuilder): assert ep.safe_psql("SELECT count(*) FROM foo;")[0][0] == rows assert ep.safe_psql(f"SELECT count(*) FROM audit WHERE starts = {starts}")[0][0] == 1 - # delete the timelines to confirm detach actually worked + # delete the new timeline to confirm it doesn't carry over the anything from the old timeline + client.timeline_delete(env.initial_tenant, branch_to_detach) + wait_timeline_detail_404(client, env.initial_tenant, branch_to_detach) + + # delete the after timeline client.timeline_delete(env.initial_tenant, after) wait_timeline_detail_404(client, env.initial_tenant, after) - client.timeline_delete(env.initial_tenant, env.initial_timeline) - wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline) - def test_detached_receives_flushes_while_being_detached(neon_env_builder: NeonEnvBuilder): """