diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 4a617438c0..543324228b 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3707,10 +3707,6 @@ impl Timeline { Ok(ancestor.clone()) } - pub(crate) fn get_ancestor_timeline(&self) -> Option> { - self.ancestor_timeline.clone() - } - pub(crate) fn get_shard_identity(&self) -> &ShardIdentity { &self.shard_identity } diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index b0a06441c6..eafef678ca 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -567,59 +567,12 @@ pub(super) async fn prepare( // report back the timelines which have been reparented by our detach, or which are still // reparentable - let mut all_direct_children = tenant - .timelines - .lock() - .unwrap() - .values() - .filter_map(|tl| { - let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached)); + let reparented_timelines = reparented_direct_children(detached, tenant)?; - if is_direct_child { - Some((tl.ancestor_lsn, tl.clone())) - } else { - None - } - }) - // Collect to avoid lock taking order problem with Tenant::timelines and - // Timeline::remote_client - .collect::>(); - - let mut any_shutdown = false; - - all_direct_children.retain( - |(_, tl)| match tl.remote_client.initialized_upload_queue() { - Ok(accessor) => accessor - .latest_uploaded_index_part() - .lineage - .is_reparented(), - Err(_shutdownalike) => { - // not 100% a shutdown, but let's bail early not to give inconsistent results in - // sharded enviroment. - any_shutdown = true; - true - } - }, - ); - - if any_shutdown { - // it could be one or many being deleted; have client retry - return Err(Error::ShuttingDown); - } - - let mut reparented = all_direct_children; - // why this instead of hashset? there is a reason, but I've forgotten it many times. - // - // maybe if this was a hashset we would not be able to distinguish some race condition. - reparented.sort_unstable_by_key(|(lsn, tl)| (*lsn, tl.timeline_id)); - - // FIXME: add the non-reparented in to the response -- these would be the reparentable, but + // IDEA? add the non-reparented in to the response -- these would be the reparentable, but // no longer reparentable because they appeared *after* gc blocking was released. return Ok(Progress::Done(AncestorDetached { - reparented_timelines: reparented - .into_iter() - .map(|(_, tl)| tl.timeline_id) - .collect(), + reparented_timelines, })); }; @@ -810,6 +763,64 @@ pub(super) async fn prepare( Ok(Progress::Prepared(attempt, prepared)) } +fn reparented_direct_children( + detached: &Arc, + tenant: &Tenant, +) -> Result, Error> { + let mut all_direct_children = tenant + .timelines + .lock() + .unwrap() + .values() + .filter_map(|tl| { + let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached)); + + if is_direct_child { + Some((tl.ancestor_lsn, tl.clone())) + } else { + if let Some(timeline) = tl.ancestor_timeline.as_ref() { + assert_ne!(timeline.timeline_id, detached.timeline_id); + } + None + } + }) + // Collect to avoid lock taking order problem with Tenant::timelines and + // Timeline::remote_client + .collect::>(); + + let mut any_shutdown = false; + + all_direct_children.retain( + |(_, tl)| match tl.remote_client.initialized_upload_queue() { + Ok(accessor) => accessor + .latest_uploaded_index_part() + .lineage + .is_reparented(), + Err(_shutdownalike) => { + // not 100% a shutdown, but let's bail early not to give inconsistent results in + // sharded enviroment. + any_shutdown = true; + true + } + }, + ); + + if any_shutdown { + // it could be one or many being deleted; have client retry + return Err(Error::ShuttingDown); + } + + let mut reparented = all_direct_children; + // why this instead of hashset? there is a reason, but I've forgotten it many times. + // + // maybe if this was a hashset we would not be able to distinguish some race condition. + reparented.sort_unstable_by_key(|(lsn, tl)| (*lsn, tl.timeline_id)); + Ok(reparented + .into_iter() + .map(|(_, tl)| tl.timeline_id) + .collect()) +} + fn partition_work( ancestor_lsn: Lsn, source_layermap: &LayerManager,