refactor: reparented_direct_children query

Author: Joonas Koivunen
Date: 2024-07-19 09:55:49 +00:00
Parent: cdfaf0700f
Commit: 44cdb9fb58

2 changed files with 61 additions and 54 deletions


@@ -3707,10 +3707,6 @@ impl Timeline {
         Ok(ancestor.clone())
     }
-    pub(crate) fn get_ancestor_timeline(&self) -> Option<Arc<Timeline>> {
-        self.ancestor_timeline.clone()
-    }
     pub(crate) fn get_shard_identity(&self) -> &ShardIdentity {
         &self.shard_identity
     }

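The removed accessor only cloned the `ancestor_timeline` field; after this commit the remaining use compares that Arc against the detached timeline by identity. Below is a minimal sketch of that `Arc::ptr_eq` check, using a hypothetical bare `Timeline` type rather than the real pageserver one:

use std::sync::Arc;

// Hypothetical stand-in for the pageserver's Timeline; only the Arc
// identity matters for this sketch.
struct Timeline;

fn main() {
    let detached = Arc::new(Timeline);
    let child_ancestor = Arc::clone(&detached);
    let unrelated = Arc::new(Timeline);

    // Arc::ptr_eq compares allocations, not contents: a clone of `detached`
    // is the same timeline object, a separately created one is not.
    assert!(Arc::ptr_eq(&child_ancestor, &detached));
    assert!(!Arc::ptr_eq(&unrelated, &detached));
}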

@@ -567,59 +567,12 @@ pub(super) async fn prepare(
         // report back the timelines which have been reparented by our detach, or which are still
         // reparentable
-        let mut all_direct_children = tenant
-            .timelines
-            .lock()
-            .unwrap()
-            .values()
-            .filter_map(|tl| {
-                let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached));
-                if is_direct_child {
-                    Some((tl.ancestor_lsn, tl.clone()))
-                } else {
-                    None
-                }
-            })
-            // Collect to avoid a lock ordering problem between Tenant::timelines and
-            // Timeline::remote_client
-            .collect::<Vec<_>>();
-        let mut any_shutdown = false;
-        all_direct_children.retain(
-            |(_, tl)| match tl.remote_client.initialized_upload_queue() {
-                Ok(accessor) => accessor
-                    .latest_uploaded_index_part()
-                    .lineage
-                    .is_reparented(),
-                Err(_shutdownalike) => {
-                    // not 100% a shutdown, but let's bail early not to give inconsistent results in
-                    // a sharded environment.
-                    any_shutdown = true;
-                    true
-                }
-            },
-        );
-        if any_shutdown {
-            // it could be one or many being deleted; have the client retry
-            return Err(Error::ShuttingDown);
-        }
-        let mut reparented = all_direct_children;
-        // why this instead of a hashset? there is a reason, but I've forgotten it many times.
-        //
-        // maybe if this was a hashset we would not be able to distinguish some race condition.
-        reparented.sort_unstable_by_key(|(lsn, tl)| (*lsn, tl.timeline_id));
-        // FIXME: add the non-reparented into the response -- these would be the reparentable, but
+        let reparented_timelines = reparented_direct_children(detached, tenant)?;
+        // IDEA? add the non-reparented into the response -- these would be the reparentable, but
         // no longer reparentable because they appeared *after* gc blocking was released.
         return Ok(Progress::Done(AncestorDetached {
-            reparented_timelines: reparented
-                .into_iter()
-                .map(|(_, tl)| tl.timeline_id)
-                .collect(),
+            reparented_timelines,
         }));
     };
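The "lock ordering" comment in the moved code is the crux of the extracted query: the Tenant::timelines map lock must be released before any per-timeline remote_client state is inspected, otherwise two code paths could take the two locks in opposite orders. A minimal sketch of that two-pass pattern, with hypothetical Tl/RemoteClient stand-ins for the real types:

use std::sync::{Arc, Mutex};

// Hypothetical stand-ins for Tenant::timelines and Timeline::remote_client;
// the real types carry far more state.
struct RemoteClient {
    reparented: Mutex<bool>,
}

struct Tl {
    remote_client: RemoteClient,
}

fn reparented_children(timelines: &Mutex<Vec<Arc<Tl>>>) -> Vec<Arc<Tl>> {
    // Pass 1: clone the Arcs out while holding only the timelines lock, and
    // release it before touching any per-timeline lock. Locking remote_client
    // while still inside the timelines lock would pin a lock order that
    // another code path might take in reverse, risking deadlock.
    let candidates: Vec<Arc<Tl>> = timelines.lock().unwrap().iter().cloned().collect();

    // Pass 2: only per-timeline locks are held here, one at a time.
    candidates
        .into_iter()
        .filter(|tl| *tl.remote_client.reparented.lock().unwrap())
        .collect()
}

fn main() {
    let timelines = Mutex::new(vec![Arc::new(Tl {
        remote_client: RemoteClient { reparented: Mutex::new(true) },
    })]);
    assert_eq!(reparented_children(&timelines).len(), 1);
}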
@@ -810,6 +763,64 @@ pub(super) async fn prepare(
     Ok(Progress::Prepared(attempt, prepared))
 }
 
+fn reparented_direct_children(
+    detached: &Arc<Timeline>,
+    tenant: &Tenant,
+) -> Result<Vec<TimelineId>, Error> {
+    let mut all_direct_children = tenant
+        .timelines
+        .lock()
+        .unwrap()
+        .values()
+        .filter_map(|tl| {
+            let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached));
+            if is_direct_child {
+                Some((tl.ancestor_lsn, tl.clone()))
+            } else {
+                if let Some(timeline) = tl.ancestor_timeline.as_ref() {
+                    assert_ne!(timeline.timeline_id, detached.timeline_id);
+                }
+                None
+            }
+        })
+        // Collect to avoid a lock ordering problem between Tenant::timelines and
+        // Timeline::remote_client
+        .collect::<Vec<_>>();
+    let mut any_shutdown = false;
+    all_direct_children.retain(
+        |(_, tl)| match tl.remote_client.initialized_upload_queue() {
+            Ok(accessor) => accessor
+                .latest_uploaded_index_part()
+                .lineage
+                .is_reparented(),
+            Err(_shutdownalike) => {
+                // not 100% a shutdown, but let's bail early not to give inconsistent results in
+                // a sharded environment.
+                any_shutdown = true;
+                true
+            }
+        },
+    );
+    if any_shutdown {
+        // it could be one or many being deleted; have the client retry
+        return Err(Error::ShuttingDown);
+    }
+    let mut reparented = all_direct_children;
+    // why this instead of a hashset? there is a reason, but I've forgotten it many times.
+    //
+    // maybe if this was a hashset we would not be able to distinguish some race condition.
+    reparented.sort_unstable_by_key(|(lsn, tl)| (*lsn, tl.timeline_id));
+    Ok(reparented
+        .into_iter()
+        .map(|(_, tl)| tl.timeline_id)
+        .collect())
+}
+
 fn partition_work(
     ancestor_lsn: Lsn,
     source_layermap: &LayerManager,
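
The sort_unstable_by_key on (ancestor_lsn, timeline_id) is what buys the helper a deterministic response where a HashSet's iteration order would not. A small self-contained illustration, with plain integers standing in for the real Lsn and TimelineId newtypes:

// Hypothetical scalar stand-ins for Lsn and TimelineId, just to show the
// ordering; the real newtypes implement Ord over the same data.
type Lsn = u64;
type TimelineId = u32;

fn main() {
    // Children in whatever order the timelines map happened to yield them.
    let mut reparented: Vec<(Lsn, TimelineId)> = vec![(30, 7), (10, 2), (10, 1)];

    // Sorting by (ancestor_lsn, timeline_id) makes the reported order
    // reproducible across calls, so repeated queries (and retries after
    // Error::ShuttingDown) compare equal.
    reparented.sort_unstable_by_key(|&(lsn, id)| (lsn, id));

    assert_eq!(reparented, vec![(10, 1), (10, 2), (30, 7)]);
}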