refactor: reparented_direct_children query

Author: Joonas Koivunen
Date: 2024-07-19 09:55:49 +00:00
Parent: cdfaf0700f
Commit: 44cdb9fb58

2 changed files with 61 additions and 54 deletions


@@ -3707,10 +3707,6 @@ impl Timeline {
         Ok(ancestor.clone())
     }
-    pub(crate) fn get_ancestor_timeline(&self) -> Option<Arc<Timeline>> {
-        self.ancestor_timeline.clone()
-    }
     pub(crate) fn get_shard_identity(&self) -> &ShardIdentity {
         &self.shard_identity
     }

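The removed accessor only cloned the `ancestor_timeline` field; after this commit the remaining use compares that Arc against the detached timeline by identity. Below is a minimal sketch of that `Arc::ptr_eq` check, using a hypothetical bare `Timeline` type rather than the real pageserver one:

use std::sync::Arc;

// Hypothetical stand-in for the pageserver's Timeline; only the Arc
// identity matters for this sketch.
struct Timeline;

fn main() {
    let detached = Arc::new(Timeline);
    let child_ancestor = Arc::clone(&detached);
    let unrelated = Arc::new(Timeline);

    // Arc::ptr_eq compares allocations, not contents: a clone of `detached`
    // is the same timeline object, a separately created one is not.
    assert!(Arc::ptr_eq(&child_ancestor, &detached));
    assert!(!Arc::ptr_eq(&unrelated, &detached));
}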

@@ -567,59 +567,12 @@ pub(super) async fn prepare(
         // report back the timelines which have been reparented by our detach, or which are still
         // reparentable
-        let mut all_direct_children = tenant
-            .timelines
-            .lock()
-            .unwrap()
-            .values()
-            .filter_map(|tl| {
-                let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached));
-                if is_direct_child {
-                    Some((tl.ancestor_lsn, tl.clone()))
-                } else {
-                    None
-                }
-            })
-            // Collect to avoid a lock ordering problem between Tenant::timelines and
-            // Timeline::remote_client
-            .collect::<Vec<_>>();
-        let mut any_shutdown = false;
-        all_direct_children.retain(
-            |(_, tl)| match tl.remote_client.initialized_upload_queue() {
-                Ok(accessor) => accessor
-                    .latest_uploaded_index_part()
-                    .lineage
-                    .is_reparented(),
-                Err(_shutdownalike) => {
-                    // not 100% a shutdown, but let's bail early not to give inconsistent results in
-                    // a sharded environment.
-                    any_shutdown = true;
-                    true
-                }
-            },
-        );
-        if any_shutdown {
-            // it could be one or many being deleted; have the client retry
-            return Err(Error::ShuttingDown);
-        }
-        let mut reparented = all_direct_children;
-        // why this instead of a hashset? there is a reason, but I've forgotten it many times.
-        //
-        // maybe if this was a hashset we would not be able to distinguish some race condition.
-        reparented.sort_unstable_by_key(|(lsn, tl)| (*lsn, tl.timeline_id));
-        // FIXME: add the non-reparented into the response -- these would be the reparentable, but
+        let reparented_timelines = reparented_direct_children(detached, tenant)?;
+        // IDEA? add the non-reparented into the response -- these would be the reparentable, but
         // no longer reparentable because they appeared *after* gc blocking was released.
         return Ok(Progress::Done(AncestorDetached {
-            reparented_timelines: reparented
-                .into_iter()
-                .map(|(_, tl)| tl.timeline_id)
-                .collect(),
+            reparented_timelines,
         }));
     };
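The "lock ordering" comment in the moved code is the crux of the extracted query: the Tenant::timelines map lock must be released before any per-timeline remote_client state is inspected, otherwise two code paths could take the two locks in opposite orders. A minimal sketch of that two-pass pattern, with hypothetical Tl/RemoteClient stand-ins for the real types:

use std::sync::{Arc, Mutex};

// Hypothetical stand-ins for Tenant::timelines and Timeline::remote_client;
// the real types carry far more state.
struct RemoteClient {
    reparented: Mutex<bool>,
}

struct Tl {
    remote_client: RemoteClient,
}

fn reparented_children(timelines: &Mutex<Vec<Arc<Tl>>>) -> Vec<Arc<Tl>> {
    // Pass 1: clone the Arcs out while holding only the timelines lock, and
    // release it before touching any per-timeline lock. Locking remote_client
    // while still inside the timelines lock would pin a lock order that
    // another code path might take in reverse, risking deadlock.
    let candidates: Vec<Arc<Tl>> = timelines.lock().unwrap().iter().cloned().collect();

    // Pass 2: only per-timeline locks are held here, one at a time.
    candidates
        .into_iter()
        .filter(|tl| *tl.remote_client.reparented.lock().unwrap())
        .collect()
}

fn main() {
    let timelines = Mutex::new(vec![Arc::new(Tl {
        remote_client: RemoteClient { reparented: Mutex::new(true) },
    })]);
    assert_eq!(reparented_children(&timelines).len(), 1);
}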
@@ -810,6 +763,64 @@ pub(super) async fn prepare(
     Ok(Progress::Prepared(attempt, prepared))
 }
 
+fn reparented_direct_children(
+    detached: &Arc<Timeline>,
+    tenant: &Tenant,
+) -> Result<Vec<TimelineId>, Error> {
+    let mut all_direct_children = tenant
+        .timelines
+        .lock()
+        .unwrap()
+        .values()
+        .filter_map(|tl| {
+            let is_direct_child = matches!(tl.ancestor_timeline.as_ref(), Some(ancestor) if Arc::ptr_eq(ancestor, detached));
+            if is_direct_child {
+                Some((tl.ancestor_lsn, tl.clone()))
+            } else {
+                if let Some(timeline) = tl.ancestor_timeline.as_ref() {
+                    assert_ne!(timeline.timeline_id, detached.timeline_id);
+                }
+                None
+            }
+        })
+        // Collect to avoid a lock ordering problem between Tenant::timelines and
+        // Timeline::remote_client
+        .collect::<Vec<_>>();
+    let mut any_shutdown = false;
+    all_direct_children.retain(
+        |(_, tl)| match tl.remote_client.initialized_upload_queue() {
+            Ok(accessor) => accessor
+                .latest_uploaded_index_part()
+                .lineage
+                .is_reparented(),
+            Err(_shutdownalike) => {
+                // not 100% a shutdown, but let's bail early not to give inconsistent results in
+                // a sharded environment.
+                any_shutdown = true;
+                true
+            }
+        },
+    );
+    if any_shutdown {
+        // it could be one or many being deleted; have the client retry
+        return Err(Error::ShuttingDown);
+    }
+    let mut reparented = all_direct_children;
+    // why this instead of a hashset? there is a reason, but I've forgotten it many times.
+    //
+    // maybe if this was a hashset we would not be able to distinguish some race condition.
+    reparented.sort_unstable_by_key(|(lsn, tl)| (*lsn, tl.timeline_id));
+    Ok(reparented
+        .into_iter()
+        .map(|(_, tl)| tl.timeline_id)
+        .collect())
+}
+
 fn partition_work(
     ancestor_lsn: Lsn,
     source_layermap: &LayerManager,
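
The sort_unstable_by_key on (ancestor_lsn, timeline_id) is what buys the helper a deterministic response where a HashSet's iteration order would not. A small self-contained illustration, with plain integers standing in for the real Lsn and TimelineId newtypes:

// Hypothetical scalar stand-ins for Lsn and TimelineId, just to show the
// ordering; the real newtypes implement Ord over the same data.
type Lsn = u64;
type TimelineId = u32;

fn main() {
    // Children in whatever order the timelines map happened to yield them.
    let mut reparented: Vec<(Lsn, TimelineId)> = vec![(30, 7), (10, 2), (10, 1)];

    // Sorting by (ancestor_lsn, timeline_id) makes the reported order
    // reproducible across calls, so repeated queries (and retries after
    // Error::ShuttingDown) compare equal.
    reparented.sort_unstable_by_key(|&(lsn, id)| (lsn, id));

    assert_eq!(reparented, vec![(10, 1), (10, 2), (30, 7)]);
}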