From 5bad2deff894d1c44d811e8365297d3422f40680 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 3 Dec 2021 07:42:10 -0500 Subject: [PATCH] Don't hold 'timelines' lock over checkpoint. It was very noticeable that you while the checkpointer was busy, you could not e.g. open a new connection. --- pageserver/src/layered_repository.rs | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index af046536e0..3124cb3488 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -238,16 +238,22 @@ impl Repository for LayeredRepository { } fn checkpoint_iteration(&self, cconf: CheckpointConfig) -> Result<()> { - { - let timelines = self.timelines.lock().unwrap(); + // Scan through the hashmap and collect a list of all the timelines, + // while holding the lock. Then drop the lock and actually perform the + // checkpoints. We don't want to block everything else while the + // checkpoint runs. + let timelines = self.timelines.lock().unwrap(); + let timelines_to_checkpoint: Vec<(ZTimelineId, Arc)> = timelines + .iter() + .map(|(timelineid, timeline)| (*timelineid, timeline.clone())) + .collect(); + drop(timelines); - for (timelineid, timeline) in timelines.iter() { - let _entered = - info_span!("checkpoint", timeline = %timelineid, tenant = %self.tenantid) - .entered(); + for (timelineid, timeline) in timelines_to_checkpoint.iter() { + let _entered = + info_span!("checkpoint", timeline = %timelineid, tenant = %self.tenantid).entered(); - timeline.checkpoint(cconf)?; - } + timeline.checkpoint(cconf)?; } Ok(())