From 7e80d7b864fd19d402034427b17cd0fb6509c522 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 12 May 2023 11:27:30 +0200 Subject: [PATCH] DatadirModification::flush: pre-lock layer map This is preliminary work for/from #4220 (async `Layer::get_value_reconstruct_data`). There, we want to switch `Timeline::layers` to be a `tokio::sync::RwLock`. The problem with `DatadirModification::flush` is that it uses `TimelineWriter::put`, and that method needs to lock `Timeline::layers` down the call tree. So, `TimelineWriter::put` will needs to become async. But `DatadirModification::flush` calls `put()` from inside the `retain` closure. That won't work once `put()` becomes async, because there are no async closures in Rust. I'm not aware of an `async`-compatible alternative for `retain`. Hence, this patch pre-locks `Timeline::layers`, then passes the guard to the closure. Deadlocks ========= `TimelineWriter::put` needs to lock `Timeline::layers` when it needs to create a new `open` layer. While we hold the `Timeline::layers` locked, the flush loop timeline task can't make progress. But the flush loop isn't running yet in these cases. So, I'm wondering, what actually happens at runtime if the bootstrap / basebackup import fills up the first `open` layer. What turns it `frozen`? Is anything flushing? --- pageserver/src/pgdatadir_mapping.rs | 4 +++- pageserver/src/tenant/timeline.rs | 35 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 5982bea1a5..e7bc384b24 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1132,11 +1132,13 @@ impl<'a> DatadirModification<'a> { let writer = self.tline.writer(); + let mut layer_map = self.tline.layers.write().unwrap(); + // Flush relation and SLRU data blocks, keep metadata. let mut result: anyhow::Result<()> = Ok(()); self.pending_updates.retain(|&key, value| { if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) { - result = writer.put(key, self.lsn, value); + result = writer.put_locked(key, self.lsn, value, &mut layer_map); false } else { true diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 6f414ac43e..29e863115e 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -125,7 +125,7 @@ pub struct Timeline { pub pg_version: u32, - pub(super) layers: RwLock>, + pub(crate) layers: RwLock>, /// Set of key ranges which should be covered by image layers to /// allow GC to remove old layers. This set is created by GC and its cutoff LSN is also stored. @@ -256,6 +256,8 @@ pub struct Timeline { initial_logical_size_attempt: Mutex>, } +type LayerMapWriteLockGuard<'t> = std::sync::RwLockWriteGuard<'t, LayerMap>; + /// Internal structure to hold all data needed for logical size calculation. /// /// Calculation consists of two stages: @@ -2661,7 +2663,14 @@ impl Timeline { /// fn get_layer_for_write(&self, lsn: Lsn) -> anyhow::Result> { let mut layers = self.layers.write().unwrap(); + self.get_layer_for_write_locked(lsn, &mut layers) + } + fn get_layer_for_write_locked( + &self, + lsn: Lsn, + layers: &mut LayerMapWriteLockGuard, + ) -> anyhow::Result> { ensure!(lsn.is_aligned()); let last_record_lsn = self.get_last_record_lsn(); @@ -2716,6 +2725,19 @@ impl Timeline { Ok(()) } + fn put_value_locked( + &self, + key: Key, + lsn: Lsn, + val: &Value, + pre_locked_layer_map: &mut LayerMapWriteLockGuard, + ) -> anyhow::Result<()> { + //info!("PUT: key {} at {}", key, lsn); + let layer = self.get_layer_for_write_locked(lsn, pre_locked_layer_map)?; + layer.put_value(key, lsn, val)?; + Ok(()) + } + fn put_tombstone(&self, key_range: Range, lsn: Lsn) -> anyhow::Result<()> { let layer = self.get_layer_for_write(lsn)?; layer.put_tombstone(key_range, lsn)?; @@ -4613,6 +4635,17 @@ impl<'a> TimelineWriter<'a> { self.tl.put_value(key, lsn, value) } + pub fn put_locked( + &self, + key: Key, + lsn: Lsn, + value: &Value, + pre_locked_layer_map: &mut LayerMapWriteLockGuard, + ) -> anyhow::Result<()> { + self.tl + .put_value_locked(key, lsn, value, pre_locked_layer_map) + } + pub fn delete(&self, key_range: Range, lsn: Lsn) -> anyhow::Result<()> { self.tl.put_tombstone(key_range, lsn) }