From 434af3771df8344c6e0f40d02c764cffb6006484 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 1 Mar 2023 11:16:59 +0100 Subject: [PATCH] drop layer map lock before removing droppen popped frozen layer This is a shot in the dark to fix the pageserver deadlock described in https://github.com/neondatabase/neon/issues/3712 The ASCII art in the comment added in this commit describes a potential deadlock that involves page cache and EphemeralFile. So, I guess it's a useful change by itself. But, I can't find a scenario with the current code base where we would actually arrive in the described deadlock. Let's see whether the deadlock from #3712 still reproduces with this fixx. I can't repro it locally. refs https://github.com/neondatabase/neon/issues/3712 --- pageserver/src/tenant/timeline.rs | 39 ++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index d46ac26e7d..a790192e87 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2479,7 +2479,7 @@ impl Timeline { // The new on-disk layers are now in the layer map. We can remove the // in-memory layer from the map now. - { + { let mut layers = self.layers.write().unwrap(); let l = layers.frozen_layers.pop_front(); @@ -2489,6 +2489,43 @@ impl Timeline { assert!(LayerMap::compare_arced_layers(&l.unwrap(), &frozen_layer)); // release lock on 'layers' + drop(layers); + + // Only drop the ephemeral file after releasing the layer map lock. + // The reason is that the drop function needs to lock page cache slots. + // If there is another thread that is already holding a slot which we + // need to lock, and that thread is waiting for the layer map lock, we + // would have a deadlock: + // + // wants ┌─────────┐ + // us ────────────────►│ cache │ + // ▲ │ slot │ + // │ │ │ │ + // assigned │ └────┼────┘ + // │ │ + // │ │assigned + // ┌────┼───┐ │ + // │ │ │ ▼ + // │ layers │◄─────────────── them + // │ │ wants + // └────────┘ + // + // How can this happen? I don't know. Basically we need to walk up + // the call graph for all PageCache::try_lock_for_read and PageCache::try_lock_for_write + // and check whether any of them holds onto the guard object that these methods return. + // The block_io::BlockReader trait implementations look like good candidates. + // For example, the BlockReader::read_blk impl for EphemeralFile returns the guard object + // for the cache slot straight to the caller. + // But, there's are many callers of BlockReader::read_blk, and rust-analyzer has no way + // to find just the ::read_blk callers. + // + // One obvious place is InMemoryLayer::get_value_reconstruct_data , which uses a BlockReader::block_cursor. + // That cursor object holds onto the most recently read page cache slot until cursor object is dropped. + // But, all relevant uses of InMemoryLayer::get_value_reconstruct_data are in Timeline::get_reconstruct_data, + // which already holds the layer map lock in shared mode. So, it can't cause this deadlock. + // + // Maybe this is a red herring? + drop(l); } fail_point!("checkpoint-after-sync");