From de03742ca33ac5881b7639b7cc863c80e0830c53 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 26 Mar 2024 14:35:36 +0000 Subject: [PATCH] pageserver: drop layer map lock in Timeline::get (#7217) ## Problem We currently hold the layer map read lock while doing IO on the read path. This is not required for correctness. ## Summary of changes Drop the layer map lock after figuring out which layer we wish to read from. Why is this correct: * `Layer` models the lifecycle of an on disk layer. In the event the layer is removed from local disk, it will be on demand downloaded * `InMemoryLayer` holds the `EphemeralFile` which wraps the on disk file. As long as the `InMemoryLayer` is in scope, it's safe to read from it. Related https://github.com/neondatabase/neon/issues/6833 --- pageserver/src/tenant/timeline.rs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 0b8cdac1cc..8b6e93d500 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2587,6 +2587,10 @@ impl Timeline { // Get all the data needed to reconstruct the page version from this layer. // But if we have an older cached page image, no need to go past that. let lsn_floor = max(cached_lsn + 1, start_lsn); + + let open_layer = open_layer.clone(); + drop(guard); + result = match open_layer .get_value_reconstruct_data( key, @@ -2604,10 +2608,7 @@ impl Timeline { traversal_path.push(( result, cont_lsn, - Box::new({ - let open_layer = Arc::clone(open_layer); - move || open_layer.traversal_id() - }), + Box::new(move || open_layer.traversal_id()), )); continue 'outer; } @@ -2617,6 +2618,10 @@ impl Timeline { if cont_lsn > start_lsn { //info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.filename().display()); let lsn_floor = max(cached_lsn + 1, start_lsn); + + let frozen_layer = frozen_layer.clone(); + drop(guard); + result = match frozen_layer .get_value_reconstruct_data( key, @@ -2634,10 +2639,7 @@ impl Timeline { traversal_path.push(( result, cont_lsn, - Box::new({ - let frozen_layer = Arc::clone(frozen_layer); - move || frozen_layer.traversal_id() - }), + Box::new(move || frozen_layer.traversal_id()), )); continue 'outer; } @@ -2645,6 +2647,8 @@ impl Timeline { if let Some(SearchResult { lsn_floor, layer }) = layers.search(key, cont_lsn) { let layer = guard.get_from_desc(&layer); + drop(guard); + // Get all the data needed to reconstruct the page version from this layer. // But if we have an older cached page image, no need to go past that. let lsn_floor = max(cached_lsn + 1, lsn_floor);