diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 4ff2d4b0d8..6b95ef73d0 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -26,7 +26,7 @@ use std::sync::Arc; use tracing::*; use utils::lsn::Lsn; -use super::storage_layer::{InMemoryLayer, Layer}; +use super::storage_layer::{InMemoryLayer, InMemoryOrHistoricLayer, Layer}; /// /// LayerMap tracks what layers exist on a timeline. @@ -241,7 +241,8 @@ where /// Return value of LayerMap::search pub struct SearchResult { - pub layer: Arc, + // FIXME: I wish this could be Arc. But I couldn't make that work. + pub layer: InMemoryOrHistoricLayer, pub lsn_floor: Lsn, } @@ -261,6 +262,30 @@ where /// layer. /// pub fn search(&self, key: Key, end_lsn: Lsn) -> Option> { + // First check if an open or frozen layer matches + if let Some(open_layer) = &self.open_layer { + let start_lsn = open_layer.get_lsn_range().start; + if end_lsn > start_lsn { + return Some(SearchResult { + layer: InMemoryOrHistoricLayer::InMemory(Arc::clone(open_layer)), + lsn_floor: start_lsn, + }); + } + } + for frozen_layer in self.frozen_layers.iter().rev() { + let start_lsn = frozen_layer.get_lsn_range().start; + if end_lsn > start_lsn { + return Some(SearchResult { + layer: InMemoryOrHistoricLayer::InMemory(Arc::clone(frozen_layer)), + lsn_floor: start_lsn, + }); + } + } + + self.search_historic(key, end_lsn) + } + + fn search_historic(&self, key: Key, end_lsn: Lsn) -> Option> { // linear search // Find the latest image layer that covers the given key let mut latest_img: Option> = None; @@ -286,7 +311,7 @@ where if Lsn(img_lsn.0 + 1) == end_lsn { // found exact match return Some(SearchResult { - layer: Arc::clone(l), + layer: InMemoryOrHistoricLayer::Historic(Arc::clone(l)), lsn_floor: img_lsn, }); } @@ -349,13 +374,13 @@ where ); Some(SearchResult { lsn_floor, - layer: l, + layer: InMemoryOrHistoricLayer::Historic(l), }) } else if let Some(l) = latest_img { trace!("found img layer and no deltas for request on {key} at {end_lsn}"); Some(SearchResult { lsn_floor: latest_img_lsn.unwrap(), - layer: l, + layer: InMemoryOrHistoricLayer::Historic(l), }) } else { trace!("no layer found for request on {key} at {end_lsn}"); diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index d87a248bdf..17f4402c2a 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -196,3 +196,38 @@ pub fn downcast_remote_layer( None } } + +pub enum InMemoryOrHistoricLayer { + InMemory(Arc), + Historic(Arc), +} + +impl InMemoryOrHistoricLayer +where + L: PersistentLayer, +{ + pub fn downcast_remote_layer(&self) -> Option> { + match self { + Self::InMemory(_) => None, + Self::Historic(l) => { + if l.is_remote_layer() { + Arc::clone(l).downcast_remote_layer() + } else { + None + } + } + } + } + + pub fn get_value_reconstruct_data( + &self, + key: Key, + lsn_range: Range, + reconstruct_data: &mut ValueReconstructState, + ) -> Result { + match self { + Self::InMemory(l) => l.get_value_reconstruct_data(key, lsn_range, reconstruct_data), + Self::Historic(l) => l.get_value_reconstruct_data(key, lsn_range, reconstruct_data), + } + } +} diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2c22c6694d..d31997b03e 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -25,8 +25,8 @@ use std::time::{Duration, Instant, SystemTime}; use crate::tenant::remote_timeline_client::{self, index::LayerFileMetadata}; use crate::tenant::storage_layer::{ - DeltaFileName, DeltaLayerWriter, ImageFileName, ImageLayerWriter, InMemoryLayer, LayerFileName, - RemoteLayer, + DeltaFileName, DeltaLayerWriter, ImageFileName, ImageLayerWriter, InMemoryLayer, + InMemoryOrHistoricLayer, LayerFileName, RemoteLayer, }; use crate::tenant::{ ephemeral_file::is_ephemeral_file, @@ -1591,7 +1591,7 @@ trait TraversalLayerExt { fn traversal_id(&self) -> TraversalId; } -impl TraversalLayerExt for Arc { +impl TraversalLayerExt for T { fn traversal_id(&self) -> TraversalId { match self.local_path() { Some(local_path) => { @@ -1621,6 +1621,15 @@ impl TraversalLayerExt for Arc { } } +impl TraversalLayerExt for InMemoryOrHistoricLayer { + fn traversal_id(&self) -> String { + match self { + Self::InMemory(l) => l.traversal_id(), + Self::Historic(l) => l.traversal_id(), + } + } +} + impl Timeline { /// /// Get a handle to a Layer for reading. @@ -1642,8 +1651,11 @@ impl Timeline { // For debugging purposes, collect the path of layers that we traversed // through. It's included in the error message if we fail to find the key. - let mut traversal_path = - Vec::<(ValueReconstructResult, Lsn, Box)>::new(); + let mut traversal_path = Vec::<( + ValueReconstructResult, + Lsn, + Box, + )>::new(); let cached_lsn = if let Some((cached_lsn, _)) = &reconstruct_state.img { *cached_lsn @@ -1679,7 +1691,7 @@ impl Timeline { Lsn(cont_lsn.0 - 1), request_lsn, timeline.ancestor_lsn - ), traversal_path); + ), &traversal_path); } prev_lsn = cont_lsn; } @@ -1689,7 +1701,7 @@ impl Timeline { "could not find data for key {} at LSN {}, for request at LSN {}", key, cont_lsn, request_lsn ), - traversal_path, + &traversal_path, ); } } @@ -1708,82 +1720,54 @@ impl Timeline { timeline_owned = ancestor; timeline = &*timeline_owned; prev_lsn = Lsn(u64::MAX); - continue; + continue 'outer; } - let layers = timeline.layers.read().unwrap(); - - // Check the open and frozen in-memory layers first, in order from newest - // to oldest. - if let Some(open_layer) = &layers.open_layer { - let start_lsn = open_layer.get_lsn_range().start; - if cont_lsn > start_lsn { - //info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.filename().display()); - // Get all the data needed to reconstruct the page version from this layer. - // But if we have an older cached page image, no need to go past that. - let lsn_floor = max(cached_lsn + 1, start_lsn); - result = match open_layer.get_value_reconstruct_data( - key, - lsn_floor..cont_lsn, - reconstruct_state, - ) { - Ok(result) => result, - Err(e) => return PageReconstructResult::from(e), - }; - cont_lsn = lsn_floor; - traversal_path.push((result, cont_lsn, Box::new(open_layer.clone()))); - continue; - } - } - for frozen_layer in layers.frozen_layers.iter().rev() { - let start_lsn = frozen_layer.get_lsn_range().start; - if cont_lsn > start_lsn { - //info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.filename().display()); - let lsn_floor = max(cached_lsn + 1, start_lsn); - result = match frozen_layer.get_value_reconstruct_data( - key, - lsn_floor..cont_lsn, - reconstruct_state, - ) { - Ok(result) => result, - Err(e) => return PageReconstructResult::from(e), - }; - cont_lsn = lsn_floor; - traversal_path.push((result, cont_lsn, Box::new(frozen_layer.clone()))); - continue 'outer; - } - } - - if let Some(SearchResult { lsn_floor, layer }) = layers.search(key, cont_lsn) { - //info!("CHECKING for {} at {} on historic layer {}", key, cont_lsn, layer.filename().display()); - - // If it's a remote layer, the caller can do the download and retry. - if let Some(remote_layer) = super::storage_layer::downcast_remote_layer(&layer) { - info!("need remote layer {}", layer.traversal_id()); - return PageReconstructResult::NeedsDownload( - Weak::clone(&timeline.myself), - Arc::downgrade(&remote_layer), - ); - } - - let lsn_floor = max(cached_lsn + 1, lsn_floor); - result = match layer.get_value_reconstruct_data( - key, - lsn_floor..cont_lsn, - reconstruct_state, - ) { - Ok(result) => result, - Err(e) => return PageReconstructResult::from(e), + loop { + let remote_layer = { + let layers = timeline.layers.read().unwrap(); + if let Some(SearchResult { lsn_floor, layer }) = layers.search(key, cont_lsn) { + // If it's a remote layer, download it and retry. + if let Some(remote_layer) = layer.downcast_remote_layer() { + // TODO: push a breadcrumb to 'traversal_path' to record the fact that + // we downloaded / would need to download this. + remote_layer + } else { + // Get all the data needed to reconstruct the page version from this layer. + // But if we have an older cached page image, no need to go past that. + let lsn_floor = max(cached_lsn + 1, lsn_floor); + result = match layer.get_value_reconstruct_data( + key, + lsn_floor..cont_lsn, + reconstruct_state, + ) { + Ok(result) => result, + Err(e) => return PageReconstructResult::from(e), + }; + cont_lsn = lsn_floor; + traversal_path.push((result, cont_lsn, Box::new(layer))); + continue 'outer; + } + } else if timeline.ancestor_timeline.is_some() { + // Nothing on this timeline. Traverse to parent + result = ValueReconstructResult::Continue; + cont_lsn = Lsn(timeline.ancestor_lsn.0 + 1); + continue 'outer; + } else { + // Nothing found + result = ValueReconstructResult::Missing; + continue 'outer; + } }; - cont_lsn = lsn_floor; - traversal_path.push((result, cont_lsn, Box::new(layer.clone()))); - } else if timeline.ancestor_timeline.is_some() { - // Nothing on this timeline. Traverse to parent - result = ValueReconstructResult::Continue; - cont_lsn = Lsn(timeline.ancestor_lsn.0 + 1); - } else { - // Nothing found - result = ValueReconstructResult::Missing; + + // The next layer doesn't exist locally. The caller can do the download and retry. + // (The control flow is a bit complicated here because we must drop the 'layers' + // lock before awaiting on the Future.) + info!("need remote layer {}", remote_layer.traversal_id()); + return PageReconstructResult::NeedsDownload( + Weak::clone(&timeline.myself), + Arc::downgrade(&remote_layer), + ); } } } @@ -3362,7 +3346,7 @@ where /// to an error, as anyhow context information. fn layer_traversal_error( msg: String, - path: Vec<(ValueReconstructResult, Lsn, Box)>, + path: &[(ValueReconstructResult, Lsn, Box)], ) -> PageReconstructResult<()> { // We want the original 'msg' to be the outermost context. The outermost context // is the most high-level information, which also gets propagated to the client.