From 25d9dc6eaf9803675bd694a6d5f107947c8c24aa Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Mon, 22 Apr 2024 15:40:35 +0100 Subject: [PATCH] chore(pageserver): separate missing key error (#7393) As part of https://github.com/neondatabase/neon/pull/7375 and to improve the current vectored get implementation, we separate the missing key error out into its own error variant. This also saves us several Box allocations in the get page implementation. ## Summary of changes * Add a cached layer traversal id field to each layer. * Remove box allocations for layer traversal id retrieval and keep the MissingKey error message the same as before. This should be a little bit faster. * Do not format the error message until `Display` is invoked. * For in-memory layers, the descriptor differs before and after the layer is frozen; a `OnceLock` holds the frozen form. --------- Signed-off-by: Alex Chi Z --- pageserver/src/http/routes.rs | 3 + pageserver/src/pgdatadir_mapping.rs | 12 +- .../tenant/storage_layer/inmemory_layer.rs | 45 ++++- pageserver/src/tenant/storage_layer/layer.rs | 8 + .../src/tenant/storage_layer/layer/tests.rs | 4 +- pageserver/src/tenant/timeline.rs | 165 ++++++++++-------- 6 files changed, 157 insertions(+), 80 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 20258dd950..81508965b4 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -160,6 +160,9 @@ impl From for ApiError { fn from(pre: PageReconstructError) -> ApiError { match pre { PageReconstructError::Other(pre) => ApiError::InternalServerError(pre), + PageReconstructError::MissingKey(e) => { + ApiError::InternalServerError(anyhow::anyhow!("{e}")) + } PageReconstructError::Cancelled => { ApiError::InternalServerError(anyhow::anyhow!("request was cancelled")) } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 6f7d74bdee..351a766b10 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1446,10 +1446,14 @@ 
impl<'a> DatadirModification<'a> { // reset the map. return Err(e.into()); } - // FIXME: PageReconstructError doesn't have an explicit variant for key-not-found, so - // we are assuming that all _other_ possible errors represents a missing key. If some - // other error occurs, we may incorrectly reset the map of aux files. - Err(PageReconstructError::Other(_) | PageReconstructError::WalRedo(_)) => { + // Note: we added missing key error variant in https://github.com/neondatabase/neon/pull/7393 but + // the original code assumes all other errors are missing keys. Therefore, we keep the code path + // the same for now, though in theory, we should only match the `MissingKey` variant. + Err( + PageReconstructError::Other(_) + | PageReconstructError::WalRedo(_) + | PageReconstructError::MissingKey { .. }, + ) => { // Key is missing, we must insert an image as the basis for subsequent deltas. let mut dir = AuxFilesDirectory { diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index 29751641b4..a86d0d48c5 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -26,7 +26,7 @@ use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn, vec_map::VecMap}; // while being able to use std::fmt::Write's methods use crate::metrics::TIMELINE_EPHEMERAL_BYTES; use std::cmp::Ordering; -use std::fmt::Write as _; +use std::fmt::Write; use std::ops::Range; use std::sync::atomic::Ordering as AtomicOrdering; use std::sync::atomic::{AtomicU64, AtomicUsize}; @@ -54,6 +54,12 @@ pub struct InMemoryLayer { /// Writes are only allowed when this is `None`. end_lsn: OnceLock, + /// Used for traversal path. Cached representation of the in-memory layer before frozen. + local_path_str: Arc, + + /// Used for traversal path. Cached representation of the in-memory layer after frozen. 
+ frozen_local_path_str: OnceLock>, + opened_at: Instant, /// The above fields never change, except for `end_lsn`, which is only set once. @@ -241,6 +247,12 @@ impl InMemoryLayer { self.start_lsn..self.end_lsn_or_max() } + pub(crate) fn local_path_str(&self) -> &Arc { + self.frozen_local_path_str + .get() + .unwrap_or(&self.local_path_str) + } + /// debugging function to print out the contents of the layer /// /// this is likely completly unused @@ -430,10 +442,24 @@ impl InMemoryLayer { } } +fn inmem_layer_display(mut f: impl Write, start_lsn: Lsn, end_lsn: Lsn) -> std::fmt::Result { + write!(f, "inmem-{:016X}-{:016X}", start_lsn.0, end_lsn.0) +} + +fn inmem_layer_log_display( + mut f: impl Write, + timeline: TimelineId, + start_lsn: Lsn, + end_lsn: Lsn, +) -> std::fmt::Result { + write!(f, "timeline {} in-memory ", timeline)?; + inmem_layer_display(f, start_lsn, end_lsn) +} + impl std::fmt::Display for InMemoryLayer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let end_lsn = self.end_lsn_or_max(); - write!(f, "inmem-{:016X}-{:016X}", self.start_lsn.0, end_lsn.0) + inmem_layer_display(f, self.start_lsn, end_lsn) } } @@ -458,6 +484,12 @@ impl InMemoryLayer { Ok(InMemoryLayer { file_id: key, + local_path_str: { + let mut buf = String::new(); + inmem_layer_log_display(&mut buf, timeline_id, start_lsn, Lsn::MAX).unwrap(); + buf.into() + }, + frozen_local_path_str: OnceLock::new(), conf, timeline_id, tenant_shard_id, @@ -552,6 +584,15 @@ impl InMemoryLayer { ); self.end_lsn.set(end_lsn).expect("end_lsn set only once"); + self.frozen_local_path_str + .set({ + let mut buf = String::new(); + inmem_layer_log_display(&mut buf, self.get_timeline_id(), self.start_lsn, end_lsn) + .unwrap(); + buf.into() + }) + .expect("frozen_local_path_str set only once"); + for vec_map in inner.index.values() { for (lsn, _pos) in vec_map.as_slice() { assert!(*lsn < end_lsn); diff --git a/pageserver/src/tenant/storage_layer/layer.rs 
b/pageserver/src/tenant/storage_layer/layer.rs index e55299f0fa..316a11f8cc 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -395,6 +395,10 @@ impl Layer { &self.0.path } + pub(crate) fn local_path_str(&self) -> &Arc { + &self.0.path_str + } + pub(crate) fn metadata(&self) -> LayerFileMetadata { self.0.metadata() } @@ -517,6 +521,9 @@ struct LayerInner { /// Full path to the file; unclear if this should exist anymore. path: Utf8PathBuf, + /// String representation of the full path, used for traversal id. + path_str: Arc, + desc: PersistentLayerDesc, /// Timeline access is needed for remote timeline client and metrics. @@ -722,6 +729,7 @@ impl LayerInner { LayerInner { conf, + path_str: path.to_string().into(), path, desc, timeline: Arc::downgrade(timeline), diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index f0697fdf28..52f62faa8d 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -818,11 +818,13 @@ async fn eviction_cancellation_on_drop() { } } +/// A test case to remind you the cost of these structures. You can bump the size limit +/// below if it is really necessary to add more fields to the structures. 
#[test] fn layer_size() { assert_eq!(std::mem::size_of::(), 2040); assert_eq!(std::mem::size_of::(), 104); - assert_eq!(std::mem::size_of::(), 2328); + assert_eq!(std::mem::size_of::(), 2344); // it also has the utf8 path } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 8e815ddae8..e707c3b244 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -23,7 +23,7 @@ use pageserver_api::{ EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, TimelineState, }, reltag::BlockNumber, - shard::{ShardIdentity, TenantShardId}, + shard::{ShardIdentity, ShardNumber, TenantShardId}, }; use rand::Rng; use serde_with::serde_as; @@ -428,6 +428,62 @@ pub(crate) enum PageReconstructError { /// An error happened replaying WAL records #[error(transparent)] WalRedo(anyhow::Error), + + #[error("{0}")] + MissingKey(MissingKeyError), +} + +#[derive(Debug)] +pub struct MissingKeyError { + stuck_at_lsn: bool, + key: Key, + shard: ShardNumber, + cont_lsn: Lsn, + request_lsn: Lsn, + ancestor_lsn: Option, + traversal_path: Vec, + backtrace: Option, +} + +impl std::fmt::Display for MissingKeyError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.stuck_at_lsn { + // Records are found in this timeline but no image layer or initial delta record was found. + write!( + f, + "could not find layer with more data for key {} (shard {:?}) at LSN {}, request LSN {}", + self.key, self.shard, self.cont_lsn, self.request_lsn + )?; + if let Some(ref ancestor_lsn) = self.ancestor_lsn { + write!(f, ", ancestor {}", ancestor_lsn)?; + } + } else { + // No records in this timeline. 
+ write!( + f, + "could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}", + self.key, self.shard, self.cont_lsn, self.request_lsn + )?; + } + + if !self.traversal_path.is_empty() { + writeln!(f)?; + } + + for (r, c, l) in &self.traversal_path { + writeln!( + f, + "layer traversal: result {:?}, cont_lsn {}, layer: {}", + r, c, l, + )?; + } + + if let Some(ref backtrace) = self.backtrace { + write!(f, "\n{}", backtrace)?; + } + + Ok(()) + } } impl PageReconstructError { @@ -439,6 +495,7 @@ impl PageReconstructError { AncestorLsnTimeout(_) => false, Cancelled | AncestorStopping(_) => true, WalRedo(_) => false, + MissingKey { .. } => false, } } } @@ -753,7 +810,7 @@ impl Timeline { writeln!( msg, "- layer traversal: result {res:?}, cont_lsn {cont_lsn}, layer: {}", - layer(), + layer, ) .expect("string grows") }); @@ -872,9 +929,11 @@ impl Timeline { Err(Cancelled | AncestorStopping(_)) => { return Err(GetVectoredError::Cancelled) } - Err(Other(err)) if err.to_string().contains("could not find data for key") => { - return Err(GetVectoredError::MissingKey(key)) - } + // we only capture stuck_at_lsn=false now until we figure out https://github.com/neondatabase/neon/issues/7380 + Err(MissingKey(MissingKeyError { + stuck_at_lsn: false, + .. 
+ })) => return Err(GetVectoredError::MissingKey(key)), _ => { values.insert(key, block); key = key.next(); @@ -2692,7 +2751,7 @@ impl Timeline { } } -type TraversalId = String; +type TraversalId = Arc; trait TraversalLayerExt { fn traversal_id(&self) -> TraversalId; @@ -2700,13 +2759,13 @@ trait TraversalLayerExt { impl TraversalLayerExt for Layer { fn traversal_id(&self) -> TraversalId { - self.local_path().to_string() + Arc::clone(self.local_path_str()) } } impl TraversalLayerExt for Arc { fn traversal_id(&self) -> TraversalId { - format!("timeline {} in-memory {self}", self.get_timeline_id()) + Arc::clone(self.local_path_str()) } } @@ -2775,32 +2834,35 @@ impl Timeline { if prev <= cont_lsn { // Didn't make any progress in last iteration. Error out to avoid // getting stuck in the loop. - return Err(layer_traversal_error(format!( - "could not find layer with more data for key {} at LSN {}, request LSN {}, ancestor {}", + return Err(PageReconstructError::MissingKey(MissingKeyError { + stuck_at_lsn: true, key, - Lsn(cont_lsn.0 - 1), + shard: self.shard_identity.get_shard_number(&key), + cont_lsn: Lsn(cont_lsn.0 - 1), request_lsn, - timeline.ancestor_lsn - ), traversal_path)); + ancestor_lsn: Some(timeline.ancestor_lsn), + traversal_path, + backtrace: None, + })); } } prev_lsn = Some(cont_lsn); } ValueReconstructResult::Missing => { - return Err(layer_traversal_error( - if cfg!(test) { - format!( - "could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}\n{}", - key, self.shard_identity.get_shard_number(&key), cont_lsn, request_lsn, std::backtrace::Backtrace::force_capture(), - ) - } else { - format!( - "could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}", - key, self.shard_identity.get_shard_number(&key), cont_lsn, request_lsn - ) - }, + return Err(PageReconstructError::MissingKey(MissingKeyError { + stuck_at_lsn: false, + key, + shard: self.shard_identity.get_shard_number(&key), + cont_lsn, + request_lsn, + 
ancestor_lsn: None, traversal_path, - )); + backtrace: if cfg!(test) { + Some(std::backtrace::Backtrace::force_capture()) + } else { + None + }, + })); } } @@ -2848,11 +2910,7 @@ impl Timeline { }; cont_lsn = lsn_floor; // metrics: open_layer does not count as fs access, so we are not updating `read_count` - traversal_path.push(( - result, - cont_lsn, - Box::new(move || open_layer.traversal_id()), - )); + traversal_path.push((result, cont_lsn, open_layer.traversal_id())); continue 'outer; } } @@ -2879,11 +2937,7 @@ impl Timeline { }; cont_lsn = lsn_floor; // metrics: open_layer does not count as fs access, so we are not updating `read_count` - traversal_path.push(( - result, - cont_lsn, - Box::new(move || frozen_layer.traversal_id()), - )); + traversal_path.push((result, cont_lsn, frozen_layer.traversal_id())); continue 'outer; } } @@ -2904,14 +2958,7 @@ impl Timeline { }; cont_lsn = lsn_floor; *read_count += 1; - traversal_path.push(( - result, - cont_lsn, - Box::new({ - let layer = layer.to_owned(); - move || layer.traversal_id() - }), - )); + traversal_path.push((result, cont_lsn, layer.traversal_id())); continue 'outer; } else if timeline.ancestor_timeline.is_some() { // Nothing on this timeline. Traverse to parent @@ -4656,35 +4703,7 @@ impl Timeline { } } -type TraversalPathItem = ( - ValueReconstructResult, - Lsn, - Box TraversalId>, -); - -/// Helper function for get_reconstruct_data() to add the path of layers traversed -/// to an error, as anyhow context information. -fn layer_traversal_error(msg: String, path: Vec) -> PageReconstructError { - // We want the original 'msg' to be the outermost context. The outermost context - // is the most high-level information, which also gets propagated to the client. 
- let mut msg_iter = path - .into_iter() - .map(|(r, c, l)| { - format!( - "layer traversal: result {:?}, cont_lsn {}, layer: {}", - r, - c, - l(), - ) - }) - .chain(std::iter::once(msg)); - // Construct initial message from the first traversed layer - let err = anyhow!(msg_iter.next().unwrap()); - - // Append all subsequent traversals, and the error message 'msg', as contexts. - let msg = msg_iter.fold(err, |err, msg| err.context(msg)); - PageReconstructError::from(msg) -} +type TraversalPathItem = (ValueReconstructResult, Lsn, TraversalId); struct TimelineWriterState { open_layer: Arc,