chore(pageserver): separate missing key error (#7393)

As part of https://github.com/neondatabase/neon/pull/7375 and to improve
the current vectored get implementation, we separate the missing key
error out. This also saves us several Box allocations in the get page
implementation.

## Summary of changes

* Create a caching field of layer traversal id for each of the layer.
* Remove box allocations for layer traversal id retrieval and implement
MissingKey error message as before. This should be a little bit faster.
* Do not format error message until `Display`.
* For in-mem layer, the descriptor is different before/after frozen. I'm
using once lock for that.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z
2024-04-22 15:40:35 +01:00
committed by GitHub
parent 139d1346d5
commit 25d9dc6eaf
6 changed files with 157 additions and 80 deletions

View File

@@ -160,6 +160,9 @@ impl From<PageReconstructError> for ApiError {
fn from(pre: PageReconstructError) -> ApiError {
match pre {
PageReconstructError::Other(pre) => ApiError::InternalServerError(pre),
PageReconstructError::MissingKey(e) => {
ApiError::InternalServerError(anyhow::anyhow!("{e}"))
}
PageReconstructError::Cancelled => {
ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
}

View File

@@ -1446,10 +1446,14 @@ impl<'a> DatadirModification<'a> {
// reset the map.
return Err(e.into());
}
// FIXME: PageReconstructError doesn't have an explicit variant for key-not-found, so
// we are assuming that all _other_ possible errors represents a missing key. If some
// other error occurs, we may incorrectly reset the map of aux files.
Err(PageReconstructError::Other(_) | PageReconstructError::WalRedo(_)) => {
// Note: we added missing key error variant in https://github.com/neondatabase/neon/pull/7393 but
// the original code assumes all other errors are missing keys. Therefore, we keep the code path
// the same for now, though in theory, we should only match the `MissingKey` variant.
Err(
PageReconstructError::Other(_)
| PageReconstructError::WalRedo(_)
| PageReconstructError::MissingKey { .. },
) => {
// Key is missing, we must insert an image as the basis for subsequent deltas.
let mut dir = AuxFilesDirectory {

View File

@@ -26,7 +26,7 @@ use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn, vec_map::VecMap};
// while being able to use std::fmt::Write's methods
use crate::metrics::TIMELINE_EPHEMERAL_BYTES;
use std::cmp::Ordering;
use std::fmt::Write as _;
use std::fmt::Write;
use std::ops::Range;
use std::sync::atomic::Ordering as AtomicOrdering;
use std::sync::atomic::{AtomicU64, AtomicUsize};
@@ -54,6 +54,12 @@ pub struct InMemoryLayer {
/// Writes are only allowed when this is `None`.
end_lsn: OnceLock<Lsn>,
/// Used for traversal path. Cached representation of the in-memory layer before frozen.
local_path_str: Arc<str>,
/// Used for traversal path. Cached representation of the in-memory layer after frozen.
frozen_local_path_str: OnceLock<Arc<str>>,
opened_at: Instant,
/// The above fields never change, except for `end_lsn`, which is only set once.
@@ -241,6 +247,12 @@ impl InMemoryLayer {
self.start_lsn..self.end_lsn_or_max()
}
pub(crate) fn local_path_str(&self) -> &Arc<str> {
self.frozen_local_path_str
.get()
.unwrap_or(&self.local_path_str)
}
/// debugging function to print out the contents of the layer
///
/// this is likely completly unused
@@ -430,10 +442,24 @@ impl InMemoryLayer {
}
}
fn inmem_layer_display(mut f: impl Write, start_lsn: Lsn, end_lsn: Lsn) -> std::fmt::Result {
write!(f, "inmem-{:016X}-{:016X}", start_lsn.0, end_lsn.0)
}
fn inmem_layer_log_display(
mut f: impl Write,
timeline: TimelineId,
start_lsn: Lsn,
end_lsn: Lsn,
) -> std::fmt::Result {
write!(f, "timeline {} in-memory ", timeline)?;
inmem_layer_display(f, start_lsn, end_lsn)
}
impl std::fmt::Display for InMemoryLayer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let end_lsn = self.end_lsn_or_max();
write!(f, "inmem-{:016X}-{:016X}", self.start_lsn.0, end_lsn.0)
inmem_layer_display(f, self.start_lsn, end_lsn)
}
}
@@ -458,6 +484,12 @@ impl InMemoryLayer {
Ok(InMemoryLayer {
file_id: key,
local_path_str: {
let mut buf = String::new();
inmem_layer_log_display(&mut buf, timeline_id, start_lsn, Lsn::MAX).unwrap();
buf.into()
},
frozen_local_path_str: OnceLock::new(),
conf,
timeline_id,
tenant_shard_id,
@@ -552,6 +584,15 @@ impl InMemoryLayer {
);
self.end_lsn.set(end_lsn).expect("end_lsn set only once");
self.frozen_local_path_str
.set({
let mut buf = String::new();
inmem_layer_log_display(&mut buf, self.get_timeline_id(), self.start_lsn, end_lsn)
.unwrap();
buf.into()
})
.expect("frozen_local_path_str set only once");
for vec_map in inner.index.values() {
for (lsn, _pos) in vec_map.as_slice() {
assert!(*lsn < end_lsn);

View File

@@ -395,6 +395,10 @@ impl Layer {
&self.0.path
}
pub(crate) fn local_path_str(&self) -> &Arc<str> {
&self.0.path_str
}
pub(crate) fn metadata(&self) -> LayerFileMetadata {
self.0.metadata()
}
@@ -517,6 +521,9 @@ struct LayerInner {
/// Full path to the file; unclear if this should exist anymore.
path: Utf8PathBuf,
/// String representation of the full path, used for traversal id.
path_str: Arc<str>,
desc: PersistentLayerDesc,
/// Timeline access is needed for remote timeline client and metrics.
@@ -722,6 +729,7 @@ impl LayerInner {
LayerInner {
conf,
path_str: path.to_string().into(),
path,
desc,
timeline: Arc::downgrade(timeline),

View File

@@ -818,11 +818,13 @@ async fn eviction_cancellation_on_drop() {
}
}
/// A test case to remind you the cost of these structures. You can bump the size limit
/// below if it is really necessary to add more fields to the structures.
#[test]
fn layer_size() {
assert_eq!(std::mem::size_of::<LayerAccessStats>(), 2040);
assert_eq!(std::mem::size_of::<PersistentLayerDesc>(), 104);
assert_eq!(std::mem::size_of::<LayerInner>(), 2328);
assert_eq!(std::mem::size_of::<LayerInner>(), 2344);
// it also has the utf8 path
}

View File

@@ -23,7 +23,7 @@ use pageserver_api::{
EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, TimelineState,
},
reltag::BlockNumber,
shard::{ShardIdentity, TenantShardId},
shard::{ShardIdentity, ShardNumber, TenantShardId},
};
use rand::Rng;
use serde_with::serde_as;
@@ -428,6 +428,62 @@ pub(crate) enum PageReconstructError {
/// An error happened replaying WAL records
#[error(transparent)]
WalRedo(anyhow::Error),
#[error("{0}")]
MissingKey(MissingKeyError),
}
#[derive(Debug)]
pub struct MissingKeyError {
stuck_at_lsn: bool,
key: Key,
shard: ShardNumber,
cont_lsn: Lsn,
request_lsn: Lsn,
ancestor_lsn: Option<Lsn>,
traversal_path: Vec<TraversalPathItem>,
backtrace: Option<std::backtrace::Backtrace>,
}
impl std::fmt::Display for MissingKeyError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if self.stuck_at_lsn {
// Records are found in this timeline but no image layer or initial delta record was found.
write!(
f,
"could not find layer with more data for key {} (shard {:?}) at LSN {}, request LSN {}",
self.key, self.shard, self.cont_lsn, self.request_lsn
)?;
if let Some(ref ancestor_lsn) = self.ancestor_lsn {
write!(f, ", ancestor {}", ancestor_lsn)?;
}
} else {
// No records in this timeline.
write!(
f,
"could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}",
self.key, self.shard, self.cont_lsn, self.request_lsn
)?;
}
if !self.traversal_path.is_empty() {
writeln!(f)?;
}
for (r, c, l) in &self.traversal_path {
writeln!(
f,
"layer traversal: result {:?}, cont_lsn {}, layer: {}",
r, c, l,
)?;
}
if let Some(ref backtrace) = self.backtrace {
write!(f, "\n{}", backtrace)?;
}
Ok(())
}
}
impl PageReconstructError {
@@ -439,6 +495,7 @@ impl PageReconstructError {
AncestorLsnTimeout(_) => false,
Cancelled | AncestorStopping(_) => true,
WalRedo(_) => false,
MissingKey { .. } => false,
}
}
}
@@ -753,7 +810,7 @@ impl Timeline {
writeln!(
msg,
"- layer traversal: result {res:?}, cont_lsn {cont_lsn}, layer: {}",
layer(),
layer,
)
.expect("string grows")
});
@@ -872,9 +929,11 @@ impl Timeline {
Err(Cancelled | AncestorStopping(_)) => {
return Err(GetVectoredError::Cancelled)
}
Err(Other(err)) if err.to_string().contains("could not find data for key") => {
return Err(GetVectoredError::MissingKey(key))
}
// we only capture stuck_at_lsn=false now until we figure out https://github.com/neondatabase/neon/issues/7380
Err(MissingKey(MissingKeyError {
stuck_at_lsn: false,
..
})) => return Err(GetVectoredError::MissingKey(key)),
_ => {
values.insert(key, block);
key = key.next();
@@ -2692,7 +2751,7 @@ impl Timeline {
}
}
type TraversalId = String;
type TraversalId = Arc<str>;
trait TraversalLayerExt {
fn traversal_id(&self) -> TraversalId;
@@ -2700,13 +2759,13 @@ trait TraversalLayerExt {
impl TraversalLayerExt for Layer {
fn traversal_id(&self) -> TraversalId {
self.local_path().to_string()
Arc::clone(self.local_path_str())
}
}
impl TraversalLayerExt for Arc<InMemoryLayer> {
fn traversal_id(&self) -> TraversalId {
format!("timeline {} in-memory {self}", self.get_timeline_id())
Arc::clone(self.local_path_str())
}
}
@@ -2775,32 +2834,35 @@ impl Timeline {
if prev <= cont_lsn {
// Didn't make any progress in last iteration. Error out to avoid
// getting stuck in the loop.
return Err(layer_traversal_error(format!(
"could not find layer with more data for key {} at LSN {}, request LSN {}, ancestor {}",
return Err(PageReconstructError::MissingKey(MissingKeyError {
stuck_at_lsn: true,
key,
Lsn(cont_lsn.0 - 1),
shard: self.shard_identity.get_shard_number(&key),
cont_lsn: Lsn(cont_lsn.0 - 1),
request_lsn,
timeline.ancestor_lsn
), traversal_path));
ancestor_lsn: Some(timeline.ancestor_lsn),
traversal_path,
backtrace: None,
}));
}
}
prev_lsn = Some(cont_lsn);
}
ValueReconstructResult::Missing => {
return Err(layer_traversal_error(
if cfg!(test) {
format!(
"could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}\n{}",
key, self.shard_identity.get_shard_number(&key), cont_lsn, request_lsn, std::backtrace::Backtrace::force_capture(),
)
} else {
format!(
"could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}",
key, self.shard_identity.get_shard_number(&key), cont_lsn, request_lsn
)
},
return Err(PageReconstructError::MissingKey(MissingKeyError {
stuck_at_lsn: false,
key,
shard: self.shard_identity.get_shard_number(&key),
cont_lsn,
request_lsn,
ancestor_lsn: None,
traversal_path,
));
backtrace: if cfg!(test) {
Some(std::backtrace::Backtrace::force_capture())
} else {
None
},
}));
}
}
@@ -2848,11 +2910,7 @@ impl Timeline {
};
cont_lsn = lsn_floor;
// metrics: open_layer does not count as fs access, so we are not updating `read_count`
traversal_path.push((
result,
cont_lsn,
Box::new(move || open_layer.traversal_id()),
));
traversal_path.push((result, cont_lsn, open_layer.traversal_id()));
continue 'outer;
}
}
@@ -2879,11 +2937,7 @@ impl Timeline {
};
cont_lsn = lsn_floor;
// metrics: open_layer does not count as fs access, so we are not updating `read_count`
traversal_path.push((
result,
cont_lsn,
Box::new(move || frozen_layer.traversal_id()),
));
traversal_path.push((result, cont_lsn, frozen_layer.traversal_id()));
continue 'outer;
}
}
@@ -2904,14 +2958,7 @@ impl Timeline {
};
cont_lsn = lsn_floor;
*read_count += 1;
traversal_path.push((
result,
cont_lsn,
Box::new({
let layer = layer.to_owned();
move || layer.traversal_id()
}),
));
traversal_path.push((result, cont_lsn, layer.traversal_id()));
continue 'outer;
} else if timeline.ancestor_timeline.is_some() {
// Nothing on this timeline. Traverse to parent
@@ -4656,35 +4703,7 @@ impl Timeline {
}
}
type TraversalPathItem = (
ValueReconstructResult,
Lsn,
Box<dyn Send + FnOnce() -> TraversalId>,
);
/// Helper function for get_reconstruct_data() to add the path of layers traversed
/// to an error, as anyhow context information.
fn layer_traversal_error(msg: String, path: Vec<TraversalPathItem>) -> PageReconstructError {
// We want the original 'msg' to be the outermost context. The outermost context
// is the most high-level information, which also gets propagated to the client.
let mut msg_iter = path
.into_iter()
.map(|(r, c, l)| {
format!(
"layer traversal: result {:?}, cont_lsn {}, layer: {}",
r,
c,
l(),
)
})
.chain(std::iter::once(msg));
// Construct initial message from the first traversed layer
let err = anyhow!(msg_iter.next().unwrap());
// Append all subsequent traversals, and the error message 'msg', as contexts.
let msg = msg_iter.fold(err, |err, msg| err.context(msg));
PageReconstructError::from(msg)
}
type TraversalPathItem = (ValueReconstructResult, Lsn, TraversalId);
struct TimelineWriterState {
open_layer: Arc<InMemoryLayer>,