From 0da4046704d5c5f100a81915e68098f7c8e486f7 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 19 May 2022 00:53:28 +0300 Subject: [PATCH] Include traversal path in error message. Previously, the path was printed to the log with separate error!() calls. It's better to include the whole path in the error object and have it printed to the log as one message. Also print the path in the ValueReconstructResult::Missing case. This is what it looks like now: 2022-05-17T21:53:53.611801Z ERROR pagestream{timeline=5adcb4af3e95f00a31550d266aab7a37 tenant=74d9f9ad3293c030c6a6e196dd91c60f}: error reading relation or page version: could not find data for key 000000067F000032BE000000000000000001 at LSN 0/1698C48, for request at LSN 0/1698CF8 Caused by: 0: layer traversal: result Complete, cont_lsn 0/1698C48, layer: 000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001698C48-0000000001698CC1 1: layer traversal: result Continue, cont_lsn 0/1698CC1, layer: inmem-0000000001698CC1-FFFFFFFFFFFFFFFF Stack backtrace: --- pageserver/src/layered_repository.rs | 72 ++++++++++++++++++---------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index bad2e32cc2..79e66e5f17 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -1357,7 +1357,9 @@ impl LayeredTimeline { let mut timeline_owned; let mut timeline = self; - let mut path: Vec<(ValueReconstructResult, Lsn, Arc)> = Vec::new(); + // For debugging purposes, collect the path of layers that we traversed + // through. It's included in the error message if we fail to find the key. + let mut traversal_path: Vec<(ValueReconstructResult, Lsn, Arc)> = Vec::new(); let cached_lsn = if let Some((cached_lsn, _)) = &reconstruct_state.img { *cached_lsn @@ -1387,32 +1389,24 @@ impl LayeredTimeline { if prev_lsn <= cont_lsn { // Didn't make any progress in last iteration. Error out to avoid // getting stuck in the loop. - - // For debugging purposes, print the path of layers that we traversed - // through. - for (r, c, l) in path { - error!( - "PATH: result {:?}, cont_lsn {}, layer: {}", - r, - c, - l.filename().display() - ); - } - bail!("could not find layer with more data for key {} at LSN {}, request LSN {}, ancestor {}", - key, - Lsn(cont_lsn.0 - 1), - request_lsn, - timeline.ancestor_lsn) + return layer_traversal_error(format!( + "could not find layer with more data for key {} at LSN {}, request LSN {}, ancestor {}", + key, + Lsn(cont_lsn.0 - 1), + request_lsn, + timeline.ancestor_lsn + ), traversal_path); } prev_lsn = cont_lsn; } ValueReconstructResult::Missing => { - bail!( - "could not find data for key {} at LSN {}, for request at LSN {}", - key, - cont_lsn, - request_lsn - ) + return layer_traversal_error( + format!( + "could not find data for key {} at LSN {}, for request at LSN {}", + key, cont_lsn, request_lsn + ), + traversal_path, + ); } } @@ -1447,7 +1441,7 @@ impl LayeredTimeline { reconstruct_state, )?; cont_lsn = lsn_floor; - path.push((result, cont_lsn, open_layer.clone())); + traversal_path.push((result, cont_lsn, open_layer.clone())); continue; } } @@ -1462,7 +1456,7 @@ impl LayeredTimeline { reconstruct_state, )?; cont_lsn = lsn_floor; - path.push((result, cont_lsn, frozen_layer.clone())); + traversal_path.push((result, cont_lsn, frozen_layer.clone())); continue 'outer; } } @@ -1477,7 +1471,7 @@ impl LayeredTimeline { reconstruct_state, )?; cont_lsn = lsn_floor; - path.push((result, cont_lsn, layer)); + traversal_path.push((result, cont_lsn, layer)); } else if timeline.ancestor_timeline.is_some() { // Nothing on this timeline. Traverse to parent result = ValueReconstructResult::Continue; @@ -2375,6 +2369,32 @@ impl LayeredTimeline { } } +/// Helper function for get_reconstruct_data() to add the path of layers traversed +/// to an error, as anyhow context information. +fn layer_traversal_error( + msg: String, + path: Vec<(ValueReconstructResult, Lsn, Arc)>, +) -> anyhow::Result<()> { + // We want the original 'msg' to be the outermost context. The outermost context + // is the most high-level information, which also gets propagated to the client. + let mut msg_iter = path + .iter() + .map(|(r, c, l)| { + format!( + "layer traversal: result {:?}, cont_lsn {}, layer: {}", + r, + c, + l.filename().display() + ) + }) + .chain(std::iter::once(msg)); + // Construct initial message from the first traversed layer + let err = anyhow!(msg_iter.next().unwrap()); + + // Append all subsequent traversals, and the error message 'msg', as contexts. + Err(msg_iter.fold(err, |err, msg| err.context(msg))) +} + struct LayeredTimelineWriter<'a> { tl: &'a LayeredTimeline, _write_guard: MutexGuard<'a, ()>,