From d672e44eee7740277b670fc7e4667c9f0ea04355 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 10 Nov 2023 13:58:18 +0000 Subject: [PATCH] pageserver: error type for collect_keyspace (#5846) ## Problem This is a log hygiene fix, for an occasional test failure. warn-level logging in imitate_timeline_cached_layer_accesses can't distinguish actual errors from shutdown cases. ## Summary of changes Replaced anyhow::Error with an explicit CollectKeySpaceError type, that includes conversion from PageReconstructError::Cancelled. --- pageserver/src/http/routes.rs | 2 +- pageserver/src/pgdatadir_mapping.rs | 30 +++++++++++++++---- .../src/tenant/timeline/eviction_task.rs | 14 +++++++-- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 63016042cf..2915178104 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1478,7 +1478,7 @@ async fn timeline_collect_keyspace( let keys = timeline .collect_keyspace(at_lsn, &ctx) .await - .map_err(ApiError::InternalServerError)?; + .map_err(|e| ApiError::InternalServerError(e.into()))?; json_response(StatusCode::OK, Partitioning { keys, at_lsn }) } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 09feba9b68..abdb4a8379 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -22,6 +22,7 @@ use std::collections::{hash_map, HashMap, HashSet}; use std::ops::ControlFlow; use std::ops::Range; use tracing::{debug, trace, warn}; +use utils::bin_ser::DeserializeError; use utils::{bin_ser::BeSer, lsn::Lsn}; /// Block number within a relation or SLRU. This matches PostgreSQL's BlockNumber type. @@ -67,6 +68,25 @@ pub enum CalculateLogicalSizeError { Other(#[from] anyhow::Error), } +#[derive(Debug, thiserror::Error)] +pub(crate) enum CollectKeySpaceError { + #[error(transparent)] + Decode(#[from] DeserializeError), + #[error(transparent)] + PageRead(PageReconstructError), + #[error("cancelled")] + Cancelled, +} + +impl From for CollectKeySpaceError { + fn from(err: PageReconstructError) -> Self { + match err { + PageReconstructError::Cancelled => Self::Cancelled, + err => Self::PageRead(err), + } + } +} + impl From for CalculateLogicalSizeError { fn from(pre: PageReconstructError) -> Self { match pre { @@ -635,11 +655,11 @@ impl Timeline { /// Get a KeySpace that covers all the Keys that are in use at the given LSN. /// Anything that's not listed maybe removed from the underlying storage (from /// that LSN forwards). - pub async fn collect_keyspace( + pub(crate) async fn collect_keyspace( &self, lsn: Lsn, ctx: &RequestContext, - ) -> anyhow::Result { + ) -> Result { // Iterate through key ranges, greedily packing them into partitions let mut result = KeySpaceAccum::new(); @@ -648,7 +668,7 @@ impl Timeline { // Fetch list of database dirs and iterate them let buf = self.get(DBDIR_KEY, lsn, ctx).await?; - let dbdir = DbDirectory::des(&buf).context("deserialization failure")?; + let dbdir = DbDirectory::des(&buf)?; let mut dbs: Vec<(Oid, Oid)> = dbdir.dbdirs.keys().cloned().collect(); dbs.sort_unstable(); @@ -681,7 +701,7 @@ impl Timeline { let slrudir_key = slru_dir_to_key(kind); result.add_key(slrudir_key); let buf = self.get(slrudir_key, lsn, ctx).await?; - let dir = SlruSegmentDirectory::des(&buf).context("deserialization failure")?; + let dir = SlruSegmentDirectory::des(&buf)?; let mut segments: Vec = dir.segments.iter().cloned().collect(); segments.sort_unstable(); for segno in segments { @@ -699,7 +719,7 @@ impl Timeline { // Then pg_twophase result.add_key(TWOPHASEDIR_KEY); let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?; - let twophase_dir = TwoPhaseDirectory::des(&buf).context("deserialization failure")?; + let twophase_dir = TwoPhaseDirectory::des(&buf)?; let mut xids: Vec = twophase_dir.xids.iter().cloned().collect(); xids.sort_unstable(); for xid in xids { diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 183fcb872f..79bc434a2a 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -26,6 +26,7 @@ use tracing::{debug, error, info, info_span, instrument, warn, Instrument}; use crate::{ context::{DownloadBehavior, RequestContext}, + pgdatadir_mapping::CollectKeySpaceError, task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, tenant::{ config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold}, @@ -397,9 +398,16 @@ impl Timeline { if size.is_err() { // ignore, see above comment } else { - warn!( - "failed to collect keyspace but succeeded in calculating logical size: {e:#}" - ); + match e { + CollectKeySpaceError::Cancelled => { + // Shutting down, ignore + } + err => { + warn!( + "failed to collect keyspace but succeeded in calculating logical size: {err:#}" + ); + } + } } } }