From 6014f15157b3a47f122b814b3cf109e9d1851abd Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 20 Sep 2024 17:07:09 +0100 Subject: [PATCH] pageserver: suppress noisy "layer became visible" logs (#9064) ## Problem When layer visibility was added, an info log was included for the situation where actual access to a layer disagrees with the visibility calculation. This situation is safe, but I was interested in seeing when it happens. The log is pretty high volume, so this PR refines it to fire less often. ## Summary of changes - For cases where accessing non-visible layers is normal, don't log at all. - Extend a unit test to increase confidence that the updates to visibility on access are working as expected - During compaction, only call the visibility calculation routine if some image layers were created: previously, frequent calls resulted in the visibility of layers getting reset every time we passed through create_image_layers. --- pageserver/src/tenant/storage_layer/layer.rs | 29 +++++++++++++++---- .../src/tenant/storage_layer/layer/tests.rs | 9 ++++++ pageserver/src/tenant/timeline.rs | 4 ++- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index b15cd4da39..f0e2ca5c83 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -439,11 +439,30 @@ impl Layer { fn record_access(&self, ctx: &RequestContext) { if self.0.access_stats.record_access(ctx) { - // Visibility was modified to Visible - tracing::info!( - "Layer {} became visible as a result of access", - self.0.desc.key() - ); + // Visibility was modified to Visible: maybe log about this + match ctx.task_kind() { + TaskKind::CalculateSyntheticSize + | TaskKind::GarbageCollector + | TaskKind::MgmtRequest => { + // This situation is expected in code paths do binary searches of the LSN space to resolve + // an LSN to a timestamp, which happens during GC, during GC cutoff calculations in synthetic size, + // and on-demand for certain HTTP API requests. + } + _ => { + // In all other contexts, it is unusual to do I/O involving layers which are not visible at + // some branch tip, so we log the fact that we are accessing something that the visibility + // calculation thought should not be visible. + // + // This case is legal in brief time windows: for example an in-flight getpage request can hold on to a layer object + // which was covered by a concurrent compaction. + tracing::info!( + "Layer {} became visible as a result of access", + self.0.desc.key() + ); + } + } + + // Update the timeline's visible bytes count if let Some(tl) = self.0.timeline.upgrade() { tl.metrics .visible_physical_size_gauge diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index 0b9bde4f57..9de70f14ee 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -1025,6 +1025,15 @@ fn access_stats() { assert_eq!(access_stats.latest_activity(), lowres_time(atime)); access_stats.set_visibility(LayerVisibilityHint::Visible); assert_eq!(access_stats.latest_activity(), lowres_time(atime)); + + // Recording access implicitly makes layer visible, if it wasn't already + let atime = UNIX_EPOCH + Duration::from_secs(2200000000); + access_stats.set_visibility(LayerVisibilityHint::Covered); + assert_eq!(access_stats.visibility(), LayerVisibilityHint::Covered); + assert!(access_stats.record_access_at(atime)); + access_stats.set_visibility(LayerVisibilityHint::Visible); + assert!(!access_stats.record_access_at(atime)); + access_stats.set_visibility(LayerVisibilityHint::Visible); } #[test] diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index f08f5caf95..114a6dd468 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4316,7 +4316,9 @@ impl Timeline { timer.stop_and_record(); // Creating image layers may have caused some previously visible layers to be covered - self.update_layer_visibility().await?; + if !image_layers.is_empty() { + self.update_layer_visibility().await?; + } Ok(image_layers) }