From 42229aacf60831443d9ec5e2342db34a143f9f1d Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 6 Aug 2024 14:47:01 +0100 Subject: [PATCH] pageserver: fixes for layer visibility metric (#8603) ## Problem In staging, we could see that occasionally tenants were wrapping their pageserver_visible_physical_size metric past zero to 2^64. This is harmless right now, but will matter more later when we start using visible size in things like the /utilization endpoint. ## Summary of changes - Add debug asserts that detect this case. `test_gc_of_remote_layers` works as a reproducer for this issue once the asserts are added. - Tighten up the interface around access_stats so that only Layer can mutate it. - In Layer, wrap calls to `record_access` in code that will update the visible size statistic if the access implicitly marks the layer visible (this was what caused the bug) - In LayerManager::rewrite_layers, use the proper set_visibility layer function instead of directly using access_stats (this is an additional path where metrics could go bad.) - Removed unused instances of LayerAccessStats in DeltaLayer and ImageLayer which I noticed while reviewing the code paths that call record_access. --- pageserver/src/tenant/storage_layer.rs | 14 +++++--- .../src/tenant/storage_layer/delta_layer.rs | 8 +---- .../src/tenant/storage_layer/image_layer.rs | 4 --- pageserver/src/tenant/storage_layer/layer.rs | 35 ++++++++++++++++--- pageserver/src/tenant/timeline.rs | 4 +-- .../src/tenant/timeline/eviction_task.rs | 2 +- .../src/tenant/timeline/layer_manager.rs | 7 ++-- 7 files changed, 46 insertions(+), 28 deletions(-) diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index ab32a6035e..04f89db401 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -539,19 +539,25 @@ impl LayerAccessStats { self.record_residence_event_at(SystemTime::now()) } - pub(crate) fn record_access_at(&self, now: SystemTime) { + fn record_access_at(&self, now: SystemTime) -> bool { let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now); // A layer which is accessed must be visible. mask |= 0x1 << Self::VISIBILITY_SHIFT; value |= 0x1 << Self::VISIBILITY_SHIFT; - self.write_bits(mask, value); + let old_bits = self.write_bits(mask, value); + !matches!( + self.decode_visibility(old_bits), + LayerVisibilityHint::Visible + ) } - pub(crate) fn record_access(&self, ctx: &RequestContext) { + /// Returns true if we modified the layer's visibility to set it to Visible implicitly + /// as a result of this access + pub(crate) fn record_access(&self, ctx: &RequestContext) -> bool { if ctx.access_stats_behavior() == AccessStatsBehavior::Skip { - return; + return false; } self.record_access_at(SystemTime::now()) diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index a17dd28547..962faa6796 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -72,10 +72,7 @@ use utils::{ lsn::Lsn, }; -use super::{ - AsLayerDesc, LayerAccessStats, LayerName, PersistentLayerDesc, ResidentLayer, - ValuesReconstructState, -}; +use super::{AsLayerDesc, LayerName, PersistentLayerDesc, ResidentLayer, ValuesReconstructState}; /// /// Header stored in the beginning of the file @@ -200,7 +197,6 @@ impl DeltaKey { pub struct DeltaLayer { path: Utf8PathBuf, pub desc: PersistentLayerDesc, - access_stats: LayerAccessStats, inner: OnceCell>, } @@ -299,7 +295,6 @@ impl DeltaLayer { /// not loaded already. /// async fn load(&self, ctx: &RequestContext) -> Result<&Arc> { - self.access_stats.record_access(ctx); // Quick exit if already loaded self.inner .get_or_try_init(|| self.load_inner(ctx)) @@ -350,7 +345,6 @@ impl DeltaLayer { summary.lsn_range, metadata.len(), ), - access_stats: Default::default(), inner: OnceCell::new(), }) } diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index b2173455ab..16ba0fda94 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -32,7 +32,6 @@ use crate::tenant::block_io::{BlockBuf, BlockReader, FileBlockReader}; use crate::tenant::disk_btree::{ DiskBtreeBuilder, DiskBtreeIterator, DiskBtreeReader, VisitDirection, }; -use crate::tenant::storage_layer::LayerAccessStats; use crate::tenant::timeline::GetVectoredError; use crate::tenant::vectored_blob_io::{ BlobFlag, MaxVectoredReadBytes, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, @@ -135,7 +134,6 @@ pub struct ImageLayer { pub desc: PersistentLayerDesc, // This entry contains an image of all pages as of this LSN, should be the same as desc.lsn pub lsn: Lsn, - access_stats: LayerAccessStats, inner: OnceCell, } @@ -253,7 +251,6 @@ impl ImageLayer { /// not loaded already. /// async fn load(&self, ctx: &RequestContext) -> Result<&ImageLayerInner> { - self.access_stats.record_access(ctx); self.inner .get_or_try_init(|| self.load_inner(ctx)) .await @@ -304,7 +301,6 @@ impl ImageLayer { metadata.len(), ), // Now we assume image layer ALWAYS covers the full range. This may change in the future. lsn: summary.lsn, - access_stats: Default::default(), inner: OnceCell::new(), }) } diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index cee2fe7342..83450d24bb 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -316,7 +316,7 @@ impl Layer { other => GetVectoredError::Other(anyhow::anyhow!(other)), })?; - self.0.access_stats.record_access(ctx); + self.record_access(ctx); layer .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, &self.0, ctx) @@ -396,8 +396,12 @@ impl Layer { self.0.info(reset) } - pub(crate) fn access_stats(&self) -> &LayerAccessStats { - &self.0.access_stats + pub(crate) fn latest_activity(&self) -> SystemTime { + self.0.access_stats.latest_activity() + } + + pub(crate) fn visibility(&self) -> LayerVisibilityHint { + self.0.access_stats.visibility() } pub(crate) fn local_path(&self) -> &Utf8Path { @@ -447,13 +451,31 @@ impl Layer { } } + fn record_access(&self, ctx: &RequestContext) { + if self.0.access_stats.record_access(ctx) { + // Visibility was modified to Visible + tracing::info!( + "Layer {} became visible as a result of access", + self.0.desc.key() + ); + if let Some(tl) = self.0.timeline.upgrade() { + tl.metrics + .visible_physical_size_gauge + .add(self.0.desc.file_size) + } + } + } + pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) { - let old_visibility = self.access_stats().set_visibility(visibility.clone()); + let old_visibility = self.0.access_stats.set_visibility(visibility.clone()); use LayerVisibilityHint::*; match (old_visibility, visibility) { (Visible, Covered) => { // Subtract this layer's contribution to the visible size metric if let Some(tl) = self.0.timeline.upgrade() { + debug_assert!( + tl.metrics.visible_physical_size_gauge.get() >= self.0.desc.file_size + ); tl.metrics .visible_physical_size_gauge .sub(self.0.desc.file_size) @@ -671,6 +693,9 @@ impl Drop for LayerInner { } if matches!(self.access_stats.visibility(), LayerVisibilityHint::Visible) { + debug_assert!( + timeline.metrics.visible_physical_size_gauge.get() >= self.desc.file_size + ); timeline .metrics .visible_physical_size_gauge @@ -1810,7 +1835,7 @@ impl ResidentLayer { // this is valid because the DownloadedLayer::kind is a OnceCell, not a // Mutex, so we cannot go and deinitialize the value with OnceCell::take // while it's being held. - owner.access_stats.record_access(ctx); + self.owner.record_access(ctx); delta_layer::DeltaLayerInner::load_keys(d, ctx) .await diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 5a02fd4a4c..6c67fb9cb6 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2920,7 +2920,7 @@ impl Timeline { let guard = self.layers.read().await; let resident = guard.likely_resident_layers().map(|layer| { - let last_activity_ts = layer.access_stats().latest_activity(); + let last_activity_ts = layer.latest_activity(); HeatMapLayer::new( layer.layer_desc().layer_name(), @@ -5182,7 +5182,7 @@ impl Timeline { let file_size = layer.layer_desc().file_size; max_layer_size = max_layer_size.map_or(Some(file_size), |m| Some(m.max(file_size))); - let last_activity_ts = layer.access_stats().latest_activity(); + let last_activity_ts = layer.latest_activity(); EvictionCandidate { layer: layer.into(), diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index fec66aabc1..1ba1bf9de5 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -225,7 +225,7 @@ impl Timeline { continue; } - let last_activity_ts = layer.access_stats().latest_activity(); + let last_activity_ts = layer.latest_activity(); let no_activity_for = match now.duration_since(last_activity_ts) { Ok(d) => d, diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index 1bc2acbd34..e6e7bc2e77 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -259,13 +259,10 @@ impl LayerManager { new_layer.layer_desc().lsn_range ); - // Transfer visibilty hint from old to new layer, since the new layer covers the same key space. This is not guaranteed to + // Transfer visibility hint from old to new layer, since the new layer covers the same key space. This is not guaranteed to // be accurate (as the new layer may cover a different subset of the key range), but is a sensible default, and prevents // always marking rewritten layers as visible. - new_layer - .as_ref() - .access_stats() - .set_visibility(old_layer.access_stats().visibility()); + new_layer.as_ref().set_visibility(old_layer.visibility()); // Safety: we may never rewrite the same file in-place. Callers are responsible // for ensuring that they only rewrite layers after something changes the path,