pageserver: add LayerVisibilityHint (#8432)

## Problem As described in https://github.com/neondatabase/neon/issues/8398, layer visibility is a new hint that will help us manage disk space more efficiently. ## Summary of changes - Introduce LayerVisibilityHint and store it as part of access stats - Automatically mark a layer visible if it is accessed, or when it is created. The impact on the access stats size will be reversed in https://github.com/neondatabase/neon/pull/8431 This is functionally a no-op change: subsequent PRs will add the logic that sets layers to Covered, and which uses the layer visibility as an input to eviction and heatmap generation. --------- Co-authored-by: Joonas Koivunen <joonas@neon.tech>
2026-01-08 22:12:56 +00:00 · 2024-07-23 15:37:12 +01:00
parent 129f348aae
commit 1a4c1eba92
4 changed files with 48 additions and 3 deletions
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -458,6 +458,26 @@ pub enum ValueReconstructResult {
    Missing,
 }

+/// Layers contain a hint indicating whether they are likely to be used for reads.  This is a hint rather
+/// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
+/// of layers (for example when creating a branch that makes some previously covered layers visible).  It should
+/// be used for cache management but not for correctness-critical checks.
+#[derive(Default, Debug, Clone)]
+pub(crate) enum LayerVisibilityHint {
+    /// A Visible layer might be read while serving a read, because there is not an image layer between it
+    /// and a readable LSN (the tip of the branch or a child's branch point)
+    Visible,
+    /// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates
+    /// a branch or ephemeral endpoint at an LSN below the layer that covers this.
+    #[allow(unused)]
+    Covered,
+    /// Calculating layer visibilty requires I/O, so until this has happened layers are loaded
+    /// in this state.  Note that newly written layers may be called Visible immediately, this uninitialized
+    /// state is for when existing layers are constructed while loading a timeline.
+    #[default]
+    Uninitialized,
+}
+
 #[derive(Debug)]
 pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);

@@ -469,6 +489,7 @@ pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
 struct LayerAccessStatsLocked {
    for_scraping_api: LayerAccessStatsInner,
    for_eviction_policy: LayerAccessStatsInner,
+    visibility: LayerVisibilityHint,
 }

 impl LayerAccessStatsLocked {
@@ -592,7 +613,13 @@ impl LayerAccessStats {
            inner.count_by_access_kind[access_kind] += 1;
            inner.task_kind_flag |= ctx.task_kind();
            inner.last_accesses.write(this_access);
-        })
+        });
+
+        // We may access a layer marked as Covered, if a new branch was created that depends on
+        // this layer, and background updates to layer visibility didn't notice it yet
+        if !matches!(locked.visibility, LayerVisibilityHint::Visible) {
+            locked.visibility = LayerVisibilityHint::Visible;
+        }
    }

    fn as_api_model(
@@ -694,6 +721,14 @@ impl LayerAccessStats {
            (Some(a), Some(r)) => a.when >= r.timestamp,
        }
    }
+
+    pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) {
+        self.0.lock().unwrap().visibility = visibility;
+    }
+
+    pub(crate) fn visibility(&self) -> LayerVisibilityHint {
+        self.0.lock().unwrap().visibility.clone()
+    }
 }

 /// Get a layer descriptor from a layer.
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -250,6 +250,8 @@ impl Layer {
                LayerResidenceStatus::Resident,
                LayerResidenceEventReason::LayerCreate,
            );
+            // Newly created layers are marked visible by default: the usual case is that they were created to be read.
+            access_stats.set_visibility(super::LayerVisibilityHint::Visible);

            let local_path = local_layer_path(
                conf,
--- a/pageserver/src/tenant/storage_layer/layer/tests.rs
+++ b/pageserver/src/tenant/storage_layer/layer/tests.rs
@@ -826,9 +826,9 @@ async fn eviction_cancellation_on_drop() {
 #[test]
 #[cfg(target_arch = "x86_64")]
 fn layer_size() {
-    assert_eq!(std::mem::size_of::<LayerAccessStats>(), 2040);
+    assert_eq!(std::mem::size_of::<LayerAccessStats>(), 2048);
    assert_eq!(std::mem::size_of::<PersistentLayerDesc>(), 104);
-    assert_eq!(std::mem::size_of::<LayerInner>(), 2344);
+    assert_eq!(std::mem::size_of::<LayerInner>(), 2352);
    // it also has the utf8 path
 }

--- a/pageserver/src/tenant/timeline/layer_manager.rs
+++ b/pageserver/src/tenant/timeline/layer_manager.rs
@@ -255,6 +255,14 @@ impl LayerManager {
                new_layer.layer_desc().lsn_range
            );

+            // Transfer visibilty hint from old to new layer, since the new layer covers the same key space.  This is not guaranteed to
+            // be accurate (as the new layer may cover a different subset of the key range), but is a sensible default, and prevents
+            // always marking rewritten layers as visible.
+            new_layer
+                .as_ref()
+                .access_stats()
+                .set_visibility(old_layer.access_stats().visibility());
+
            // Safety: we may never rewrite the same file in-place.  Callers are responsible
            // for ensuring that they only rewrite layers after something changes the path,
            // such as an increment in the generation number.