pageserver: add LayerVisibilityHint (#8432)

## Problem

As described in https://github.com/neondatabase/neon/issues/8398, layer
visibility is a new hint that will help us manage disk space more
efficiently.

## Summary of changes

- Introduce LayerVisibilityHint and store it as part of access stats
- Automatically mark a layer visible if it is accessed, or when it is
created.

The impact on the access stats size will be reversed in
https://github.com/neondatabase/neon/pull/8431

This is functionally a no-op change: subsequent PRs will add the logic
that sets layers to Covered, and which uses the layer visibility as an
input to eviction and heatmap generation.

---------

Co-authored-by: Joonas Koivunen <joonas@neon.tech>
This commit is contained in:
John Spray
2024-07-23 15:37:12 +01:00
committed by GitHub
parent 129f348aae
commit 1a4c1eba92
4 changed files with 48 additions and 3 deletions

View File

@@ -458,6 +458,26 @@ pub enum ValueReconstructResult {
Missing,
}
/// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather
/// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
/// of layers (for example when creating a branch that makes some previously covered layers visible). It should
/// be used for cache management but not for correctness-critical checks.
#[derive(Default, Debug, Clone)]
pub(crate) enum LayerVisibilityHint {
/// A Visible layer might be read while serving a read, because there is not an image layer between it
/// and a readable LSN (the tip of the branch or a child's branch point)
Visible,
/// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates
/// a branch or ephemeral endpoint at an LSN below the layer that covers this.
#[allow(unused)]
Covered,
/// Calculating layer visibilty requires I/O, so until this has happened layers are loaded
/// in this state. Note that newly written layers may be called Visible immediately, this uninitialized
/// state is for when existing layers are constructed while loading a timeline.
#[default]
Uninitialized,
}
#[derive(Debug)]
pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
@@ -469,6 +489,7 @@ pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
struct LayerAccessStatsLocked {
for_scraping_api: LayerAccessStatsInner,
for_eviction_policy: LayerAccessStatsInner,
visibility: LayerVisibilityHint,
}
impl LayerAccessStatsLocked {
@@ -592,7 +613,13 @@ impl LayerAccessStats {
inner.count_by_access_kind[access_kind] += 1;
inner.task_kind_flag |= ctx.task_kind();
inner.last_accesses.write(this_access);
})
});
// We may access a layer marked as Covered, if a new branch was created that depends on
// this layer, and background updates to layer visibility didn't notice it yet
if !matches!(locked.visibility, LayerVisibilityHint::Visible) {
locked.visibility = LayerVisibilityHint::Visible;
}
}
fn as_api_model(
@@ -694,6 +721,14 @@ impl LayerAccessStats {
(Some(a), Some(r)) => a.when >= r.timestamp,
}
}
pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) {
self.0.lock().unwrap().visibility = visibility;
}
pub(crate) fn visibility(&self) -> LayerVisibilityHint {
self.0.lock().unwrap().visibility.clone()
}
}
/// Get a layer descriptor from a layer.

View File

@@ -250,6 +250,8 @@ impl Layer {
LayerResidenceStatus::Resident,
LayerResidenceEventReason::LayerCreate,
);
// Newly created layers are marked visible by default: the usual case is that they were created to be read.
access_stats.set_visibility(super::LayerVisibilityHint::Visible);
let local_path = local_layer_path(
conf,

View File

@@ -826,9 +826,9 @@ async fn eviction_cancellation_on_drop() {
#[test]
#[cfg(target_arch = "x86_64")]
fn layer_size() {
assert_eq!(std::mem::size_of::<LayerAccessStats>(), 2040);
assert_eq!(std::mem::size_of::<LayerAccessStats>(), 2048);
assert_eq!(std::mem::size_of::<PersistentLayerDesc>(), 104);
assert_eq!(std::mem::size_of::<LayerInner>(), 2344);
assert_eq!(std::mem::size_of::<LayerInner>(), 2352);
// it also has the utf8 path
}

View File

@@ -255,6 +255,14 @@ impl LayerManager {
new_layer.layer_desc().lsn_range
);
// Transfer visibilty hint from old to new layer, since the new layer covers the same key space. This is not guaranteed to
// be accurate (as the new layer may cover a different subset of the key range), but is a sensible default, and prevents
// always marking rewritten layers as visible.
new_layer
.as_ref()
.access_stats()
.set_visibility(old_layer.access_stats().visibility());
// Safety: we may never rewrite the same file in-place. Callers are responsible
// for ensuring that they only rewrite layers after something changes the path,
// such as an increment in the generation number.