From d3a97fdf881e69636f29f462efb0a3b799b854c8 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 14 Aug 2023 10:18:22 +0100 Subject: [PATCH] pageserver: avoid incrementing access time when reading layers for compaction (#4971) ## Problem Currently, image generation reads delta layers before writing out subsequent image layers, which updates the access time of the delta layers and effectively puts them at the back of the queue for eviction. This is the opposite of what we want, because after a delta layer is covered by a later image layer, it's likely that subsequent reads of latest data will hit the image rather than the delta layer, so the delta layer should be quite a good candidate for eviction. ## Summary of changes `RequestContext` gets a new `ATimeBehavior` field, and a `RequestContextBuilder` helper so that we can optionally add the new field without growing `RequestContext::new` every time we add something like this. Request context is passed into the `record_access` function, and the access time is not updated if `ATimeBehavior::Skip` is set. The compaction background task constructs its request context with this skip policy. Closes: https://github.com/neondatabase/neon/issues/4969 --- pageserver/src/context.rs | 79 +++++++++++++++++-- pageserver/src/tenant/storage_layer.rs | 12 ++- .../src/tenant/storage_layer/delta_layer.rs | 3 +- .../src/tenant/storage_layer/image_layer.rs | 3 +- pageserver/src/tenant/timeline.rs | 11 ++- 5 files changed, 90 insertions(+), 18 deletions(-) diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index a1a5c30ae9..2953208d1e 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -85,6 +85,7 @@ //! The solution is that all code paths are infected with precisely one //! [`RequestContext`] argument. Functions in the middle of the call chain //! only need to pass it on. + use crate::task_mgr::TaskKind; // The main structure of this module, see module-level comment. @@ -92,6 +93,7 @@ use crate::task_mgr::TaskKind; pub struct RequestContext { task_kind: TaskKind, download_behavior: DownloadBehavior, + access_stats_behavior: AccessStatsBehavior, } /// Desired behavior if the operation requires an on-demand download @@ -109,6 +111,67 @@ pub enum DownloadBehavior { Error, } +/// Whether this request should update access times used in LRU eviction +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub(crate) enum AccessStatsBehavior { + /// Update access times: this request's access to data should be taken + /// as a hint that the accessed layer is likely to be accessed again + Update, + + /// Do not update access times: this request is accessing the layer + /// but does not want to indicate that the layer should be retained in cache, + /// perhaps because the requestor is a compaction routine that will soon cover + /// this layer with another. + Skip, +} + +pub struct RequestContextBuilder { + inner: RequestContext, +} + +impl RequestContextBuilder { + /// A new builder with default settings + pub fn new(task_kind: TaskKind) -> Self { + Self { + inner: RequestContext { + task_kind, + download_behavior: DownloadBehavior::Download, + access_stats_behavior: AccessStatsBehavior::Update, + }, + } + } + + pub fn extend(original: &RequestContext) -> Self { + Self { + // This is like a Copy, but avoid implementing Copy because ordinary users of + // RequestContext should always move or ref it. + inner: RequestContext { + task_kind: original.task_kind, + download_behavior: original.download_behavior, + access_stats_behavior: original.access_stats_behavior, + }, + } + } + + /// Configure the DownloadBehavior of the context: whether to + /// download missing layers, and/or warn on the download. + pub fn download_behavior(mut self, b: DownloadBehavior) -> Self { + self.inner.download_behavior = b; + self + } + + /// Configure the AccessStatsBehavior of the context: whether layer + /// accesses should update the access time of the layer. + pub(crate) fn access_stats_behavior(mut self, b: AccessStatsBehavior) -> Self { + self.inner.access_stats_behavior = b; + self + } + + pub fn build(self) -> RequestContext { + self.inner + } +} + impl RequestContext { /// Create a new RequestContext that has no parent. /// @@ -123,10 +186,9 @@ impl RequestContext { /// because someone explicitly canceled it. /// It has no parent, so it cannot inherit cancellation from there. pub fn new(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self { - RequestContext { - task_kind, - download_behavior, - } + RequestContextBuilder::new(task_kind) + .download_behavior(download_behavior) + .build() } /// Create a detached child context for a task that may outlive `self`. @@ -187,10 +249,7 @@ impl RequestContext { } fn child_impl(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self { - RequestContext { - task_kind, - download_behavior, - } + Self::new(task_kind, download_behavior) } pub fn task_kind(&self) -> TaskKind { @@ -200,4 +259,8 @@ impl RequestContext { pub fn download_behavior(&self) -> DownloadBehavior { self.download_behavior } + + pub(crate) fn access_stats_behavior(&self) -> AccessStatsBehavior { + self.access_stats_behavior + } } diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 951fa38d8d..4107161aa1 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -8,7 +8,7 @@ mod layer_desc; mod remote_layer; use crate::config::PageServerConf; -use crate::context::RequestContext; +use crate::context::{AccessStatsBehavior, RequestContext}; use crate::repository::Key; use crate::task_mgr::TaskKind; use crate::walrecord::NeonWalRecord; @@ -241,10 +241,14 @@ impl LayerAccessStats { }); } - fn record_access(&self, access_kind: LayerAccessKind, task_kind: TaskKind) { + fn record_access(&self, access_kind: LayerAccessKind, ctx: &RequestContext) { + if ctx.access_stats_behavior() == AccessStatsBehavior::Skip { + return; + } + let this_access = LayerAccessStatFullDetails { when: SystemTime::now(), - task_kind, + task_kind: ctx.task_kind(), access_kind, }; @@ -252,7 +256,7 @@ impl LayerAccessStats { locked.iter_mut().for_each(|inner| { inner.first_access.get_or_insert(this_access); inner.count_by_access_kind[access_kind] += 1; - inner.task_kind_flag |= task_kind; + inner.task_kind_flag |= ctx.task_kind(); inner.last_accesses.write(this_access); }) } diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 5208fdb7b3..9a7108c4c8 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -452,8 +452,7 @@ impl DeltaLayer { access_kind: LayerAccessKind, ctx: &RequestContext, ) -> Result<&Arc> { - self.access_stats - .record_access(access_kind, ctx.task_kind()); + self.access_stats.record_access(access_kind, ctx); // Quick exit if already loaded self.inner .get_or_try_init(|| self.load_inner()) diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 2824abba75..2d5ef402ae 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -323,8 +323,7 @@ impl ImageLayer { access_kind: LayerAccessKind, ctx: &RequestContext, ) -> Result<&ImageLayerInner> { - self.access_stats - .record_access(access_kind, ctx.task_kind()); + self.access_stats.record_access(access_kind, ctx); self.inner .get_or_try_init(|| self.load_inner()) .await diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2a5db0516a..95e86c6bcc 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -35,7 +35,9 @@ use std::sync::atomic::Ordering as AtomicOrdering; use std::sync::{Arc, Mutex, RwLock, Weak}; use std::time::{Duration, Instant, SystemTime}; -use crate::context::{DownloadBehavior, RequestContext}; +use crate::context::{ + AccessStatsBehavior, DownloadBehavior, RequestContext, RequestContextBuilder, +}; use crate::tenant::remote_timeline_client::{self, index::LayerFileMetadata}; use crate::tenant::storage_layer::{ DeltaFileName, DeltaLayerWriter, ImageFileName, ImageLayerWriter, InMemoryLayer, @@ -799,10 +801,15 @@ impl Timeline { .await { Ok((partitioning, lsn)) => { + // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them + let image_ctx = RequestContextBuilder::extend(ctx) + .access_stats_behavior(AccessStatsBehavior::Skip) + .build(); + // 2. Create new image layers for partitions that have been modified // "enough". let layer_paths_to_upload = self - .create_image_layers(&partitioning, lsn, false, ctx) + .create_image_layers(&partitioning, lsn, false, &image_ctx) .await .map_err(anyhow::Error::from)?; if let Some(remote_client) = &self.remote_client {