From fe156245708525d87bd3682595a3383e389efc65 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 27 Mar 2023 13:33:40 +0200 Subject: [PATCH] eviction_task: only refresh layer accesses once per p.threshold (#3877) Without this, we run it every p.period, which can be quite low. For example, the running experiment with 3000 tenants in prod uses a period of 1 minute. Doing it once per p.threshold is enough to prevent eviction. --- pageserver/src/tenant/timeline.rs | 8 ++++++++ pageserver/src/tenant/timeline/eviction_task.rs | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 5fde1a77e0..dfa0e842f1 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -71,6 +71,8 @@ use crate::ZERO_PAGE; use crate::{is_temporary, task_mgr}; use walreceiver::spawn_connection_manager_task; +use self::eviction_task::EvictionTaskTimelineState; + use super::layer_map::BatchedUpdates; use super::remote_timeline_client::index::IndexPart; use super::remote_timeline_client::RemoteTimelineClient; @@ -216,6 +218,8 @@ pub struct Timeline { download_all_remote_layers_task_info: RwLock>, state: watch::Sender, + + eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>, } /// Internal structure to hold all data needed for logical size calculation. 
@@ -1252,6 +1256,10 @@ impl Timeline { download_all_remote_layers_task_info: RwLock::new(None), state, + + eviction_task_timeline_state: tokio::sync::Mutex::new( + EvictionTaskTimelineState::default(), + ), }; result.repartition_threshold = result.get_checkpoint_distance() / 10; result diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 666768ff87..06dfe7a0b9 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -35,6 +35,11 @@ use crate::{ use super::Timeline; +#[derive(Default)] +pub struct EvictionTaskTimelineState { + last_refresh_required_in_restart: Option<tokio::time::Instant>, +} + impl Timeline { pub(super) fn launch_eviction_task(self: &Arc<Self>) { let self_clone = Arc::clone(self); @@ -139,7 +144,15 @@ impl Timeline { // for active tenants this will likely materialized page cache or in-memory layers. for // inactive tenants it will refresh the last_access timestamps so that we will not evict // and re-download on restart these layers. - self.refresh_layers_required_in_restart(cancel, ctx).await; + let mut state = self.eviction_task_timeline_state.lock().await; + match state.last_refresh_required_in_restart { + Some(ts) if ts.elapsed() < p.threshold => { /* no need to run */ } + _ => { + self.refresh_layers_required_in_restart(cancel, ctx).await; + state.last_refresh_required_in_restart = Some(tokio::time::Instant::now()) + } + } + drop(state); if cancel.is_cancelled() { return ControlFlow::Break(());