From 700b102b0ffb7447f577a94e3b79b33ff48ba519 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Fri, 25 Oct 2024 17:48:29 +0300 Subject: [PATCH] safekeeper: retry eviction. (#9485) Without this, the manager may sleep forever after an eviction failure instead of retrying. --- safekeeper/src/bin/safekeeper.rs | 2 ++ safekeeper/src/timeline_eviction.rs | 9 +++++---- safekeeper/src/timeline_manager.rs | 7 ++++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 1e5f963a4f..1248428d33 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -193,6 +193,8 @@ struct Args { /// Usually, timeline eviction has to wait for `partial_backup_timeout` before being eligible for eviction, /// but if a timeline is un-evicted and then _not_ written to, it would immediately flap to evicting again, /// if it weren't for `eviction_min_resident` preventing that. + /// + /// Also defines interval for eviction retries. #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_EVICTION_MIN_RESIDENT)] eviction_min_resident: Duration, } diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index fae6571277..f5363ae9b0 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -66,15 +66,15 @@ impl Manager { ready } - /// Evict the timeline to remote storage. + /// Evict the timeline to remote storage. Returns whether the eviction was successful. 
#[instrument(name = "evict_timeline", skip_all)] - pub(crate) async fn evict_timeline(&mut self) { + pub(crate) async fn evict_timeline(&mut self) -> bool { assert!(!self.is_offloaded); let partial_backup_uploaded = match &self.partial_backup_uploaded { Some(p) => p.clone(), None => { warn!("no partial backup uploaded, skipping eviction"); - return; + return false; } }; @@ -91,11 +91,12 @@ impl Manager { if let Err(e) = do_eviction(self, &partial_backup_uploaded).await { warn!("failed to evict timeline: {:?}", e); - return; + return false; } info!("successfully evicted timeline"); NUM_EVICTED_TIMELINES.inc(); + true } /// Attempt to restore evicted timeline from remote storage; it must be diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index 2129e86baa..f0583dd3ff 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -297,7 +297,12 @@ pub async fn main_task( match mgr.global_rate_limiter.try_acquire_eviction() { Some(_permit) => { mgr.set_status(Status::EvictTimeline); - mgr.evict_timeline().await; + if !mgr.evict_timeline().await { + // eviction failed, try again later + mgr.evict_not_before = + Instant::now() + rand_duration(&mgr.conf.eviction_min_resident); + update_next_event(&mut next_event, mgr.evict_not_before); + } } None => { // we can't evict timeline now, will try again later