From e4e444f59fc1ba2c5241acaabd143d2d3cabfb31 Mon Sep 17 00:00:00 2001 From: Arthur Petukhovsky Date: Thu, 6 Jun 2024 18:54:44 +0100 Subject: [PATCH] Remove random sleep in partial backup (#7982) We had a random sleep at the beginning of the partial backup task, which was needed for the first partial backup deploy. It helped with gradual upload of segments without causing network overload. Now partial backup is deployed everywhere, so we don't need this random sleep anymore. We also had an issue related to this, in which the manager task was not shut down for a long time. The cause of the issue was this random sleep, which didn't take timeline cancellation into account while the manager task waited for the partial backup to complete. Fixes https://github.com/neondatabase/neon/issues/7967 --- safekeeper/src/timeline_manager.rs | 3 +++ safekeeper/src/wal_backup_partial.rs | 8 -------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index 7174d843fc..087b988c69 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -213,6 +213,9 @@ pub async fn main_task( } }; + // remove timeline from the broker active set sooner, before waiting for background tasks + tli_broker_active.set(false); + // shutdown background tasks if conf.is_wal_backup_enabled() { wal_backup::update_task(&conf, &tli, false, &last_state, &mut backup_task).await; diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index 6c0f35095b..ed5ddb71f5 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -20,7 +20,6 @@ use camino::Utf8PathBuf; use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; -use rand::Rng; use remote_storage::RemotePath; use serde::{Deserialize, Serialize}; @@ -276,13 +275,6 @@ pub async fn main_task(tli: FullAccessTimeline, conf: SafeKeeperConf) { debug!("started"); let await_duration = 
conf.partial_backup_timeout; - // sleep for random time to avoid thundering herd - { - let randf64 = rand::thread_rng().gen_range(0.0..1.0); - let sleep_duration = await_duration.mul_f64(randf64); - tokio::time::sleep(sleep_duration).await; - } - let (_, persistent_state) = tli.get_state().await; let mut commit_lsn_rx = tli.get_commit_lsn_watch_rx(); let mut flush_lsn_rx = tli.get_term_flush_lsn_watch_rx();