Remove random sleep in partial backup (#7982)

We had a random sleep at the beginning of the partial backup task, which was needed for the first partial backup deploy. It helped spread out the upload of segments gradually, without causing network overload. Now that partial backup is deployed everywhere, we don't need this random sleep anymore. We also had an issue related to this, in which the manager task was not shut down for a long time. The cause of the issue was that this random sleep didn't take timeline cancellation into account, while the manager task waited for the partial backup task to complete. In addition, the manager now removes the timeline from the broker active set before waiting for background tasks to shut down. Fixes https://github.com/neondatabase/neon/issues/7967
2026-01-07 21:42:56 +00:00 · 2024-06-06 18:54:44 +01:00
parent d46d19456d
commit e4e444f59f
2 changed files with 3 additions and 8 deletions
--- a/safekeeper/src/timeline_manager.rs
+++ b/safekeeper/src/timeline_manager.rs
@@ -213,6 +213,9 @@ pub async fn main_task(
        }
    };

+    // remove timeline from the broker active set sooner, before waiting for background tasks
+    tli_broker_active.set(false);
+
    // shutdown background tasks
    if conf.is_wal_backup_enabled() {
        wal_backup::update_task(&conf, &tli, false, &last_state, &mut backup_task).await;
--- a/safekeeper/src/wal_backup_partial.rs
+++ b/safekeeper/src/wal_backup_partial.rs
@@ -20,7 +20,6 @@

 use camino::Utf8PathBuf;
 use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI};
-use rand::Rng;
 use remote_storage::RemotePath;
 use serde::{Deserialize, Serialize};

@@ -276,13 +275,6 @@ pub async fn main_task(tli: FullAccessTimeline, conf: SafeKeeperConf) {
    debug!("started");
    let await_duration = conf.partial_backup_timeout;

-    // sleep for random time to avoid thundering herd
-    {
-        let randf64 = rand::thread_rng().gen_range(0.0..1.0);
-        let sleep_duration = await_duration.mul_f64(randf64);
-        tokio::time::sleep(sleep_duration).await;
-    }
-
    let (_, persistent_state) = tli.get_state().await;
    let mut commit_lsn_rx = tli.get_commit_lsn_watch_rx();
    let mut flush_lsn_rx = tli.get_term_flush_lsn_watch_rx();