From ba4a96fdb1cc08098c57e5b4f75492c5ea30345b Mon Sep 17 00:00:00 2001 From: Arthur Petukhovsky Date: Thu, 6 Apr 2023 20:57:06 +0300 Subject: [PATCH] Eagerly update wal_backup_lsn after each segment offload (#3976) Otherwise it can lag a lot, preventing WAL segment cleanup. Also take the max of the current and new wal_backup_lsn on update; pulling it down is pointless. Should help with https://github.com/neondatabase/neon/issues/3957, but will not fix it completely. --- safekeeper/src/timeline.rs | 3 ++- safekeeper/src/wal_backup.rs | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 9dd8a63cf0..8097c863fa 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -674,7 +674,8 @@ impl Timeline { bail!(TimelineError::Cancelled(self.ttid)); } - self.write_shared_state().sk.inmem.backup_lsn = backup_lsn; + let mut state = self.write_shared_state(); + state.sk.inmem.backup_lsn = max(state.sk.inmem.backup_lsn, backup_lsn); // we should check whether to shut down offloader, but this will be done // soon by peer communication anyway. 
Ok(()) diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 798b9abaf3..163ac99be8 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -323,7 +323,8 @@ impl WalBackupTask { } match backup_lsn_range( - backup_lsn, + &self.timeline, + &mut backup_lsn, commit_lsn, self.wal_seg_size, &self.timeline_dir, @@ -331,13 +332,7 @@ impl WalBackupTask { ) .await { - Ok(backup_lsn_result) => { - backup_lsn = backup_lsn_result; - let res = self.timeline.set_wal_backup_lsn(backup_lsn_result); - if let Err(e) = res { - error!("failed to set wal_backup_lsn: {}", e); - return; - } + Ok(()) => { retry_attempt = 0; } Err(e) => { @@ -354,20 +349,25 @@ impl WalBackupTask { } pub async fn backup_lsn_range( - start_lsn: Lsn, + timeline: &Arc<Timeline>, + backup_lsn: &mut Lsn, end_lsn: Lsn, wal_seg_size: usize, timeline_dir: &Path, workspace_dir: &Path, -) -> Result<Lsn> { - let mut res = start_lsn; +) -> Result<()> { + let start_lsn = *backup_lsn; let segments = get_segments(start_lsn, end_lsn, wal_seg_size); for s in &segments { backup_single_segment(s, timeline_dir, workspace_dir) .await .with_context(|| format!("offloading segno {}", s.seg_no))?; - res = s.end_lsn; + let new_backup_lsn = s.end_lsn; + timeline + .set_wal_backup_lsn(new_backup_lsn) + .context("setting wal_backup_lsn")?; + *backup_lsn = new_backup_lsn; } info!( "offloaded segnos {:?} up to {}, previous backup_lsn {}", @@ -375,7 +375,7 @@ pub async fn backup_lsn_range( end_lsn, start_lsn, ); - Ok(res) + Ok(()) } async fn backup_single_segment(