From 8ceb4f0a6994849524c5091ee374db94b7f49eb9 Mon Sep 17 00:00:00 2001 From: Arthur Petukhovsky Date: Fri, 5 Apr 2024 12:48:08 +0200 Subject: [PATCH] Fix partial zero segment upload (#7318) Found these logs on staging safekeepers: ``` INFO Partial backup{ttid=X/Y}: failed to upload 000000010000000000000000_173_0000000000000000_0000000000000000_sk56.partial: Failed to open file "/storage/safekeeper/data/X/Y/000000010000000000000000.partial" for wal backup: No such file or directory (os error 2) INFO Partial backup{ttid=X/Y}:upload{name=000000010000000000000000_173_0000000000000000_0000000000000000_sk56.partial}: starting upload PartialRemoteSegment { status: InProgress, name: "000000010000000000000000_173_0000000000000000_0000000000000000_sk56.partial", commit_lsn: 0/0, flush_lsn: 0/0, term: 173 } ``` This is because partial backup tries to upload zero segment when there is no data in timeline. This PR fixes this bug introduced in #6530. --- safekeeper/src/wal_backup_partial.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index a535c814ea..200096ac5c 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -337,6 +337,17 @@ pub async fn main_task(tli: Arc, conf: SafeKeeperConf) { } } + // if we don't have any data and zero LSNs, wait for something + while flush_lsn_rx.borrow().lsn == Lsn(0) { + tokio::select! { + _ = cancellation_rx.changed() => { + info!("timeline canceled"); + return; + } + _ = flush_lsn_rx.changed() => {} + } + } + // fixing the segno and waiting some time to prevent reuploading the same segment too often let pending_segno = backup.segno(flush_lsn_rx.borrow().lsn); let timeout = tokio::time::sleep(await_duration);