diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 8517fa0344..2f6b91cf47 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -310,9 +310,12 @@ impl WalBackupTask { retry_attempt = 0; } Err(e) => { + // We might have managed to upload some segment even though + // some later in the range failed, so log backup_lsn + // separately. error!( - "failed while offloading range {}-{}: {:?}", - backup_lsn, commit_lsn, e + "failed while offloading range {}-{}, backup_lsn {}: {:?}", + backup_lsn, commit_lsn, backup_lsn, e ); retry_attempt = retry_attempt.saturating_add(1); @@ -338,6 +341,13 @@ async fn backup_lsn_range( let start_lsn = *backup_lsn; let segments = get_segments(start_lsn, end_lsn, wal_seg_size); + info!( + "offloading segnos {:?} of range [{}-{})", + segments.iter().map(|&s| s.seg_no).collect::<Vec<_>>(), + start_lsn, + end_lsn, + ); + // Pool of concurrent upload tasks. We use `FuturesOrdered` to // preserve order of uploads, and update `backup_lsn` only after // all previous uploads are finished. @@ -374,10 +384,10 @@ async fn backup_lsn_range( } info!( - "offloaded segnos {:?} up to {}, previous backup_lsn {}", + "offloaded segnos {:?} of range [{}-{})", segments.iter().map(|&s| s.seg_no).collect::<Vec<_>>(), - end_lsn, start_lsn, + end_lsn, ); Ok(()) } diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 2b6a267bdf..21b2ad479c 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -566,10 +566,14 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder): assert_prefix_empty(neon_env_builder.safekeepers_remote_storage, prefix) -def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder): +# This test is flaky, probably because PUTs of local fs storage are not atomic. +# Let's keep both remote storage kinds for a while to see if this is the case. 
+# https://github.com/neondatabase/neon/issues/10761 +@pytest.mark.parametrize("remote_storage_kind", [s3_storage(), RemoteStorageKind.LOCAL_FS]) +def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind): neon_env_builder.num_safekeepers = 3 - neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage()) + neon_env_builder.enable_safekeeper_remote_storage(remote_storage_kind) env = neon_env_builder.init_start() tenant_id = env.initial_tenant