mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
fix(safekeeper): retry if open segment fail (#12757)
## Problem Fix LKB-2632. The safekeeper wal read path does not seem to retry at all. This would cause client read errors on the customer side. ## Summary of changes - Retry on `safekeeper::wal_backup::read_object`. - Note that this only retries on S3 HTTP connection errors. Subsequent reads could fail, and that needs more refactors to make the retry mechanism work across the path. --------- Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -12,7 +12,7 @@ use futures::stream::{self, FuturesOrdered};
|
||||
use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr;
|
||||
use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo};
|
||||
use remote_storage::{
|
||||
DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata,
|
||||
DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata,
|
||||
};
|
||||
use safekeeper_api::models::PeerInfo;
|
||||
use tokio::fs::File;
|
||||
@@ -607,6 +607,9 @@ pub(crate) async fn copy_partial_segment(
|
||||
storage.copy_object(source, destination, &cancel).await
|
||||
}
|
||||
|
||||
const WAL_READ_WARN_THRESHOLD: u32 = 2;
|
||||
const WAL_READ_MAX_RETRIES: u32 = 3;
|
||||
|
||||
pub async fn read_object(
|
||||
storage: &GenericRemoteStorage,
|
||||
file_path: &RemotePath,
|
||||
@@ -620,12 +623,23 @@ pub async fn read_object(
|
||||
byte_start: std::ops::Bound::Included(offset),
|
||||
..Default::default()
|
||||
};
|
||||
let download = storage
|
||||
.download(file_path, &opts, &cancel)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open WAL segment download stream for remote path {file_path:?}")
|
||||
})?;
|
||||
|
||||
// This retry only solves the connect errors: subsequent reads can still fail as this function returns
|
||||
// a stream.
|
||||
let download = backoff::retry(
|
||||
|| async { storage.download(file_path, &opts, &cancel).await },
|
||||
DownloadError::is_permanent,
|
||||
WAL_READ_WARN_THRESHOLD,
|
||||
WAL_READ_MAX_RETRIES,
|
||||
"download WAL segment",
|
||||
&cancel,
|
||||
)
|
||||
.await
|
||||
.ok_or_else(|| DownloadError::Cancelled)
|
||||
.and_then(|x| x)
|
||||
.with_context(|| {
|
||||
format!("Failed to open WAL segment download stream for remote path {file_path:?}")
|
||||
})?;
|
||||
|
||||
let reader = tokio_util::io::StreamReader::new(download.download_stream);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user