Split out retry handler to separate module

Erik Grinaker
2025-07-04 20:20:09 +02:00
parent 00affada26
commit f6cc5cbd0c
3 changed files with 189 additions and 145 deletions

View File

@@ -3,16 +3,15 @@ use std::sync::Arc;
use anyhow::anyhow;
use futures::stream::FuturesUnordered;
use futures::{FutureExt as _, StreamExt};
use tokio::time::Instant;
use tracing::{error, info, instrument, warn};
use futures::{FutureExt as _, StreamExt as _};
use tracing::instrument;
use crate::pool::{ChannelPool, ClientGuard, ClientPool, StreamGuard, StreamPool};
use crate::retry::Retry;
use crate::split::GetPageSplitter;
use compute_api::spec::PageserverProtocol;
use pageserver_api::shard::ShardStripeSize;
use pageserver_page_api as page_api;
use utils::backoff::exponential_backoff_duration;
use utils::id::{TenantId, TimelineId};
use utils::shard::{ShardCount, ShardIndex, ShardNumber};
@@ -31,6 +30,7 @@ use utils::shard::{ShardCount, ShardIndex, ShardNumber};
pub struct PageserverClient {
// TODO: support swapping out the shard map, e.g. via an ArcSwap.
shards: Shards,
retry: Retry,
}
impl PageserverClient {
@@ -44,7 +44,10 @@ impl PageserverClient {
auth_token: Option<String>,
) -> anyhow::Result<Self> {
let shards = Shards::new(tenant_id, timeline_id, shard_map, stripe_size, auth_token)?;
Ok(Self { shards })
Ok(Self {
shards,
retry: Retry,
})
}
/// Returns whether a relation exists.
@@ -53,12 +56,13 @@ impl PageserverClient {
&self,
req: page_api::CheckRelExistsRequest,
) -> tonic::Result<page_api::CheckRelExistsResponse> {
self.with_retries(async || {
// Relation metadata is only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.check_rel_exists(req).await
})
.await
self.retry
.with(async || {
// Relation metadata is only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.check_rel_exists(req).await
})
.await
}
/// Returns the total size of a database, as # of bytes.
@@ -67,19 +71,20 @@ impl PageserverClient {
&self,
req: page_api::GetDbSizeRequest,
) -> tonic::Result<page_api::GetDbSizeResponse> {
self.with_retries(async || {
// Relation metadata is only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.get_db_size(req).await
})
.await
self.retry
.with(async || {
// Relation metadata is only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.get_db_size(req).await
})
.await
}
/// Fetches pages. The `request_id` must be unique across all in-flight requests. Will
/// automatically split requests that span multiple shards, and reassemble the responses.
/// Fetches pages. The `request_id` must be unique across all in-flight requests. Automatically
/// splits requests that straddle shard boundaries, and assembles the responses.
///
/// Unlike the `page_api::Client`, this client automatically converts `status_code` into
/// `tonic::Status` errors. All responses will have `GetPageStatusCode::Ok`.
/// Unlike `page_api::Client`, this automatically converts `status_code` into `tonic::Status`
/// errors. All responses will have `GetPageStatusCode::Ok`.
#[instrument(skip_all, fields(
req_id = %req.request_id,
rel = %req.rel,
@@ -134,7 +139,8 @@ impl PageserverClient {
req: page_api::GetPageRequest,
) -> tonic::Result<page_api::GetPageResponse> {
let resp = self
.with_retries(async || {
.retry
.with(async || {
let stream = self.shards.get(shard_id)?.stream().await;
let resp = stream.send(req.clone()).await?;
@@ -168,12 +174,13 @@ impl PageserverClient {
&self,
req: page_api::GetRelSizeRequest,
) -> tonic::Result<page_api::GetRelSizeResponse> {
self.with_retries(async || {
// Relation metadata is only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.get_rel_size(req).await
})
.await
self.retry
.with(async || {
// Relation metadata is only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.get_rel_size(req).await
})
.await
}
/// Fetches an SLRU segment.
@@ -182,123 +189,13 @@ impl PageserverClient {
&self,
req: page_api::GetSlruSegmentRequest,
) -> tonic::Result<page_api::GetSlruSegmentResponse> {
self.with_retries(async || {
// SLRU segments are only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.get_slru_segment(req).await
})
.await
}
/// Runs the given closure with retries (exponential backoff). Logs errors.
async fn with_retries<T, F, O>(&self, mut f: F) -> tonic::Result<T>
where
F: FnMut() -> O,
O: Future<Output = tonic::Result<T>>,
{
// TODO: tune these, and/or make them configurable. Should we retry forever?
const REQUEST_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
const TOTAL_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
const BASE_BACKOFF: f64 = 0.1;
const MAX_BACKOFF: f64 = 10.0;
const LOG_SUCCESS: bool = false; // TODO: for debugging
fn should_retry(code: tonic::Code) -> bool {
match code {
tonic::Code::Ok => panic!("unexpected Ok status code"),
// These codes are transient, so retry them.
tonic::Code::Aborted => true,
tonic::Code::Cancelled => true,
tonic::Code::DeadlineExceeded => true, // maybe transient slowness
tonic::Code::Internal => true, // maybe transient failure?
tonic::Code::ResourceExhausted => true,
tonic::Code::Unavailable => true,
// The following codes will likely continue to fail, so don't retry.
tonic::Code::AlreadyExists => false,
tonic::Code::DataLoss => false,
tonic::Code::FailedPrecondition => false,
tonic::Code::InvalidArgument => false,
tonic::Code::NotFound => false,
tonic::Code::OutOfRange => false,
tonic::Code::PermissionDenied => false,
tonic::Code::Unauthenticated => false,
tonic::Code::Unimplemented => false,
tonic::Code::Unknown => false,
}
}
let started = Instant::now();
let deadline = started + TOTAL_TIMEOUT;
let mut last_error = None;
let mut retries = 0;
loop {
// Set up a future to wait for the backoff (if any) and run the request with a timeout.
let backoff = exponential_backoff_duration(retries, BASE_BACKOFF, MAX_BACKOFF);
let backoff_and_try = async {
tokio::time::sleep(backoff).await;
let request_started = Instant::now();
tokio::time::timeout(REQUEST_TIMEOUT, f())
.await
.map_err(|_| {
tonic::Status::deadline_exceeded(format!(
"request timed out after {:.3}s",
request_started.elapsed().as_secs_f64()
))
})?
};
// Wait for the backoff and request, or bail out if the total timeout is exceeded.
let result = tokio::select! {
result = backoff_and_try => result,
_ = tokio::time::sleep_until(deadline) => {
let last_error = last_error.unwrap_or_else(|| {
tonic::Status::deadline_exceeded(format!(
"request timed out after {:.3}s",
started.elapsed().as_secs_f64()
))
});
error!(
"giving up after {:.3}s and {retries} retries, last error {:?}: {}",
started.elapsed().as_secs_f64(), last_error.code(), last_error.message(),
);
return Err(last_error);
}
};
match result {
Ok(result) => {
if retries > 0 || LOG_SUCCESS {
info!(
"request succeeded after {retries} retries in {:.3}s",
started.elapsed().as_secs_f64(),
);
}
return Ok(result);
}
Err(status) => {
let (code, message) = (status.code(), status.message());
let should_retry = should_retry(code);
let attempt = retries + 1;
if !should_retry {
// NB: include the attempt here too. This isn't necessarily the first
// attempt, because the error may change between attempts.
error!(
"request failed with {code:?}: {message}, not retrying (attempt {attempt})"
);
return Err(status);
}
warn!("request failed with {code:?}: {message}, retrying (attempt {attempt})");
retries += 1;
last_error = Some(status);
}
}
}
self.retry
.with(async || {
// SLRU segments are only available on shard 0.
let mut client = self.shards.get_zero().client().await?;
client.get_slru_segment(req).await
})
.await
}
}
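A minimal sketch of the new call-site shape in the client, with made-up names (`Example`, `get_answer`); it assumes the `crate::retry` module added below and a `tonic` dependency. Each RPC wrapper now runs its attempt closure through a `retry: Retry` field and `self.retry.with(...)` instead of the removed private `with_retries` helper.

use crate::retry::Retry;

struct Example {
    retry: Retry,
}

impl Example {
    async fn get_answer(&self) -> tonic::Result<u32> {
        self.retry
            .with(async || {
                // The closure is re-invoked on each attempt, so per-attempt setup
                // (acquiring a pooled client, cloning the request) belongs in here.
                Ok::<_, tonic::Status>(42)
            })
            .await
    }
}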

View File

@@ -1,5 +1,6 @@
mod client;
mod pool;
mod retry;
mod split;
pub use client::PageserverClient;

View File

@@ -0,0 +1,146 @@
use std::time::Duration;
use tokio::time::Instant;
use tracing::{error, info, warn};
use utils::backoff::exponential_backoff_duration;
/// A retry handler for Pageserver gRPC requests.
///
/// This is used instead of backoff::retry for better control and observability.
pub struct Retry;
impl Retry {
/// The per-request timeout.
// TODO: tune these, and/or make them configurable. Should we retry forever?
const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
/// The total timeout across all attempts.
const TOTAL_TIMEOUT: Duration = Duration::from_secs(60);
/// The initial backoff duration.
const BASE_BACKOFF: Duration = Duration::from_millis(10);
/// The maximum backoff duration.
const MAX_BACKOFF: Duration = Duration::from_secs(10);
/// If true, log successful requests. For debugging.
const LOG_SUCCESS: bool = false;
/// Runs the given async closure with timeouts and retries (exponential backoff). Logs errors,
/// using the current tracing span for context.
///
/// Only certain gRPC status codes are retried; see [`Self::should_retry`]. For default
/// timeouts, see [`Self::REQUEST_TIMEOUT`] and [`Self::TOTAL_TIMEOUT`].
pub async fn with<T, F, O>(&self, mut f: F) -> tonic::Result<T>
where
F: FnMut() -> O,
O: Future<Output = tonic::Result<T>>,
{
let started = Instant::now();
let deadline = started + Self::TOTAL_TIMEOUT;
let mut last_error = None;
let mut retries = 0;
loop {
// Set up a future to wait for the backoff (if any) and run the request with a timeout.
let backoff_and_try = async {
tokio::time::sleep(Self::backoff_duration(retries)).await;
let request_started = Instant::now();
tokio::time::timeout(Self::REQUEST_TIMEOUT, f())
.await
.map_err(|_| {
tonic::Status::deadline_exceeded(format!(
"request timed out after {:.3}s",
request_started.elapsed().as_secs_f64()
))
})?
};
// Wait for the backoff and request, or bail out if the total timeout is exceeded.
let result = tokio::select! {
result = backoff_and_try => result,
_ = tokio::time::sleep_until(deadline) => {
let last_error = last_error.unwrap_or_else(|| {
tonic::Status::deadline_exceeded(format!(
"request timed out after {:.3}s",
started.elapsed().as_secs_f64()
))
});
error!(
"giving up after {:.3}s and {retries} retries, last error {:?}: {}",
started.elapsed().as_secs_f64(), last_error.code(), last_error.message(),
);
return Err(last_error);
}
};
match result {
// Success, return the result.
Ok(result) => {
if retries > 0 || Self::LOG_SUCCESS {
info!(
"request succeeded after {retries} retries in {:.3}s",
started.elapsed().as_secs_f64(),
);
}
return Ok(result);
}
// Error, retry or bail out.
Err(status) => {
let (code, message) = (status.code(), status.message());
let attempt = retries + 1;
if !Self::should_retry(code) {
// NB: include the attempt here too. This isn't necessarily the first
// attempt, because the error may change between attempts.
error!(
"request failed with {code:?}: {message}, not retrying (attempt {attempt})"
);
return Err(status);
}
warn!("request failed with {code:?}: {message}, retrying (attempt {attempt})");
retries += 1;
last_error = Some(status);
}
}
}
}
/// Returns the backoff duration for the given retry attempt.
fn backoff_duration(retry: usize) -> Duration {
exponential_backoff_duration(
retry as u32,
Self::BASE_BACKOFF.as_secs_f64(),
Self::MAX_BACKOFF.as_secs_f64(),
)
}
/// Returns true if the given status code should be retried.
fn should_retry(code: tonic::Code) -> bool {
match code {
tonic::Code::Ok => panic!("unexpected Ok status code"),
// These codes are transient, so retry them.
tonic::Code::Aborted => true,
tonic::Code::Cancelled => true,
tonic::Code::DeadlineExceeded => true, // maybe transient slowness
tonic::Code::Internal => true, // maybe transient failure?
tonic::Code::ResourceExhausted => true,
tonic::Code::Unavailable => true,
// The following codes will likely continue to fail, so don't retry.
tonic::Code::AlreadyExists => false,
tonic::Code::DataLoss => false,
tonic::Code::FailedPrecondition => false,
tonic::Code::InvalidArgument => false,
tonic::Code::NotFound => false,
tonic::Code::OutOfRange => false,
tonic::Code::PermissionDenied => false,
tonic::Code::Unauthenticated => false,
tonic::Code::Unimplemented => false,
tonic::Code::Unknown => false,
}
}
}
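A minimal usage sketch of the new `Retry` handler, not taken from the commit: a stand-in operation fails twice with a retryable `Unavailable` status and succeeds on the third attempt. The `demo` function and atomic counter are hypothetical; the sketch assumes this `crate::retry` module plus `tokio` and `tonic` as dependencies.

use std::sync::atomic::{AtomicU32, Ordering};

use crate::retry::Retry;

/// Hypothetical caller: fails twice, then succeeds on the third attempt.
pub async fn demo() -> tonic::Result<u32> {
    let attempts = AtomicU32::new(0);
    Retry
        .with(async || {
            let n = attempts.fetch_add(1, Ordering::SeqCst) + 1;
            if n < 3 {
                // Unavailable is in the retryable set, so Retry::with backs off and retries.
                return Err(tonic::Status::unavailable(format!("attempt {n} failed")));
            }
            // A non-retryable code (e.g. InvalidArgument) would be returned immediately.
            Ok(n)
        })
        .await
}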