pageserver/page_api: add attempt to GetPage request ID (#12536)

## Problem

`GetPageRequest::request_id` is supposed to be a unique ID for a
request. It's not, because we may retry the request using the same ID.
This causes assertion failures and confusion.

Touches #11735.
Requires #12480.

## Summary of changes

Extend the request ID with a retry attempt, and handle it in the gRPC
client and server.
This commit is contained in:
Erik Grinaker
2025-07-10 22:39:42 +02:00
committed by GitHub
parent 1b7339b53e
commit 44ea17b7b2
7 changed files with 110 additions and 31 deletions

View File

@@ -153,7 +153,7 @@ message GetDbSizeResponse {
message GetPageRequest {
// A request ID. Will be included in the response. Should be unique for
// in-flight requests on the stream.
uint64 request_id = 1;
RequestID request_id = 1;
// The request class.
GetPageClass request_class = 2;
// The LSN to read at.
@@ -177,6 +177,14 @@ message GetPageRequest {
repeated uint32 block_number = 5;
}
// A Request ID. Should be unique for in-flight requests on a stream. Included in the response.
message RequestID {
// The base request ID.
uint64 id = 1;
// The request attempt. Starts at 0, incremented on each retry.
uint32 attempt = 2;
}
// A GetPageRequest class. Primarily intended for observability, but may also be
// used for prioritization in the future.
enum GetPageClass {
@@ -199,7 +207,7 @@ enum GetPageClass {
// the entire batch is ready, so no one can make use of the individual pages.
message GetPageResponse {
// The original request's ID.
uint64 request_id = 1;
RequestID request_id = 1;
// The response status code.
GetPageStatusCode status_code = 2;
// A string describing the status, if any.

View File

@@ -356,7 +356,10 @@ impl TryFrom<proto::GetPageRequest> for GetPageRequest {
return Err(ProtocolError::Missing("block_number"));
}
Ok(Self {
request_id: pb.request_id,
request_id: pb
.request_id
.ok_or(ProtocolError::Missing("request_id"))?
.into(),
request_class: pb.request_class.into(),
read_lsn: pb
.read_lsn
@@ -371,7 +374,7 @@ impl TryFrom<proto::GetPageRequest> for GetPageRequest {
impl From<GetPageRequest> for proto::GetPageRequest {
fn from(request: GetPageRequest) -> Self {
Self {
request_id: request.request_id,
request_id: Some(request.request_id.into()),
request_class: request.request_class.into(),
read_lsn: Some(request.read_lsn.into()),
rel: Some(request.rel.into()),
@@ -380,8 +383,51 @@ impl From<GetPageRequest> for proto::GetPageRequest {
}
}
/// A GetPage request ID.
pub type RequestID = u64;
/// A GetPage request ID and retry attempt. Should be unique for in-flight requests on a stream.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RequestID {
/// The base request ID.
pub id: u64,
// The request attempt. Starts at 0, incremented on each retry.
pub attempt: u32,
}
impl RequestID {
/// Creates a new RequestID with the given ID and an initial attempt of 0.
pub fn new(id: u64) -> Self {
Self { id, attempt: 0 }
}
}
impl Display for RequestID {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}.{}", self.id, self.attempt)
}
}
impl From<proto::RequestId> for RequestID {
fn from(pb: proto::RequestId) -> Self {
Self {
id: pb.id,
attempt: pb.attempt,
}
}
}
impl From<u64> for RequestID {
fn from(id: u64) -> Self {
Self::new(id)
}
}
impl From<RequestID> for proto::RequestId {
fn from(request_id: RequestID) -> Self {
Self {
id: request_id.id,
attempt: request_id.attempt,
}
}
}
/// A GetPage request class.
#[derive(Clone, Copy, Debug, strum_macros::Display)]
@@ -467,7 +513,7 @@ pub struct GetPageResponse {
impl From<proto::GetPageResponse> for GetPageResponse {
fn from(pb: proto::GetPageResponse) -> Self {
Self {
request_id: pb.request_id,
request_id: pb.request_id.unwrap_or_default().into(),
status_code: pb.status_code.into(),
reason: Some(pb.reason).filter(|r| !r.is_empty()),
page_images: pb.page_image,
@@ -478,7 +524,7 @@ impl From<proto::GetPageResponse> for GetPageResponse {
impl From<GetPageResponse> for proto::GetPageResponse {
fn from(response: GetPageResponse) -> Self {
Self {
request_id: response.request_id,
request_id: Some(response.request_id.into()),
status_code: response.status_code.into(),
reason: response.reason.unwrap_or_default(),
page_image: response.page_images,