From 194b9ffc41434d4b574d9c0836f663d99db6c027 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 21 Jul 2025 13:43:26 +0200 Subject: [PATCH] pageserver: remove gRPC `CheckRelExists` (#12616) ## Problem Postgres will often immediately follow a relation existence check with a relation size query. This incurs two roundtrips, and may prevent effective caching. See [Slack thread](https://databricks.slack.com/archives/C091SDX74SC/p1751951732136139). Touches #11728. ## Summary of changes For the gRPC API: * Add an `allow_missing` parameter to `GetRelSize`, which returns `missing=true` instead of a `NotFound` error. * Remove `CheckRelExists`. There are no changes to libpq behavior. --- pageserver/client_grpc/src/client.rs | 17 ----- pageserver/page_api/proto/page_service.proto | 26 +++----- pageserver/page_api/src/client.rs | 13 +--- pageserver/page_api/src/model.rs | 65 +++++--------------- pageserver/src/page_service.rs | 65 +++++++------------- pageserver/src/pgdatadir_mapping.rs | 32 +++++++--- 6 files changed, 75 insertions(+), 143 deletions(-) diff --git a/pageserver/client_grpc/src/client.rs b/pageserver/client_grpc/src/client.rs index e4670f74cc..42bc3c40ac 100644 --- a/pageserver/client_grpc/src/client.rs +++ b/pageserver/client_grpc/src/client.rs @@ -157,23 +157,6 @@ impl PageserverClient { Ok(()) } - /// Returns whether a relation exists. - #[instrument(skip_all, fields(rel=%req.rel, lsn=%req.read_lsn))] - pub async fn check_rel_exists( - &self, - req: page_api::CheckRelExistsRequest, - ) -> tonic::Result { - debug!("sending request: {req:?}"); - let resp = Self::with_retries(CALL_TIMEOUT, async |_| { - // Relation metadata is only available on shard 0. - let mut client = self.shards.load_full().get_zero().client().await?; - Self::with_timeout(REQUEST_TIMEOUT, client.check_rel_exists(req)).await - }) - .await?; - debug!("received response: {resp:?}"); - Ok(resp) - } - /// Returns the total size of a database, as # of bytes. #[instrument(skip_all, fields(db_oid=%req.db_oid, lsn=%req.read_lsn))] pub async fn get_db_size( diff --git a/pageserver/page_api/proto/page_service.proto b/pageserver/page_api/proto/page_service.proto index d113a04a42..aaccbd5ef0 100644 --- a/pageserver/page_api/proto/page_service.proto +++ b/pageserver/page_api/proto/page_service.proto @@ -17,11 +17,11 @@ // grpcurl \ // -plaintext \ // -H "neon-tenant-id: 7c4a1f9e3bd6470c8f3e21a65bd2e980" \ -// -H "neon-shard-id: 0b10" \ +// -H "neon-shard-id: 0000" \ // -H "neon-timeline-id: f08c4e9a2d5f76b1e3a7c2d8910f4b3e" \ // -H "authorization: Bearer $JWT" \ -// -d '{"read_lsn": {"request_lsn": 1234567890}, "rel": {"spc_oid": 1663, "db_oid": 1234, "rel_number": 5678, "fork_number": 0}}' -// localhost:51051 page_api.PageService/CheckRelExists +// -d '{"read_lsn": {"request_lsn": 100000000, "not_modified_since_lsn": 1}, "db_oid": 1}' \ +// localhost:51051 page_api.PageService/GetDbSize // ``` // // TODO: consider adding neon-compute-mode ("primary", "static", "replica"). @@ -38,8 +38,8 @@ package page_api; import "google/protobuf/timestamp.proto"; service PageService { - // Returns whether a relation exists. - rpc CheckRelExists(CheckRelExistsRequest) returns (CheckRelExistsResponse); + // NB: unlike libpq, there is no CheckRelExists in gRPC, at the compute team's request. Instead, + // use GetRelSize with allow_missing=true to check existence. // Fetches a base backup. rpc GetBaseBackup (GetBaseBackupRequest) returns (stream GetBaseBackupResponseChunk); @@ -97,17 +97,6 @@ message RelTag { uint32 fork_number = 4; } -// Checks whether a relation exists, at the given LSN. Only valid on shard 0, -// other shards will error. -message CheckRelExistsRequest { - ReadLsn read_lsn = 1; - RelTag rel = 2; -} - -message CheckRelExistsResponse { - bool exists = 1; -} - // Requests a base backup. message GetBaseBackupRequest { // The LSN to fetch the base backup at. 0 or absent means the latest LSN known to the Pageserver. @@ -260,10 +249,15 @@ enum GetPageStatusCode { message GetRelSizeRequest { ReadLsn read_lsn = 1; RelTag rel = 2; + // If true, return missing=true for missing relations instead of a NotFound error. + bool allow_missing = 3; } message GetRelSizeResponse { + // The number of blocks in the relation. uint32 num_blocks = 1; + // If allow_missing=true, this is true for missing relations. + bool missing = 2; } // Requests an SLRU segment. Only valid on shard 0, other shards will error. diff --git a/pageserver/page_api/src/client.rs b/pageserver/page_api/src/client.rs index f70d0e7b28..fc27ea448b 100644 --- a/pageserver/page_api/src/client.rs +++ b/pageserver/page_api/src/client.rs @@ -69,16 +69,6 @@ impl Client { Ok(Self { inner }) } - /// Returns whether a relation exists. - pub async fn check_rel_exists( - &mut self, - req: CheckRelExistsRequest, - ) -> tonic::Result { - let req = proto::CheckRelExistsRequest::from(req); - let resp = self.inner.check_rel_exists(req).await?.into_inner(); - Ok(resp.into()) - } - /// Fetches a base backup. pub async fn get_base_backup( &mut self, @@ -114,7 +104,8 @@ impl Client { Ok(resps.and_then(|resp| ready(GetPageResponse::try_from(resp).map_err(|err| err.into())))) } - /// Returns the size of a relation, as # of blocks. + /// Returns the size of a relation as # of blocks, or None if allow_missing=true and the + /// relation does not exist. pub async fn get_rel_size( &mut self, req: GetRelSizeRequest, diff --git a/pageserver/page_api/src/model.rs b/pageserver/page_api/src/model.rs index a3286ecf15..6375c47998 100644 --- a/pageserver/page_api/src/model.rs +++ b/pageserver/page_api/src/model.rs @@ -139,50 +139,6 @@ impl From for proto::RelTag { } } -/// Checks whether a relation exists, at the given LSN. Only valid on shard 0, other shards error. -#[derive(Clone, Copy, Debug)] -pub struct CheckRelExistsRequest { - pub read_lsn: ReadLsn, - pub rel: RelTag, -} - -impl TryFrom for CheckRelExistsRequest { - type Error = ProtocolError; - - fn try_from(pb: proto::CheckRelExistsRequest) -> Result { - Ok(Self { - read_lsn: pb - .read_lsn - .ok_or(ProtocolError::Missing("read_lsn"))? - .try_into()?, - rel: pb.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?, - }) - } -} - -impl From for proto::CheckRelExistsRequest { - fn from(request: CheckRelExistsRequest) -> Self { - Self { - read_lsn: Some(request.read_lsn.into()), - rel: Some(request.rel.into()), - } - } -} - -pub type CheckRelExistsResponse = bool; - -impl From for CheckRelExistsResponse { - fn from(pb: proto::CheckRelExistsResponse) -> Self { - pb.exists - } -} - -impl From for proto::CheckRelExistsResponse { - fn from(exists: CheckRelExistsResponse) -> Self { - Self { exists } - } -} - /// Requests a base backup. #[derive(Clone, Copy, Debug)] pub struct GetBaseBackupRequest { @@ -707,6 +663,8 @@ impl From for tonic::Code { pub struct GetRelSizeRequest { pub read_lsn: ReadLsn, pub rel: RelTag, + /// If true, return missing=true for missing relations instead of a NotFound error. + pub allow_missing: bool, } impl TryFrom for GetRelSizeRequest { @@ -719,6 +677,7 @@ impl TryFrom for GetRelSizeRequest { .ok_or(ProtocolError::Missing("read_lsn"))? .try_into()?, rel: proto.rel.ok_or(ProtocolError::Missing("rel"))?.try_into()?, + allow_missing: proto.allow_missing, }) } } @@ -728,21 +687,29 @@ impl From for proto::GetRelSizeRequest { Self { read_lsn: Some(request.read_lsn.into()), rel: Some(request.rel.into()), + allow_missing: request.allow_missing, } } } -pub type GetRelSizeResponse = u32; +/// The size of a relation as number of blocks, or None if `allow_missing=true` and the relation +/// does not exist. +/// +/// INVARIANT: never None if `allow_missing=false` (returns `NotFound` error instead). +pub type GetRelSizeResponse = Option; impl From for GetRelSizeResponse { - fn from(proto: proto::GetRelSizeResponse) -> Self { - proto.num_blocks + fn from(pb: proto::GetRelSizeResponse) -> Self { + (!pb.missing).then_some(pb.num_blocks) } } impl From for proto::GetRelSizeResponse { - fn from(num_blocks: GetRelSizeResponse) -> Self { - Self { num_blocks } + fn from(resp: GetRelSizeResponse) -> Self { + Self { + num_blocks: resp.unwrap_or_default(), + missing: resp.is_none(), + } } } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 2b266c6811..26a23da66f 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -1636,9 +1636,10 @@ impl PageServerHandler { let (shard, ctx) = upgrade_handle_and_set_context!(shard); ( vec![ - Self::handle_get_nblocks_request(&shard, &req, &ctx) + Self::handle_get_nblocks_request(&shard, &req, false, &ctx) .instrument(span.clone()) .await + .map(|msg| msg.expect("allow_missing=false")) .map(|msg| (PagestreamBeMessage::Nblocks(msg), timer, ctx)) .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), ], @@ -2303,12 +2304,16 @@ impl PageServerHandler { Ok(PagestreamExistsResponse { req: *req, exists }) } + /// If `allow_missing` is true, returns None instead of Err on missing relations. Otherwise, + /// never returns None. It is only supported by the gRPC protocol, so we pass it separately to + /// avoid changing the libpq protocol types. #[instrument(skip_all, fields(shard_id))] async fn handle_get_nblocks_request( timeline: &Timeline, req: &PagestreamNblocksRequest, + allow_missing: bool, ctx: &RequestContext, - ) -> Result { + ) -> Result, PageStreamError> { let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( timeline, @@ -2320,20 +2325,25 @@ impl PageServerHandler { .await?; let n_blocks = timeline - .get_rel_size( + .get_rel_size_in_reldir( req.rel, Version::LsnRange(LsnRange { effective_lsn: lsn, request_lsn: req.hdr.request_lsn, }), + None, + allow_missing, ctx, ) .await?; + let Some(n_blocks) = n_blocks else { + return Ok(None); + }; - Ok(PagestreamNblocksResponse { + Ok(Some(PagestreamNblocksResponse { req: *req, n_blocks, - }) + })) } #[instrument(skip_all, fields(shard_id))] @@ -3539,39 +3549,6 @@ impl proto::PageService for GrpcPageServiceHandler { type GetPagesStream = Pin> + Send>>; - #[instrument(skip_all, fields(rel, lsn))] - async fn check_rel_exists( - &self, - req: tonic::Request, - ) -> Result, tonic::Status> { - let received_at = extract::(&req).0; - let timeline = self.get_request_timeline(&req).await?; - let ctx = self.ctx.with_scope_page_service_pagestream(&timeline); - - // Validate the request, decorate the span, and convert it to a Pagestream request. - Self::ensure_shard_zero(&timeline)?; - let req: page_api::CheckRelExistsRequest = req.into_inner().try_into()?; - - span_record!(rel=%req.rel, lsn=%req.read_lsn); - - let req = PagestreamExistsRequest { - hdr: Self::make_hdr(req.read_lsn, None), - rel: req.rel, - }; - - // Execute the request and convert the response. - let _timer = Self::record_op_start_and_throttle( - &timeline, - metrics::SmgrQueryType::GetRelExists, - received_at, - ) - .await?; - - let resp = PageServerHandler::handle_get_rel_exists_request(&timeline, &req, &ctx).await?; - let resp: page_api::CheckRelExistsResponse = resp.exists; - Ok(tonic::Response::new(resp.into())) - } - #[instrument(skip_all, fields(lsn))] async fn get_base_backup( &self, @@ -3798,7 +3775,7 @@ impl proto::PageService for GrpcPageServiceHandler { Ok(tonic::Response::new(Box::pin(resps))) } - #[instrument(skip_all, fields(rel, lsn))] + #[instrument(skip_all, fields(rel, lsn, allow_missing))] async fn get_rel_size( &self, req: tonic::Request, @@ -3810,8 +3787,9 @@ impl proto::PageService for GrpcPageServiceHandler { // Validate the request, decorate the span, and convert it to a Pagestream request. Self::ensure_shard_zero(&timeline)?; let req: page_api::GetRelSizeRequest = req.into_inner().try_into()?; + let allow_missing = req.allow_missing; - span_record!(rel=%req.rel, lsn=%req.read_lsn); + span_record!(rel=%req.rel, lsn=%req.read_lsn, allow_missing=%req.allow_missing); let req = PagestreamNblocksRequest { hdr: Self::make_hdr(req.read_lsn, None), @@ -3826,8 +3804,11 @@ impl proto::PageService for GrpcPageServiceHandler { ) .await?; - let resp = PageServerHandler::handle_get_nblocks_request(&timeline, &req, &ctx).await?; - let resp: page_api::GetRelSizeResponse = resp.n_blocks; + let resp = + PageServerHandler::handle_get_nblocks_request(&timeline, &req, allow_missing, &ctx) + .await?; + let resp: page_api::GetRelSizeResponse = resp.map(|resp| resp.n_blocks); + Ok(tonic::Response::new(resp.into())) } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index ea0fb5de2f..ab9cc88e5f 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -504,8 +504,9 @@ impl Timeline { for rel in rels { let n_blocks = self - .get_rel_size_in_reldir(rel, version, Some((reldir_key, &reldir)), ctx) - .await?; + .get_rel_size_in_reldir(rel, version, Some((reldir_key, &reldir)), false, ctx) + .await? + .expect("allow_missing=false"); total_blocks += n_blocks as usize; } Ok(total_blocks) @@ -521,10 +522,16 @@ impl Timeline { version: Version<'_>, ctx: &RequestContext, ) -> Result { - self.get_rel_size_in_reldir(tag, version, None, ctx).await + Ok(self + .get_rel_size_in_reldir(tag, version, None, false, ctx) + .await? + .expect("allow_missing=false")) } - /// Get size of a relation file. The relation must exist, otherwise an error is returned. + /// Get size of a relation file. If `allow_missing` is true, returns None for missing relations, + /// otherwise errors. + /// + /// INVARIANT: never returns None if `allow_missing=false`. /// /// See [`Self::get_rel_exists_in_reldir`] on why we need `deserialized_reldir_v1`. pub(crate) async fn get_rel_size_in_reldir( @@ -532,8 +539,9 @@ impl Timeline { tag: RelTag, version: Version<'_>, deserialized_reldir_v1: Option<(Key, &RelDirectory)>, + allow_missing: bool, ctx: &RequestContext, - ) -> Result { + ) -> Result, PageReconstructError> { if tag.relnode == 0 { return Err(PageReconstructError::Other( RelationError::InvalidRelnode.into(), @@ -541,7 +549,15 @@ impl Timeline { } if let Some(nblocks) = self.get_cached_rel_size(&tag, version) { - return Ok(nblocks); + return Ok(Some(nblocks)); + } + + if allow_missing + && !self + .get_rel_exists_in_reldir(tag, version, deserialized_reldir_v1, ctx) + .await? + { + return Ok(None); } if (tag.forknum == FSM_FORKNUM || tag.forknum == VISIBILITYMAP_FORKNUM) @@ -553,7 +569,7 @@ impl Timeline { // FSM, and smgrnblocks() on it immediately afterwards, // without extending it. Tolerate that by claiming that // any non-existent FSM fork has size 0. - return Ok(0); + return Ok(Some(0)); } let key = rel_size_to_key(tag); @@ -562,7 +578,7 @@ impl Timeline { self.update_cached_rel_size(tag, version, nblocks); - Ok(nblocks) + Ok(Some(nblocks)) } /// Does the relation exist?