diff --git a/pageserver/page_api/proto/page_service.proto b/pageserver/page_api/proto/page_service.proto index 2b1045a283..9612d8eb91 100644 --- a/pageserver/page_api/proto/page_service.proto +++ b/pageserver/page_api/proto/page_service.proto @@ -1,9 +1,12 @@ -// Page service presented by pageservers, for computes. +// Page service, presented by pageservers for computes. +// +// This is the compute read path. It primarily serves page versions at given +// LSNs, but also base backups, SLRU segments, and relation metadata. // // Request metadata: // - authorization: JWT token ("Bearer <token>"), if auth is enabled // - neon-tenant-id: tenant ID ("7c4a1f9e3bd6470c8f3e21a65bd2e980") -// - neon-shard-id: shard ID, as in hex ("0b10" = shard 11 of 16) +// - neon-shard-id: shard ID, as in hex ("0b10" = shard 11 of 16, 0-based) // - neon-timeline-id: timeline ID ("f08c4e9a2d5f76b1e3a7c2d8910f4b3e") // // TODO: write implementation guidance on @@ -30,14 +33,19 @@ service PageService { // This is implemented as a bidirectional streaming RPC for performance. Unary // requests incur costs for e.g. HTTP/2 stream setup, header parsing, // authentication, and so on -- with streaming, we only pay these costs during - // the initial stream setup. This ~doubles throughput in benchmarks. + // the initial stream setup. This ~doubles throughput in benchmarks. Other + // requests use regular unary requests, since they are not as frequent and + // performance-critical, and this simplifies implementation. // - // NB: a status response (e.g. for errors) will terminate the stream. The - // stream may be shared by e.g. multiple Postgres backends, so we should avoid - // this. Most errors are instead propagated in the GetPageResponse. + // NB: a status response (e.g. errors) will terminate the stream. The stream + // may be shared by e.g. multiple Postgres backends, so we should avoid this. + // Most errors are therefore sent as GetPageResponse.status instead. 
rpc GetPages (stream GetPageRequestBatch) returns (stream GetPageResponse); // Fetches an SLRU segment. + // + // TODO: can these be significantly larger than 256 KB (32 pages)? If so, + // consider streaming the response instead. rpc GetSlruSegment (GetSlruSegmentRequest) returns (GetSlruSegmentResponse); // Returns whether a relation exists. @@ -48,11 +56,16 @@ service PageService { } +// Common request fields. message RequestCommon { + // The LSN to read at. uint64 request_lsn = 1; + // If given, the caller guarantees that the page has not been modified + // since this LSN. uint64 not_modified_since_lsn = 2; } +// A relation identifier. message RelTag { uint32 spc_oid = 1; uint32 db_oid = 2; @@ -60,32 +73,45 @@ message RelTag { uint32 fork_number = 4; } -message RelExistsRequest { +// Requests the size of a database, as # of bytes. This is only accurate on +// shard 0; other shards will return their view of the database according to +// which pages they have. +message DbSizeRequest { RequestCommon common = 1; - RelTag rel = 2; + uint32 db_oid = 2; } -message RelExistsResponse { - bool exists = 1; +message DbSizeResponse { + uint64 num_bytes = 1; } -message RelSizeRequest { +// Requests a base backup at a given LSN. +message GetBaseBackupRequest { + // The LSN to fetch a base backup at. RequestCommon common = 1; - RelTag rel = 2; + // If true, logical replication slots will not be created. + bool replica = 2; } -message RelSizeResponse { - uint32 num_blocks = 1; +// Base backup response chunk, returned as an ordered stream. +message GetBaseBackupResponseChunk { + // A basebackup data chunk. The size is undefined, but bounded by the 4 MB + // gRPC message size limit. + bytes chunk = 1; } -// A single GetPage request. +// Requests a single page. message GetPageRequest { // A request ID. Will be included in the response. Should be unique for // in-flight requests on the stream. uint64 id = 1; + // The LSN to read at. 
RequestCommon common = 2; + // The relation to read from. RelTag rel = 3; + // The page number to read. Must belong to the remote shard. uint32 block_number = 4; + // The request class. GetPageClass class = 5; } @@ -113,8 +139,6 @@ message GetPageRequestBatch { } // A GetPage response. May be emitted out of order. -// -// TODO: should this include page metadata, like reltag, LSN, and block number? message GetPageResponse { // The original request's ID. uint64 id = 1; @@ -144,30 +168,41 @@ enum GetPageStatus { GET_PAGE_STATUS_SLOW_DOWN = 4; } -message DbSizeRequest { - RequestCommon common = 1; - uint32 db_oid = 2; -} - -message DbSizeResponse { - uint64 num_bytes = 1; -} - -message GetBaseBackupRequest { - RequestCommon common = 1; - bool replica = 2; -} - -message GetBaseBackupResponseChunk { - bytes chunk = 1; -} - +// Requests an SLRU segment. message GetSlruSegmentRequest { RequestCommon common = 1; uint32 kind = 2; uint32 segno = 3; } +// Returns an SLRU segment. +// +// TODO: can these be significantly larger than 256 KB (32 pages)? If so, +// consider chunking and streaming the response instead. message GetSlruSegmentResponse { bytes segment = 1; +} + +// Checks whether a relation exists, at the given LSN. This is only accurate on +// shard 0; other shards will return their view of the relation according to +// which pages they have. +message RelExistsRequest { + RequestCommon common = 1; + RelTag rel = 2; +} + +message RelExistsResponse { + bool exists = 1; +} + +// Fetches the size of a relation at a given LSN, as # of blocks. This is only +// accurate on shard 0; other shards will return their view of the relation +// according to which pages they have. +message RelSizeRequest { + RequestCommon common = 1; + RelTag rel = 2; +} + +message RelSizeResponse { + uint32 num_blocks = 1; } \ No newline at end of file