Move pagestream api to libs/pageserver_api (#2698)

This commit is contained in:
bojanserafimov
2022-10-26 17:32:31 -04:00
committed by GitHub
parent 259a5f356e
commit 0c54eb65fb
13 changed files with 181 additions and 166 deletions

View File

@@ -7,6 +7,9 @@ edition = "2021"
serde = { version = "1.0", features = ["derive"] }
serde_with = "2.0"
const_format = "0.2.21"
anyhow = { version = "1.0", features = ["backtrace"] }
bytes = "1.0.1"
utils = { path = "../utils" }
postgres_ffi = { path = "../postgres_ffi" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -2,6 +2,7 @@ use const_format::formatcp;
/// Public API types
pub mod models;
pub mod reltag;
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");

View File

@@ -7,6 +7,10 @@ use utils::{
lsn::Lsn,
};
use crate::reltag::RelTag;
use anyhow::bail;
use bytes::{Buf, BufMut, Bytes, BytesMut};
/// A state of a tenant in pageserver's memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TenantState {
@@ -219,3 +223,160 @@ pub struct FailpointConfig {
pub struct TimelineGcRequest {
pub gc_horizon: Option<u64>,
}
// Wrapped in libpq CopyData
pub enum PagestreamFeMessage {
Exists(PagestreamExistsRequest),
Nblocks(PagestreamNblocksRequest),
GetPage(PagestreamGetPageRequest),
DbSize(PagestreamDbSizeRequest),
}
// Wrapped in libpq CopyData
pub enum PagestreamBeMessage {
Exists(PagestreamExistsResponse),
Nblocks(PagestreamNblocksResponse),
GetPage(PagestreamGetPageResponse),
Error(PagestreamErrorResponse),
DbSize(PagestreamDbSizeResponse),
}
#[derive(Debug)]
pub struct PagestreamExistsRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
}
#[derive(Debug)]
pub struct PagestreamNblocksRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
}
#[derive(Debug)]
pub struct PagestreamGetPageRequest {
pub latest: bool,
pub lsn: Lsn,
pub rel: RelTag,
pub blkno: u32,
}
#[derive(Debug)]
pub struct PagestreamDbSizeRequest {
pub latest: bool,
pub lsn: Lsn,
pub dbnode: u32,
}
#[derive(Debug)]
pub struct PagestreamExistsResponse {
pub exists: bool,
}
#[derive(Debug)]
pub struct PagestreamNblocksResponse {
pub n_blocks: u32,
}
#[derive(Debug)]
pub struct PagestreamGetPageResponse {
pub page: Bytes,
}
#[derive(Debug)]
pub struct PagestreamErrorResponse {
pub message: String,
}
#[derive(Debug)]
pub struct PagestreamDbSizeResponse {
pub db_size: i64,
}
impl PagestreamFeMessage {
pub fn parse(mut body: Bytes) -> anyhow::Result<PagestreamFeMessage> {
// TODO these gets can fail
// these correspond to the NeonMessageTag enum in pagestore_client.h
//
// TODO: consider using protobuf or serde bincode for less error prone
// serialization.
let msg_tag = body.get_u8();
match msg_tag {
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
})),
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
rel: RelTag {
spcnode: body.get_u32(),
dbnode: body.get_u32(),
relnode: body.get_u32(),
forknum: body.get_u8(),
},
blkno: body.get_u32(),
})),
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
latest: body.get_u8() != 0,
lsn: Lsn::from(body.get_u64()),
dbnode: body.get_u32(),
})),
_ => bail!("unknown smgr message tag: {},'{:?}'", msg_tag, body),
}
}
}
impl PagestreamBeMessage {
pub fn serialize(&self) -> Bytes {
let mut bytes = BytesMut::new();
match self {
Self::Exists(resp) => {
bytes.put_u8(100); /* tag from pagestore_client.h */
bytes.put_u8(resp.exists as u8);
}
Self::Nblocks(resp) => {
bytes.put_u8(101); /* tag from pagestore_client.h */
bytes.put_u32(resp.n_blocks);
}
Self::GetPage(resp) => {
bytes.put_u8(102); /* tag from pagestore_client.h */
bytes.put(&resp.page[..]);
}
Self::Error(resp) => {
bytes.put_u8(103); /* tag from pagestore_client.h */
bytes.put(resp.message.as_bytes());
bytes.put_u8(0); // null terminator
}
Self::DbSize(resp) => {
bytes.put_u8(104); /* tag from pagestore_client.h */
bytes.put_i64(resp.db_size);
}
}
bytes.into()
}
}

View File

@@ -0,0 +1,128 @@
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::fmt;
use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
use postgres_ffi::relfile_utils::forknumber_to_name;
use postgres_ffi::Oid;
///
/// Relation data file segment id throughout the Postgres cluster.
///
/// Every data file in Postgres is uniquely identified by 4 numbers:
/// - relation id / node (`relnode`)
/// - database id (`dbnode`)
/// - tablespace id (`spcnode`), in short this is a unique id of a separate
/// directory to store data files.
/// - forknumber (`forknum`) is used to split different kinds of data of the same relation
/// between some set of files (`relnode`, `relnode_fsm`, `relnode_vm`).
///
/// In native Postgres code `RelFileNode` structure and individual `ForkNumber` value
/// are used for the same purpose.
/// [See more related comments here](https:///github.com/postgres/postgres/blob/99c5852e20a0987eca1c38ba0c09329d4076b6a0/src/include/storage/relfilenode.h#L57).
///
// FIXME: should move 'forknum' as last field to keep this consistent with Postgres.
// Then we could replace the custo Ord and PartialOrd implementations below with
// deriving them.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
pub struct RelTag {
pub forknum: u8,
pub spcnode: Oid,
pub dbnode: Oid,
pub relnode: Oid,
}
impl PartialOrd for RelTag {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for RelTag {
fn cmp(&self, other: &Self) -> Ordering {
let mut cmp = self.spcnode.cmp(&other.spcnode);
if cmp != Ordering::Equal {
return cmp;
}
cmp = self.dbnode.cmp(&other.dbnode);
if cmp != Ordering::Equal {
return cmp;
}
cmp = self.relnode.cmp(&other.relnode);
if cmp != Ordering::Equal {
return cmp;
}
cmp = self.forknum.cmp(&other.forknum);
cmp
}
}
/// Display RelTag in the same format that's used in most PostgreSQL debug messages:
///
/// <spcnode>/<dbnode>/<relnode>[_fsm|_vm|_init]
///
impl fmt::Display for RelTag {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(forkname) = forknumber_to_name(self.forknum) {
write!(
f,
"{}/{}/{}_{}",
self.spcnode, self.dbnode, self.relnode, forkname
)
} else {
write!(f, "{}/{}/{}", self.spcnode, self.dbnode, self.relnode)
}
}
}
impl RelTag {
pub fn to_segfile_name(&self, segno: u32) -> String {
let mut name = if self.spcnode == GLOBALTABLESPACE_OID {
"global/".to_string()
} else {
format!("base/{}/", self.dbnode)
};
name += &self.relnode.to_string();
if let Some(fork_name) = forknumber_to_name(self.forknum) {
name += "_";
name += fork_name;
}
if segno != 0 {
name += ".";
name += &segno.to_string();
}
name
}
}
///
/// Non-relation transaction status files (clog (a.k.a. pg_xact) and
/// pg_multixact) in Postgres are handled by SLRU (Simple LRU) buffer,
/// hence the name.
///
/// These files are global for a postgres instance.
///
/// These files are divided into segments, which are divided into
/// pages of the same BLCKSZ as used for relation files.
///
#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum SlruKind {
Clog,
MultiXactMembers,
MultiXactOffsets,
}
impl SlruKind {
pub fn to_str(&self) -> &'static str {
match self {
Self::Clog => "pg_xact",
Self::MultiXactMembers => "pg_multixact/members",
Self::MultiXactOffsets => "pg_multixact/offsets",
}
}
}