mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-04 03:52:56 +00:00
Add response tag to getpage request in V3 protocol version (#8686)
## Problem We have several serious data corruption incidents caused by mismatch of get-age requests: https://neondb.slack.com/archives/C07FJS4QF7V/p1723032720164359 We hope that the problem is fixed now. But it is better to prevent such kind of problems in future. Part of https://github.com/neondatabase/cloud/issues/16472 ## Summary of changes This PR introduce new V3 version of compute<->pageserver protocol, adding tag to getpage response. So now compute is able to check if it really gets response to the requested page. ## Checklist before requesting a review - [ ] I have performed a self-review of my code. - [ ] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ## Checklist before merging - [ ] Do not forget to reformat commit message to not include the above checklist --------- Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech> Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
This commit is contained in:
committed by
GitHub
parent
f4739d49e3
commit
20c40eb733
@@ -1460,75 +1460,91 @@ impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
|
||||
// difference in the responses between V1 and V2.
|
||||
//
|
||||
#[derive(Clone, Copy)]
|
||||
// V3 version of protocol adds request ID to all requests. This request ID is also included in response
|
||||
// as well as other fields from requests, which allows to verify that we receive response for our request.
|
||||
// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
|
||||
// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
|
||||
//
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum PagestreamProtocolVersion {
|
||||
V2,
|
||||
V3,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub type RequestId = u64;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamRequest {
|
||||
pub reqid: RequestId,
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetSlruSegmentRequest {
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
pub hdr: PagestreamRequest,
|
||||
pub kind: u8,
|
||||
pub segno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub req: PagestreamExistsRequest,
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub req: PagestreamNblocksRequest,
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub req: PagestreamGetPageRequest,
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetSlruSegmentResponse {
|
||||
pub req: PagestreamGetSlruSegmentRequest,
|
||||
pub segment: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub req: PagestreamRequest,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub req: PagestreamDbSizeRequest,
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
@@ -1545,15 +1561,16 @@ pub struct TenantHistorySize {
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
/// Serialize a compute -> pageserver message. This is currently only used in testing
|
||||
/// tools. Always uses protocol version 2.
|
||||
/// tools. Always uses protocol version 3.
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(req) => {
|
||||
bytes.put_u8(0);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
@@ -1562,8 +1579,9 @@ impl PagestreamFeMessage {
|
||||
|
||||
Self::Nblocks(req) => {
|
||||
bytes.put_u8(1);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
@@ -1572,8 +1590,9 @@ impl PagestreamFeMessage {
|
||||
|
||||
Self::GetPage(req) => {
|
||||
bytes.put_u8(2);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
@@ -1583,15 +1602,17 @@ impl PagestreamFeMessage {
|
||||
|
||||
Self::DbSize(req) => {
|
||||
bytes.put_u8(3);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(req) => {
|
||||
bytes.put_u8(4);
|
||||
bytes.put_u64(req.request_lsn.0);
|
||||
bytes.put_u64(req.not_modified_since.0);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(req.kind);
|
||||
bytes.put_u32(req.segno);
|
||||
}
|
||||
@@ -1600,21 +1621,35 @@ impl PagestreamFeMessage {
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn parse<R: std::io::Read>(body: &mut R) -> anyhow::Result<PagestreamFeMessage> {
|
||||
pub fn parse<R: std::io::Read>(
|
||||
body: &mut R,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.read_u8()?;
|
||||
|
||||
// these two fields are the same for every request type
|
||||
let request_lsn = Lsn::from(body.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn::from(body.read_u64::<BigEndian>()?);
|
||||
let (reqid, request_lsn, not_modified_since) = match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => (
|
||||
0,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
PagestreamProtocolVersion::V3 => (
|
||||
body.read_u64::<BigEndian>()?,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
};
|
||||
|
||||
match msg_tag {
|
||||
0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
@@ -1623,8 +1658,11 @@ impl PagestreamFeMessage {
|
||||
},
|
||||
})),
|
||||
1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
@@ -1633,8 +1671,11 @@ impl PagestreamFeMessage {
|
||||
},
|
||||
})),
|
||||
2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
@@ -1644,14 +1685,20 @@ impl PagestreamFeMessage {
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
})),
|
||||
4 => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind: body.read_u8()?,
|
||||
segno: body.read_u32::<BigEndian>()?,
|
||||
},
|
||||
@@ -1662,43 +1709,114 @@ impl PagestreamFeMessage {
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..]);
|
||||
}
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
PagestreamProtocolVersion::V3 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.req.blkno);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put_u64(resp.req.reqid);
|
||||
bytes.put_u64(resp.req.request_lsn.0);
|
||||
bytes.put_u64(resp.req.not_modified_since.0);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.dbnode);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(resp.req.kind);
|
||||
bytes.put_u32(resp.req.segno);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
@@ -1710,38 +1828,131 @@ impl PagestreamBeMessage {
|
||||
let ok =
|
||||
match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
|
||||
Tag::Exists => {
|
||||
let exists = buf.read_u8()?;
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let exists = buf.read_u8()? != 0;
|
||||
Self::Exists(PagestreamExistsResponse {
|
||||
exists: exists != 0,
|
||||
req: PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
exists,
|
||||
})
|
||||
}
|
||||
Tag::Nblocks => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
Self::Nblocks(PagestreamNblocksResponse { n_blocks })
|
||||
Self::Nblocks(PagestreamNblocksResponse {
|
||||
req: PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
n_blocks,
|
||||
})
|
||||
}
|
||||
Tag::GetPage => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let blkno = buf.read_u32::<BigEndian>()?;
|
||||
let mut page = vec![0; 8192]; // TODO: use MaybeUninit
|
||||
buf.read_exact(&mut page)?;
|
||||
PagestreamBeMessage::GetPage(PagestreamGetPageResponse { page: page.into() })
|
||||
Self::GetPage(PagestreamGetPageResponse {
|
||||
req: PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
blkno,
|
||||
},
|
||||
page: page.into(),
|
||||
})
|
||||
}
|
||||
Tag::Error => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let mut msg = Vec::new();
|
||||
buf.read_until(0, &mut msg)?;
|
||||
let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
|
||||
let rust_str = cstring.to_str()?;
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
Self::Error(PagestreamErrorResponse {
|
||||
req: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
message: rust_str.to_owned(),
|
||||
})
|
||||
}
|
||||
Tag::DbSize => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let dbnode = buf.read_u32::<BigEndian>()?;
|
||||
let db_size = buf.read_i64::<BigEndian>()?;
|
||||
Self::DbSize(PagestreamDbSizeResponse { db_size })
|
||||
Self::DbSize(PagestreamDbSizeResponse {
|
||||
req: PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode,
|
||||
},
|
||||
db_size,
|
||||
})
|
||||
}
|
||||
Tag::GetSlruSegment => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let kind = buf.read_u8()?;
|
||||
let segno = buf.read_u32::<BigEndian>()?;
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
|
||||
buf.read_exact(&mut segment)?;
|
||||
Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
|
||||
req: PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind,
|
||||
segno,
|
||||
},
|
||||
segment: segment.into(),
|
||||
})
|
||||
}
|
||||
@@ -1780,8 +1991,11 @@ mod tests {
|
||||
// Test serialization/deserialization of PagestreamFeMessage
|
||||
let messages = vec![
|
||||
PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
@@ -1790,8 +2004,11 @@ mod tests {
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
@@ -1800,8 +2017,11 @@ mod tests {
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
@@ -1811,14 +2031,19 @@ mod tests {
|
||||
blkno: 7,
|
||||
}),
|
||||
PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
dbnode: 7,
|
||||
}),
|
||||
];
|
||||
for msg in messages {
|
||||
let bytes = msg.serialize();
|
||||
let reconstructed = PagestreamFeMessage::parse(&mut bytes.reader()).unwrap();
|
||||
let reconstructed =
|
||||
PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
|
||||
.unwrap();
|
||||
assert!(msg == reconstructed);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,7 +60,7 @@ impl Client {
|
||||
) -> anyhow::Result<PagestreamClient> {
|
||||
let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = self
|
||||
.client
|
||||
.copy_both_simple(&format!("pagestream_v2 {tenant_id} {timeline_id}"))
|
||||
.copy_both_simple(&format!("pagestream_v3 {tenant_id} {timeline_id}"))
|
||||
.await?;
|
||||
let Client {
|
||||
cancel_on_client_drop,
|
||||
|
||||
@@ -2,7 +2,7 @@ use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::keyspace::KeySpaceAccum;
|
||||
use pageserver_api::models::PagestreamGetPageRequest;
|
||||
use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest};
|
||||
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -322,12 +322,15 @@ async fn main_impl(
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
} else {
|
||||
r.timeline_lsn
|
||||
},
|
||||
not_modified_since: r.timeline_lsn,
|
||||
},
|
||||
not_modified_since: r.timeline_lsn,
|
||||
rel: rel_tag,
|
||||
blkno: block_no,
|
||||
}
|
||||
|
||||
@@ -1854,6 +1854,7 @@ pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
|
||||
|
||||
#[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
|
||||
pub(crate) enum ComputeCommandKind {
|
||||
PageStreamV3,
|
||||
PageStreamV2,
|
||||
Basebackup,
|
||||
Fullbackup,
|
||||
|
||||
@@ -17,7 +17,7 @@ use pageserver_api::models::{
|
||||
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
|
||||
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,
|
||||
PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
|
||||
PagestreamProtocolVersion,
|
||||
PagestreamProtocolVersion, PagestreamRequest,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_backend::{
|
||||
@@ -67,7 +67,7 @@ use crate::tenant::PageReconstructError;
|
||||
use crate::tenant::Timeline;
|
||||
use crate::{basebackup, timed_after_cancellation};
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use pageserver_api::reltag::SlruKind;
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
@@ -537,6 +537,23 @@ impl From<WaitLsnError> for QueryError {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
struct BatchedPageStreamError {
|
||||
req: PagestreamRequest,
|
||||
err: PageStreamError,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BatchedPageStreamError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.err.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
struct BatchedGetPageRequest {
|
||||
req: PagestreamGetPageRequest,
|
||||
timer: SmgrOpTimer,
|
||||
}
|
||||
|
||||
enum BatchedFeMessage {
|
||||
Exists {
|
||||
span: Span,
|
||||
@@ -554,7 +571,7 @@ enum BatchedFeMessage {
|
||||
span: Span,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
effective_request_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>,
|
||||
pages: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
|
||||
},
|
||||
DbSize {
|
||||
span: Span,
|
||||
@@ -570,7 +587,7 @@ enum BatchedFeMessage {
|
||||
},
|
||||
RespondError {
|
||||
span: Span,
|
||||
error: PageStreamError,
|
||||
error: BatchedPageStreamError,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -595,7 +612,7 @@ impl BatchedFeMessage {
|
||||
BatchedFeMessage::GetPage { shard, pages, .. } => (
|
||||
shard,
|
||||
pages.len(),
|
||||
itertools::Either::Right(pages.iter_mut().map(|(_, _, timer)| timer)),
|
||||
itertools::Either::Right(pages.iter_mut().map(|p| &mut p.timer)),
|
||||
),
|
||||
BatchedFeMessage::RespondError { .. } => return Ok(()),
|
||||
};
|
||||
@@ -654,6 +671,7 @@ impl PageServerHandler {
|
||||
)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn pagestream_read_message<IO>(
|
||||
pgb: &mut PostgresBackendReader<IO>,
|
||||
tenant_id: TenantId,
|
||||
@@ -661,6 +679,7 @@ impl PageServerHandler {
|
||||
timeline_handles: &mut TimelineHandles,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
parent_span: Span,
|
||||
) -> Result<Option<BatchedFeMessage>, QueryError>
|
||||
where
|
||||
@@ -695,11 +714,12 @@ impl PageServerHandler {
|
||||
fail::fail_point!("ps::handle-pagerequest-message");
|
||||
|
||||
// parse request
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
let neon_fe_msg =
|
||||
PagestreamFeMessage::parse(&mut copy_data_bytes.reader(), protocol_version)?;
|
||||
|
||||
let batched_msg = match neon_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn);
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn);
|
||||
let shard = timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
@@ -715,7 +735,7 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::Nblocks(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn);
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.hdr.request_lsn);
|
||||
let shard = timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
@@ -731,7 +751,7 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::DbSize(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn);
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.hdr.request_lsn);
|
||||
let shard = timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
@@ -747,7 +767,7 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::GetSlruSegment(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn);
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.hdr.request_lsn);
|
||||
let shard = timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Zero)
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
@@ -762,25 +782,23 @@ impl PageServerHandler {
|
||||
req,
|
||||
}
|
||||
}
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
rel,
|
||||
blkno,
|
||||
}) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_page_at_lsn_request_batched", req_lsn = %request_lsn);
|
||||
PagestreamFeMessage::GetPage(req) => {
|
||||
let span = tracing::info_span!(parent: parent_span, "handle_get_page_at_lsn_request_batched", req_lsn = %req.hdr.request_lsn);
|
||||
|
||||
macro_rules! respond_error {
|
||||
($error:expr) => {{
|
||||
let error = BatchedFeMessage::RespondError {
|
||||
span,
|
||||
error: $error,
|
||||
error: BatchedPageStreamError {
|
||||
req: req.hdr,
|
||||
err: $error,
|
||||
},
|
||||
};
|
||||
Ok(Some(error))
|
||||
}};
|
||||
}
|
||||
|
||||
let key = rel_block_to_key(rel, blkno);
|
||||
let key = rel_block_to_key(req.rel, req.blkno);
|
||||
let shard = match timeline_handles
|
||||
.get(tenant_id, timeline_id, ShardSelector::Page(key))
|
||||
.instrument(span.clone()) // sets `shard_id` field
|
||||
@@ -814,8 +832,8 @@ impl PageServerHandler {
|
||||
|
||||
let effective_request_lsn = match Self::wait_or_get_last_lsn(
|
||||
&shard,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
req.hdr.request_lsn,
|
||||
req.hdr.not_modified_since,
|
||||
&shard.get_latest_gc_cutoff_lsn(),
|
||||
ctx,
|
||||
)
|
||||
@@ -831,7 +849,7 @@ impl PageServerHandler {
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages: smallvec::smallvec![(rel, blkno, timer)],
|
||||
pages: smallvec::smallvec![BatchedGetPageRequest { req, timer }],
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -910,6 +928,7 @@ impl PageServerHandler {
|
||||
pgb_writer: &mut PostgresBackend<IO>,
|
||||
batch: BatchedFeMessage,
|
||||
cancel: &CancellationToken,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), QueryError>
|
||||
where
|
||||
@@ -917,7 +936,7 @@ impl PageServerHandler {
|
||||
{
|
||||
// invoke handler function
|
||||
let (handler_results, span): (
|
||||
Vec<Result<(PagestreamBeMessage, SmgrOpTimer), PageStreamError>>,
|
||||
Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>>,
|
||||
_,
|
||||
) = match batch {
|
||||
BatchedFeMessage::Exists {
|
||||
@@ -932,7 +951,8 @@ impl PageServerHandler {
|
||||
.handle_get_rel_exists_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
.map(|msg| (msg, timer))
|
||||
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
|
||||
span,
|
||||
)
|
||||
}
|
||||
@@ -948,7 +968,8 @@ impl PageServerHandler {
|
||||
.handle_get_nblocks_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
.map(|msg| (msg, timer))
|
||||
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
|
||||
span,
|
||||
)
|
||||
}
|
||||
@@ -990,7 +1011,8 @@ impl PageServerHandler {
|
||||
.handle_db_size_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
.map(|msg| (msg, timer))
|
||||
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
|
||||
span,
|
||||
)
|
||||
}
|
||||
@@ -1006,7 +1028,8 @@ impl PageServerHandler {
|
||||
.handle_get_slru_segment_request(&shard, &req, ctx)
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
.map(|msg| (msg, timer))],
|
||||
.map(|msg| (msg, timer))
|
||||
.map_err(|err| BatchedPageStreamError { err, req: req.hdr })],
|
||||
span,
|
||||
)
|
||||
}
|
||||
@@ -1022,7 +1045,7 @@ impl PageServerHandler {
|
||||
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
|
||||
for handler_result in handler_results {
|
||||
let (response_msg, timer) = match handler_result {
|
||||
Err(e) => match &e {
|
||||
Err(e) => match &e.err {
|
||||
PageStreamError::Shutdown => {
|
||||
// If we fail to fulfil a request during shutdown, which may be _because_ of
|
||||
// shutdown, then do not send the error to the client. Instead just drop the
|
||||
@@ -1041,13 +1064,14 @@ impl PageServerHandler {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough. Do not log if shutting down, as the anyhow::Error
|
||||
// here includes cancellation which is not an error.
|
||||
let full = utils::error::report_compact_sources(&e);
|
||||
let full = utils::error::report_compact_sources(&e.err);
|
||||
span.in_scope(|| {
|
||||
error!("error reading relation or page version: {full:#}")
|
||||
});
|
||||
(
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
req: e.req,
|
||||
message: e.err.to_string(),
|
||||
}),
|
||||
None, // TODO: measure errors
|
||||
)
|
||||
@@ -1060,7 +1084,9 @@ impl PageServerHandler {
|
||||
// marshal & transmit response message
|
||||
//
|
||||
|
||||
pgb_writer.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
|
||||
pgb_writer.write_message_noflush(&BeMessage::CopyData(
|
||||
&response_msg.serialize(protocol_version),
|
||||
))?;
|
||||
|
||||
// We purposefully don't count flush time into the timer.
|
||||
//
|
||||
@@ -1123,7 +1149,7 @@ impl PageServerHandler {
|
||||
pgb: &mut PostgresBackend<IO>,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
_protocol_version: PagestreamProtocolVersion,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
ctx: RequestContext,
|
||||
) -> Result<(), QueryError>
|
||||
where
|
||||
@@ -1163,6 +1189,7 @@ impl PageServerHandler {
|
||||
timeline_handles,
|
||||
request_span,
|
||||
pipelining_config,
|
||||
protocol_version,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1175,6 +1202,7 @@ impl PageServerHandler {
|
||||
timeline_id,
|
||||
timeline_handles,
|
||||
request_span,
|
||||
protocol_version,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
@@ -1201,6 +1229,7 @@ impl PageServerHandler {
|
||||
timeline_id: TimelineId,
|
||||
mut timeline_handles: TimelineHandles,
|
||||
request_span: Span,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
ctx: &RequestContext,
|
||||
) -> (
|
||||
(PostgresBackendReader<IO>, TimelineHandles),
|
||||
@@ -1218,6 +1247,7 @@ impl PageServerHandler {
|
||||
&mut timeline_handles,
|
||||
&cancel,
|
||||
ctx,
|
||||
protocol_version,
|
||||
request_span.clone(),
|
||||
)
|
||||
.await;
|
||||
@@ -1238,7 +1268,7 @@ impl PageServerHandler {
|
||||
}
|
||||
|
||||
let err = self
|
||||
.pagesteam_handle_batched_message(pgb_writer, msg, &cancel, ctx)
|
||||
.pagesteam_handle_batched_message(pgb_writer, msg, &cancel, protocol_version, ctx)
|
||||
.await;
|
||||
match err {
|
||||
Ok(()) => {}
|
||||
@@ -1261,6 +1291,7 @@ impl PageServerHandler {
|
||||
mut timeline_handles: TimelineHandles,
|
||||
request_span: Span,
|
||||
pipelining_config: PageServicePipeliningConfigPipelined,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
ctx: &RequestContext,
|
||||
) -> (
|
||||
(PostgresBackendReader<IO>, TimelineHandles),
|
||||
@@ -1358,6 +1389,7 @@ impl PageServerHandler {
|
||||
&mut timeline_handles,
|
||||
&cancel_batcher,
|
||||
&ctx,
|
||||
protocol_version,
|
||||
request_span.clone(),
|
||||
)
|
||||
.await;
|
||||
@@ -1403,8 +1435,14 @@ impl PageServerHandler {
|
||||
batch
|
||||
.throttle_and_record_start_processing(&self.cancel)
|
||||
.await?;
|
||||
self.pagesteam_handle_batched_message(pgb_writer, batch, &cancel, &ctx)
|
||||
.await?;
|
||||
self.pagesteam_handle_batched_message(
|
||||
pgb_writer,
|
||||
batch,
|
||||
&cancel,
|
||||
protocol_version,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -1578,8 +1616,8 @@ impl PageServerHandler {
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
req.request_lsn,
|
||||
req.not_modified_since,
|
||||
req.hdr.request_lsn,
|
||||
req.hdr.not_modified_since,
|
||||
&latest_gc_cutoff_lsn,
|
||||
ctx,
|
||||
)
|
||||
@@ -1590,6 +1628,7 @@ impl PageServerHandler {
|
||||
.await?;
|
||||
|
||||
Ok(PagestreamBeMessage::Exists(PagestreamExistsResponse {
|
||||
req: *req,
|
||||
exists,
|
||||
}))
|
||||
}
|
||||
@@ -1604,8 +1643,8 @@ impl PageServerHandler {
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
req.request_lsn,
|
||||
req.not_modified_since,
|
||||
req.hdr.request_lsn,
|
||||
req.hdr.not_modified_since,
|
||||
&latest_gc_cutoff_lsn,
|
||||
ctx,
|
||||
)
|
||||
@@ -1616,6 +1655,7 @@ impl PageServerHandler {
|
||||
.await?;
|
||||
|
||||
Ok(PagestreamBeMessage::Nblocks(PagestreamNblocksResponse {
|
||||
req: *req,
|
||||
n_blocks,
|
||||
}))
|
||||
}
|
||||
@@ -1630,8 +1670,8 @@ impl PageServerHandler {
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
req.request_lsn,
|
||||
req.not_modified_since,
|
||||
req.hdr.request_lsn,
|
||||
req.hdr.not_modified_since,
|
||||
&latest_gc_cutoff_lsn,
|
||||
ctx,
|
||||
)
|
||||
@@ -1643,6 +1683,7 @@ impl PageServerHandler {
|
||||
let db_size = total_blocks as i64 * BLCKSZ as i64;
|
||||
|
||||
Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
|
||||
req: *req,
|
||||
db_size,
|
||||
}))
|
||||
}
|
||||
@@ -1652,9 +1693,9 @@ impl PageServerHandler {
|
||||
&mut self,
|
||||
timeline: &Timeline,
|
||||
effective_lsn: Lsn,
|
||||
requests: smallvec::SmallVec<[(RelTag, BlockNumber, SmgrOpTimer); 1]>,
|
||||
requests: smallvec::SmallVec<[BatchedGetPageRequest; 1]>,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), PageStreamError>> {
|
||||
) -> Vec<Result<(PagestreamBeMessage, SmgrOpTimer), BatchedPageStreamError>> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
timeline
|
||||
@@ -1663,7 +1704,7 @@ impl PageServerHandler {
|
||||
|
||||
let results = timeline
|
||||
.get_rel_page_at_lsn_batched(
|
||||
requests.iter().map(|(reltag, blkno, _)| (reltag, blkno)),
|
||||
requests.iter().map(|p| (&p.req.rel, &p.req.blkno)),
|
||||
effective_lsn,
|
||||
ctx,
|
||||
)
|
||||
@@ -1675,16 +1716,20 @@ impl PageServerHandler {
|
||||
requests
|
||||
.into_iter()
|
||||
.zip(results.into_iter())
|
||||
.map(|((_, _, timer), res)| {
|
||||
.map(|(req, res)| {
|
||||
res.map(|page| {
|
||||
(
|
||||
PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse {
|
||||
req: req.req,
|
||||
page,
|
||||
}),
|
||||
timer,
|
||||
req.timer,
|
||||
)
|
||||
})
|
||||
.map_err(PageStreamError::from)
|
||||
.map_err(|e| BatchedPageStreamError {
|
||||
err: PageStreamError::from(e),
|
||||
req: req.req.hdr,
|
||||
})
|
||||
}),
|
||||
)
|
||||
}
|
||||
@@ -1699,8 +1744,8 @@ impl PageServerHandler {
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
timeline,
|
||||
req.request_lsn,
|
||||
req.not_modified_since,
|
||||
req.hdr.request_lsn,
|
||||
req.hdr.not_modified_since,
|
||||
&latest_gc_cutoff_lsn,
|
||||
ctx,
|
||||
)
|
||||
@@ -1711,7 +1756,7 @@ impl PageServerHandler {
|
||||
let segment = timeline.get_slru_segment(kind, req.segno, lsn, ctx).await?;
|
||||
|
||||
Ok(PagestreamBeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentResponse { segment },
|
||||
PagestreamGetSlruSegmentResponse { req: *req, segment },
|
||||
))
|
||||
}
|
||||
|
||||
@@ -1906,6 +1951,7 @@ struct FullBackupCmd {
|
||||
struct PageStreamCmd {
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
}
|
||||
|
||||
/// `lease lsn tenant timeline lsn`
|
||||
@@ -1926,7 +1972,7 @@ enum PageServiceCmd {
|
||||
}
|
||||
|
||||
impl PageStreamCmd {
|
||||
fn parse(query: &str) -> anyhow::Result<Self> {
|
||||
fn parse(query: &str, protocol_version: PagestreamProtocolVersion) -> anyhow::Result<Self> {
|
||||
let parameters = query.split_whitespace().collect_vec();
|
||||
if parameters.len() != 2 {
|
||||
bail!(
|
||||
@@ -1941,6 +1987,7 @@ impl PageStreamCmd {
|
||||
Ok(Self {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
protocol_version,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -2078,7 +2125,14 @@ impl PageServiceCmd {
|
||||
bail!("cannot parse query: {query}")
|
||||
};
|
||||
match cmd.to_ascii_lowercase().as_str() {
|
||||
"pagestream_v2" => Ok(Self::PageStream(PageStreamCmd::parse(other)?)),
|
||||
"pagestream_v2" => Ok(Self::PageStream(PageStreamCmd::parse(
|
||||
other,
|
||||
PagestreamProtocolVersion::V2,
|
||||
)?)),
|
||||
"pagestream_v3" => Ok(Self::PageStream(PageStreamCmd::parse(
|
||||
other,
|
||||
PagestreamProtocolVersion::V3,
|
||||
)?)),
|
||||
"basebackup" => Ok(Self::BaseBackup(BaseBackupCmd::parse(other)?)),
|
||||
"fullbackup" => Ok(Self::FullBackup(FullBackupCmd::parse(other)?)),
|
||||
"lease" => {
|
||||
@@ -2160,25 +2214,21 @@ where
|
||||
PageServiceCmd::PageStream(PageStreamCmd {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
protocol_version,
|
||||
}) => {
|
||||
tracing::Span::current()
|
||||
.record("tenant_id", field::display(tenant_id))
|
||||
.record("timeline_id", field::display(timeline_id));
|
||||
|
||||
self.check_permission(Some(tenant_id))?;
|
||||
let command_kind = match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => ComputeCommandKind::PageStreamV2,
|
||||
PagestreamProtocolVersion::V3 => ComputeCommandKind::PageStreamV3,
|
||||
};
|
||||
COMPUTE_COMMANDS_COUNTERS.for_command(command_kind).inc();
|
||||
|
||||
COMPUTE_COMMANDS_COUNTERS
|
||||
.for_command(ComputeCommandKind::PageStreamV2)
|
||||
.inc();
|
||||
|
||||
self.handle_pagerequests(
|
||||
pgb,
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
PagestreamProtocolVersion::V2,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
self.handle_pagerequests(pgb, tenant_id, timeline_id, protocol_version, ctx)
|
||||
.await?;
|
||||
}
|
||||
PageServiceCmd::BaseBackup(BaseBackupCmd {
|
||||
tenant_id,
|
||||
@@ -2357,7 +2407,8 @@ mod tests {
|
||||
cmd,
|
||||
PageServiceCmd::PageStream(PageStreamCmd {
|
||||
tenant_id,
|
||||
timeline_id
|
||||
timeline_id,
|
||||
protocol_version: PagestreamProtocolVersion::V2,
|
||||
})
|
||||
);
|
||||
let cmd = PageServiceCmd::parse(&format!("basebackup {tenant_id} {timeline_id}")).unwrap();
|
||||
|
||||
@@ -556,6 +556,9 @@ pageserver_connect(shardno_t shard_no, int elevel)
|
||||
|
||||
switch (neon_protocol_version)
|
||||
{
|
||||
case 3:
|
||||
pagestream_query = psprintf("pagestream_v3 %s %s", neon_tenant, neon_timeline);
|
||||
break;
|
||||
case 2:
|
||||
pagestream_query = psprintf("pagestream_v2 %s %s", neon_tenant, neon_timeline);
|
||||
break;
|
||||
@@ -1135,9 +1138,9 @@ pg_init_libpagestore(void)
|
||||
"Version of compute<->page server protocol",
|
||||
NULL,
|
||||
&neon_protocol_version,
|
||||
2, /* use protocol version 2 */
|
||||
2, /* min */
|
||||
2, /* max */
|
||||
2, /* use protocol version 2 */
|
||||
2, /* min */
|
||||
3, /* max */
|
||||
PGC_SU_BACKEND,
|
||||
0, /* no flags required */
|
||||
NULL, NULL, NULL);
|
||||
|
||||
@@ -44,10 +44,15 @@ typedef enum
|
||||
T_NeonGetSlruSegmentResponse,
|
||||
} NeonMessageTag;
|
||||
|
||||
typedef uint64 NeonRequestId;
|
||||
|
||||
/* base struct for c-style inheritance */
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
NeonRequestId reqid;
|
||||
XLogRecPtr lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
} NeonMessage;
|
||||
|
||||
#define messageTag(m) (((const NeonMessage *)(m))->tag)
|
||||
@@ -67,6 +72,7 @@ typedef enum {
|
||||
SLRU_MULTIXACT_OFFSETS
|
||||
} SlruKind;
|
||||
|
||||
|
||||
/*--
|
||||
* supertype of all the Neon*Request structs below.
|
||||
*
|
||||
@@ -87,37 +93,37 @@ typedef enum {
|
||||
*
|
||||
* These structs describe the V2 of these requests. (The old now-defunct V1
|
||||
* protocol contained just one LSN and a boolean 'latest' flag.)
|
||||
*
|
||||
* V3 version of protocol adds request ID to all requests. This request ID is also included in response
|
||||
* as well as other fields from requests, which allows to verify that we receive response for our request.
|
||||
* We copy fields from request to response to make checking more reliable: request ID is formed from process ID
|
||||
* and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
XLogRecPtr lsn;
|
||||
XLogRecPtr not_modified_since;
|
||||
} NeonRequest;
|
||||
typedef NeonMessage NeonRequest;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
NeonRequest hdr;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
} NeonExistsRequest;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
NeonRequest hdr;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
} NeonNblocksRequest;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
NeonRequest hdr;
|
||||
Oid dbNode;
|
||||
} NeonDbSizeRequest;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
NeonRequest hdr;
|
||||
NRelFileInfo rinfo;
|
||||
ForkNumber forknum;
|
||||
BlockNumber blkno;
|
||||
@@ -125,32 +131,29 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonRequest req;
|
||||
SlruKind kind;
|
||||
int segno;
|
||||
NeonRequest hdr;
|
||||
SlruKind kind;
|
||||
int segno;
|
||||
} NeonGetSlruSegmentRequest;
|
||||
|
||||
/* supertype of all the Neon*Response structs below */
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
} NeonResponse;
|
||||
typedef NeonMessage NeonResponse;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
NeonExistsRequest req;
|
||||
bool exists;
|
||||
} NeonExistsResponse;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
NeonNblocksRequest req;
|
||||
uint32 n_blocks;
|
||||
} NeonNblocksResponse;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
NeonGetPageRequest req;
|
||||
char page[FLEXIBLE_ARRAY_MEMBER];
|
||||
} NeonGetPageResponse;
|
||||
|
||||
@@ -158,21 +161,21 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
NeonDbSizeRequest req;
|
||||
int64 db_size;
|
||||
} NeonDbSizeResponse;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
NeonResponse req;
|
||||
char message[FLEXIBLE_ARRAY_MEMBER]; /* null-terminated error
|
||||
* message */
|
||||
} NeonErrorResponse;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NeonMessageTag tag;
|
||||
int n_blocks;
|
||||
NeonGetSlruSegmentRequest req;
|
||||
int n_blocks;
|
||||
char data[BLCKSZ * SLRU_PAGES_PER_SEGMENT];
|
||||
} NeonGetSlruSegmentResponse;
|
||||
|
||||
|
||||
@@ -120,6 +120,9 @@ static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block
|
||||
|
||||
static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
|
||||
|
||||
static uint32 local_request_counter;
|
||||
#define GENERATE_REQUEST_ID() (((NeonRequestId)MyProcPid << 32) | ++local_request_counter)
|
||||
|
||||
/*
|
||||
* Prefetch implementation:
|
||||
*
|
||||
@@ -188,15 +191,11 @@ typedef struct PrefetchRequest
|
||||
uint8 status; /* see PrefetchStatus for valid values */
|
||||
uint8 flags; /* see PrefetchRequestFlags */
|
||||
neon_request_lsns request_lsns;
|
||||
NeonRequestId reqid;
|
||||
NeonResponse *response; /* may be null */
|
||||
uint64 my_ring_index;
|
||||
} PrefetchRequest;
|
||||
|
||||
StaticAssertDecl(sizeof(PrefetchRequest) == 64,
|
||||
"We prefer to have a power-of-2 size for this struct. Please"
|
||||
" try to find an alternative solution before reaching to"
|
||||
" increase the expected size here");
|
||||
|
||||
/* prefetch buffer lookup hash table */
|
||||
|
||||
typedef struct PrfHashEntry
|
||||
@@ -365,6 +364,7 @@ compact_prefetch_buffers(void)
|
||||
target_slot->shard_no = source_slot->shard_no;
|
||||
target_slot->status = source_slot->status;
|
||||
target_slot->response = source_slot->response;
|
||||
target_slot->reqid = source_slot->reqid;
|
||||
target_slot->request_lsns = source_slot->request_lsns;
|
||||
target_slot->my_ring_index = empty_ring_index;
|
||||
|
||||
@@ -798,7 +798,8 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
||||
uint64 mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index;
|
||||
|
||||
NeonGetPageRequest request = {
|
||||
.req.tag = T_NeonGetPageRequest,
|
||||
.hdr.tag = T_NeonGetPageRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
/* lsn and not_modified_since are filled in below */
|
||||
.rinfo = BufTagGetNRelFileInfo(slot->buftag),
|
||||
.forknum = slot->buftag.forkNum,
|
||||
@@ -807,14 +808,16 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
||||
|
||||
Assert(mySlotNo == MyPState->ring_unused);
|
||||
|
||||
slot->reqid = request.hdr.reqid;
|
||||
|
||||
if (force_request_lsns)
|
||||
slot->request_lsns = *force_request_lsns;
|
||||
else
|
||||
neon_get_request_lsns(BufTagGetNRelFileInfo(slot->buftag),
|
||||
slot->buftag.forkNum, slot->buftag.blockNum,
|
||||
&slot->request_lsns, 1, NULL);
|
||||
request.req.lsn = slot->request_lsns.request_lsn;
|
||||
request.req.not_modified_since = slot->request_lsns.not_modified_since;
|
||||
request.hdr.lsn = slot->request_lsns.request_lsn;
|
||||
request.hdr.not_modified_since = slot->request_lsns.not_modified_since;
|
||||
|
||||
Assert(slot->response == NULL);
|
||||
Assert(slot->my_ring_index == MyPState->ring_unused);
|
||||
@@ -1102,6 +1105,12 @@ Retry:
|
||||
return min_ring_index;
|
||||
}
|
||||
|
||||
static bool
|
||||
equal_requests(NeonRequest* a, NeonRequest* b)
|
||||
{
|
||||
return a->reqid == b->reqid && a->lsn == b->lsn && a->not_modified_since == b->not_modified_since;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Note: this function can get canceled and use a long jump to the next catch
|
||||
@@ -1184,6 +1193,10 @@ nm_pack_request(NeonRequest *msg)
|
||||
initStringInfo(&s);
|
||||
|
||||
pq_sendbyte(&s, msg->tag);
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
pq_sendint64(&s, msg->reqid);
|
||||
}
|
||||
pq_sendint64(&s, msg->lsn);
|
||||
pq_sendint64(&s, msg->not_modified_since);
|
||||
|
||||
@@ -1261,8 +1274,16 @@ NeonResponse *
|
||||
nm_unpack_response(StringInfo s)
|
||||
{
|
||||
NeonMessageTag tag = pq_getmsgbyte(s);
|
||||
NeonResponse resp_hdr = {0}; /* make valgrind happy */
|
||||
NeonResponse *resp = NULL;
|
||||
|
||||
resp_hdr.tag = tag;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
resp_hdr.reqid = pq_getmsgint64(s);
|
||||
resp_hdr.lsn = pq_getmsgint64(s);
|
||||
resp_hdr.not_modified_since = pq_getmsgint64(s);
|
||||
}
|
||||
switch (tag)
|
||||
{
|
||||
/* pagestore -> pagestore_client */
|
||||
@@ -1270,7 +1291,14 @@ nm_unpack_response(StringInfo s)
|
||||
{
|
||||
NeonExistsResponse *msg_resp = palloc0(sizeof(NeonExistsResponse));
|
||||
|
||||
msg_resp->tag = tag;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
msg_resp->req.forknum = pq_getmsgbyte(s);
|
||||
}
|
||||
msg_resp->req.hdr = resp_hdr;
|
||||
msg_resp->exists = pq_getmsgbyte(s);
|
||||
pq_getmsgend(s);
|
||||
|
||||
@@ -1282,7 +1310,14 @@ nm_unpack_response(StringInfo s)
|
||||
{
|
||||
NeonNblocksResponse *msg_resp = palloc0(sizeof(NeonNblocksResponse));
|
||||
|
||||
msg_resp->tag = tag;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
msg_resp->req.forknum = pq_getmsgbyte(s);
|
||||
}
|
||||
msg_resp->req.hdr = resp_hdr;
|
||||
msg_resp->n_blocks = pq_getmsgint(s, 4);
|
||||
pq_getmsgend(s);
|
||||
|
||||
@@ -1295,12 +1330,20 @@ nm_unpack_response(StringInfo s)
|
||||
NeonGetPageResponse *msg_resp;
|
||||
|
||||
msg_resp = MemoryContextAllocZero(MyPState->bufctx, PS_GETPAGERESPONSE_SIZE);
|
||||
msg_resp->tag = tag;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
NInfoGetSpcOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
NInfoGetDbOid(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
NInfoGetRelNumber(msg_resp->req.rinfo) = pq_getmsgint(s, 4);
|
||||
msg_resp->req.forknum = pq_getmsgbyte(s);
|
||||
msg_resp->req.blkno = pq_getmsgint(s, 4);
|
||||
}
|
||||
msg_resp->req.hdr = resp_hdr;
|
||||
/* XXX: should be varlena */
|
||||
memcpy(msg_resp->page, pq_getmsgbytes(s, BLCKSZ), BLCKSZ);
|
||||
pq_getmsgend(s);
|
||||
|
||||
Assert(msg_resp->tag == T_NeonGetPageResponse);
|
||||
Assert(msg_resp->req.hdr.tag == T_NeonGetPageResponse);
|
||||
|
||||
resp = (NeonResponse *) msg_resp;
|
||||
break;
|
||||
@@ -1310,7 +1353,11 @@ nm_unpack_response(StringInfo s)
|
||||
{
|
||||
NeonDbSizeResponse *msg_resp = palloc0(sizeof(NeonDbSizeResponse));
|
||||
|
||||
msg_resp->tag = tag;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
msg_resp->req.dbNode = pq_getmsgint(s, 4);
|
||||
}
|
||||
msg_resp->req.hdr = resp_hdr;
|
||||
msg_resp->db_size = pq_getmsgint64(s);
|
||||
pq_getmsgend(s);
|
||||
|
||||
@@ -1328,7 +1375,7 @@ nm_unpack_response(StringInfo s)
|
||||
msglen = strlen(msgtext);
|
||||
|
||||
msg_resp = palloc0(sizeof(NeonErrorResponse) + msglen + 1);
|
||||
msg_resp->tag = tag;
|
||||
msg_resp->req = resp_hdr;
|
||||
memcpy(msg_resp->message, msgtext, msglen + 1);
|
||||
pq_getmsgend(s);
|
||||
|
||||
@@ -1339,9 +1386,17 @@ nm_unpack_response(StringInfo s)
|
||||
case T_NeonGetSlruSegmentResponse:
|
||||
{
|
||||
NeonGetSlruSegmentResponse *msg_resp;
|
||||
int n_blocks = pq_getmsgint(s, 4);
|
||||
msg_resp = palloc(sizeof(NeonGetSlruSegmentResponse));
|
||||
msg_resp->tag = tag;
|
||||
int n_blocks;
|
||||
msg_resp = palloc0(sizeof(NeonGetSlruSegmentResponse));
|
||||
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
msg_resp->req.kind = pq_getmsgbyte(s);
|
||||
msg_resp->req.segno = pq_getmsgint(s, 4);
|
||||
}
|
||||
msg_resp->req.hdr = resp_hdr;
|
||||
|
||||
n_blocks = pq_getmsgint(s, 4);
|
||||
msg_resp->n_blocks = n_blocks;
|
||||
memcpy(msg_resp->data, pq_getmsgbytes(s, n_blocks * BLCKSZ), n_blocks * BLCKSZ);
|
||||
pq_getmsgend(s);
|
||||
@@ -1386,8 +1441,8 @@ nm_to_string(NeonMessage *msg)
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonExistsRequest\"");
|
||||
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
|
||||
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
|
||||
appendStringInfoChar(&s, '}');
|
||||
break;
|
||||
}
|
||||
@@ -1399,8 +1454,8 @@ nm_to_string(NeonMessage *msg)
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonNblocksRequest\"");
|
||||
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
|
||||
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
|
||||
appendStringInfoChar(&s, '}');
|
||||
break;
|
||||
}
|
||||
@@ -1413,8 +1468,8 @@ nm_to_string(NeonMessage *msg)
|
||||
appendStringInfo(&s, ", \"rinfo\": \"%u/%u/%u\"", RelFileInfoFmt(msg_req->rinfo));
|
||||
appendStringInfo(&s, ", \"forknum\": %d", msg_req->forknum);
|
||||
appendStringInfo(&s, ", \"blkno\": %u", msg_req->blkno);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
|
||||
appendStringInfoChar(&s, '}');
|
||||
break;
|
||||
}
|
||||
@@ -1424,8 +1479,8 @@ nm_to_string(NeonMessage *msg)
|
||||
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonDbSizeRequest\"");
|
||||
appendStringInfo(&s, ", \"dbnode\": \"%u\"", msg_req->dbNode);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
|
||||
appendStringInfoChar(&s, '}');
|
||||
break;
|
||||
}
|
||||
@@ -1436,8 +1491,8 @@ nm_to_string(NeonMessage *msg)
|
||||
appendStringInfoString(&s, "{\"type\": \"NeonGetSlruSegmentRequest\"");
|
||||
appendStringInfo(&s, ", \"kind\": %u", msg_req->kind);
|
||||
appendStringInfo(&s, ", \"segno\": %u", msg_req->segno);
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->req.not_modified_since));
|
||||
appendStringInfo(&s, ", \"lsn\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.lsn));
|
||||
appendStringInfo(&s, ", \"not_modified_since\": \"%X/%X\"", LSN_FORMAT_ARGS(msg_req->hdr.not_modified_since));
|
||||
appendStringInfoChar(&s, '}');
|
||||
break;
|
||||
}
|
||||
@@ -2312,39 +2367,64 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
|
||||
REL_METADATA_PSEUDO_BLOCKNO, &request_lsns, 1, NULL);
|
||||
{
|
||||
NeonExistsRequest request = {
|
||||
.req.tag = T_NeonExistsRequest,
|
||||
.req.lsn = request_lsns.request_lsn,
|
||||
.req.not_modified_since = request_lsns.not_modified_since,
|
||||
.hdr.tag = T_NeonExistsRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsns.request_lsn,
|
||||
.hdr.not_modified_since = request_lsns.not_modified_since,
|
||||
.rinfo = InfoFromSMgrRel(reln),
|
||||
.forknum = forkNum
|
||||
};
|
||||
|
||||
resp = page_server_request(&request);
|
||||
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonExistsResponse:
|
||||
{
|
||||
NeonExistsResponse* exists_resp = (NeonExistsResponse *) resp;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr) ||
|
||||
!RelFileInfoEquals(exists_resp->req.rinfo, request.rinfo) ||
|
||||
exists_resp->req.forknum != request.forknum)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to exits request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(exists_resp->req.rinfo), exists_resp->req.forknum,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), request.forknum);
|
||||
}
|
||||
}
|
||||
exists = exists_resp->exists;
|
||||
break;
|
||||
}
|
||||
case T_NeonErrorResponse:
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match exists request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
|
||||
default:
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Expected Exists (0x%02x) or Error (0x%02x) response to ExistsRequest, but got 0x%02x",
|
||||
T_NeonExistsResponse, T_NeonErrorResponse, resp->tag);
|
||||
}
|
||||
pfree(resp);
|
||||
}
|
||||
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonExistsResponse:
|
||||
exists = ((NeonExistsResponse *) resp)->exists;
|
||||
break;
|
||||
|
||||
case T_NeonErrorResponse:
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "could not read relation existence of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forkNum,
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
|
||||
default:
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Expected Exists (0x%02x) or Error (0x%02x) response to ExistsRequest, but got 0x%02x",
|
||||
T_NeonExistsResponse, T_NeonErrorResponse, resp->tag);
|
||||
}
|
||||
pfree(resp);
|
||||
return exists;
|
||||
}
|
||||
|
||||
@@ -2952,15 +3032,43 @@ Retry:
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonGetPageResponse:
|
||||
memcpy(buffer, ((NeonGetPageResponse *) resp)->page, BLCKSZ);
|
||||
{
|
||||
NeonGetPageResponse* getpage_resp = (NeonGetPageResponse *) resp;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (resp->reqid != slot->reqid ||
|
||||
resp->lsn != slot->request_lsns.request_lsn ||
|
||||
resp->not_modified_since != slot->request_lsns.not_modified_since ||
|
||||
!RelFileInfoEquals(getpage_resp->req.rinfo, rinfo) ||
|
||||
getpage_resp->req.forknum != forkNum ||
|
||||
getpage_resp->req.blkno != base_blockno + i)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u} to get page request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u, block=%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(getpage_resp->req.rinfo), getpage_resp->req.forknum, getpage_resp->req.blkno,
|
||||
slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since), RelFileInfoFmt(rinfo), forkNum, base_blockno + i);
|
||||
}
|
||||
}
|
||||
memcpy(buffer, getpage_resp->page, BLCKSZ);
|
||||
lfc_write(rinfo, forkNum, blockno, buffer);
|
||||
break;
|
||||
|
||||
}
|
||||
case T_NeonErrorResponse:
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (resp->reqid != slot->reqid ||
|
||||
resp->lsn != slot->request_lsns.request_lsn ||
|
||||
resp->not_modified_since != slot->request_lsns.not_modified_since)
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
slot->reqid, LSN_FORMAT_ARGS(slot->request_lsns.request_lsn), LSN_FORMAT_ARGS(slot->request_lsns.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[shard %d] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
slot->shard_no, blockno, RelFileInfoFmt(rinfo),
|
||||
errmsg(NEON_TAG "[shard %d, reqid %lx] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
slot->shard_no, resp->reqid, blockno, RelFileInfoFmt(rinfo),
|
||||
forkNum, LSN_FORMAT_ARGS(reqlsns->effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
@@ -3443,47 +3551,72 @@ neon_nblocks(SMgrRelation reln, ForkNumber forknum)
|
||||
|
||||
{
|
||||
NeonNblocksRequest request = {
|
||||
.req.tag = T_NeonNblocksRequest,
|
||||
.req.lsn = request_lsns.request_lsn,
|
||||
.req.not_modified_since = request_lsns.not_modified_since,
|
||||
.hdr.tag = T_NeonNblocksRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsns.request_lsn,
|
||||
.hdr.not_modified_since = request_lsns.not_modified_since,
|
||||
.rinfo = InfoFromSMgrRel(reln),
|
||||
.forknum = forknum,
|
||||
};
|
||||
|
||||
resp = page_server_request(&request);
|
||||
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonNblocksResponse:
|
||||
{
|
||||
NeonNblocksResponse * relsize_resp = (NeonNblocksResponse *) resp;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr) ||
|
||||
!RelFileInfoEquals(relsize_resp->req.rinfo, request.rinfo) ||
|
||||
relsize_resp->req.forknum != forknum)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u} to get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, rel=%u/%u/%u.%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), RelFileInfoFmt(relsize_resp->req.rinfo), relsize_resp->req.forknum,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), RelFileInfoFmt(request.rinfo), forknum);
|
||||
}
|
||||
}
|
||||
n_blocks = relsize_resp->n_blocks;
|
||||
break;
|
||||
}
|
||||
case T_NeonErrorResponse:
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get relsize request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
|
||||
default:
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Expected Nblocks (0x%02x) or Error (0x%02x) response to NblocksRequest, but got 0x%02x",
|
||||
T_NeonNblocksResponse, T_NeonErrorResponse, resp->tag);
|
||||
}
|
||||
update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
|
||||
|
||||
neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn),
|
||||
n_blocks);
|
||||
|
||||
pfree(resp);
|
||||
}
|
||||
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonNblocksResponse:
|
||||
n_blocks = ((NeonNblocksResponse *) resp)->n_blocks;
|
||||
break;
|
||||
|
||||
case T_NeonErrorResponse:
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "could not read relation size of rel %u/%u/%u.%u from page server at lsn %X/%08X",
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
|
||||
default:
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Expected Nblocks (0x%02x) or Error (0x%02x) response to NblocksRequest, but got 0x%02x",
|
||||
T_NeonNblocksResponse, T_NeonErrorResponse, resp->tag);
|
||||
}
|
||||
update_cached_relsize(InfoFromSMgrRel(reln), forknum, n_blocks);
|
||||
|
||||
neon_log(SmgrTrace, "neon_nblocks: rel %u/%u/%u fork %u (request LSN %X/%08X): %u blocks",
|
||||
RelFileInfoFmt(InfoFromSMgrRel(reln)),
|
||||
forknum,
|
||||
LSN_FORMAT_ARGS(request_lsns.effective_request_lsn),
|
||||
n_blocks);
|
||||
|
||||
pfree(resp);
|
||||
return n_blocks;
|
||||
}
|
||||
|
||||
@@ -3503,40 +3636,64 @@ neon_dbsize(Oid dbNode)
|
||||
|
||||
{
|
||||
NeonDbSizeRequest request = {
|
||||
.req.tag = T_NeonDbSizeRequest,
|
||||
.req.lsn = request_lsns.request_lsn,
|
||||
.req.not_modified_since = request_lsns.not_modified_since,
|
||||
.hdr.tag = T_NeonDbSizeRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsns.request_lsn,
|
||||
.hdr.not_modified_since = request_lsns.not_modified_since,
|
||||
.dbNode = dbNode,
|
||||
};
|
||||
|
||||
resp = page_server_request(&request);
|
||||
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonDbSizeResponse:
|
||||
{
|
||||
NeonDbSizeResponse* dbsize_resp = (NeonDbSizeResponse *) resp;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr) ||
|
||||
dbsize_resp->req.dbNode != dbNode)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u} to get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, dbNode=%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), dbsize_resp->req.dbNode,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), dbNode);
|
||||
}
|
||||
}
|
||||
db_size = dbsize_resp->db_size;
|
||||
break;
|
||||
}
|
||||
case T_NeonErrorResponse:
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get DB size request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read db size of db %u from page server at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
|
||||
default:
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Expected DbSize (0x%02x) or Error (0x%02x) response to DbSizeRequest, but got 0x%02x",
|
||||
T_NeonDbSizeResponse, T_NeonErrorResponse, resp->tag);
|
||||
}
|
||||
|
||||
neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
|
||||
dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size);
|
||||
|
||||
pfree(resp);
|
||||
}
|
||||
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonDbSizeResponse:
|
||||
db_size = ((NeonDbSizeResponse *) resp)->db_size;
|
||||
break;
|
||||
|
||||
case T_NeonErrorResponse:
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "could not read db size of db %u from page server at lsn %X/%08X",
|
||||
dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn)),
|
||||
errdetail("page server returned error: %s",
|
||||
((NeonErrorResponse *) resp)->message)));
|
||||
break;
|
||||
|
||||
default:
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Expected DbSize (0x%02x) or Error (0x%02x) response to DbSizeRequest, but got 0x%02x",
|
||||
T_NeonDbSizeResponse, T_NeonErrorResponse, resp->tag);
|
||||
}
|
||||
|
||||
neon_log(SmgrTrace, "neon_dbsize: db %u (request LSN %X/%08X): %ld bytes",
|
||||
dbNode, LSN_FORMAT_ARGS(request_lsns.effective_request_lsn), db_size);
|
||||
|
||||
pfree(resp);
|
||||
return db_size;
|
||||
}
|
||||
|
||||
@@ -3868,16 +4025,17 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
|
||||
return -1;
|
||||
|
||||
request = (NeonGetSlruSegmentRequest) {
|
||||
.req.tag = T_NeonGetSlruSegmentRequest,
|
||||
.req.lsn = request_lsn,
|
||||
.req.not_modified_since = not_modified_since,
|
||||
.hdr.tag = T_NeonGetSlruSegmentRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsn,
|
||||
.hdr.not_modified_since = not_modified_since,
|
||||
.kind = kind,
|
||||
.segno = segno
|
||||
};
|
||||
|
||||
do
|
||||
{
|
||||
while (!page_server->send(shard_no, &request.req) || !page_server->flush(shard_no));
|
||||
while (!page_server->send(shard_no, &request.hdr) || !page_server->flush(shard_no));
|
||||
|
||||
consume_prefetch_responses();
|
||||
|
||||
@@ -3887,14 +4045,38 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
|
||||
switch (resp->tag)
|
||||
{
|
||||
case T_NeonGetSlruSegmentResponse:
|
||||
n_blocks = ((NeonGetSlruSegmentResponse *) resp)->n_blocks;
|
||||
memcpy(buffer, ((NeonGetSlruSegmentResponse *) resp)->data, n_blocks*BLCKSZ);
|
||||
{
|
||||
NeonGetSlruSegmentResponse* slru_resp = (NeonGetSlruSegmentResponse *) resp;
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr) ||
|
||||
slru_resp->req.kind != kind ||
|
||||
slru_resp->req.segno != segno)
|
||||
{
|
||||
NEON_PANIC_CONNECTION_STATE(-1, PANIC,
|
||||
"Unexpect response {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u} to get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X, kind=%u, segno=%u}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since), slru_resp->req.kind, slru_resp->req.segno,
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since), kind, segno);
|
||||
}
|
||||
}
|
||||
n_blocks = slru_resp->n_blocks;
|
||||
memcpy(buffer, slru_resp->data, n_blocks*BLCKSZ);
|
||||
break;
|
||||
|
||||
}
|
||||
case T_NeonErrorResponse:
|
||||
if (neon_protocol_version >= 3)
|
||||
{
|
||||
if (!equal_requests(resp, &request.hdr))
|
||||
{
|
||||
elog(WARNING, NEON_TAG "Error message {reqid=%lx,lsn=%X/%08X, since=%X/%08X} doesn't match get SLRU segment request {reqid=%lx,lsn=%X/%08X, since=%X/%08X}",
|
||||
resp->reqid, LSN_FORMAT_ARGS(resp->lsn), LSN_FORMAT_ARGS(resp->not_modified_since),
|
||||
request.hdr.reqid, LSN_FORMAT_ARGS(request.hdr.lsn), LSN_FORMAT_ARGS(request.hdr.not_modified_since));
|
||||
}
|
||||
}
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_IO_ERROR),
|
||||
errmsg(NEON_TAG "could not read SLRU %d segment %d at lsn %X/%08X",
|
||||
errmsg(NEON_TAG "[reqid %lx] could not read SLRU %d segment %d at lsn %X/%08X",
|
||||
resp->reqid,
|
||||
kind,
|
||||
segno,
|
||||
LSN_FORMAT_ARGS(request_lsn)),
|
||||
@@ -4033,8 +4215,9 @@ neon_extend_rel_size(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
NeonResponse *response;
|
||||
NeonNblocksResponse *nbresponse;
|
||||
NeonNblocksRequest request = {
|
||||
.req = (NeonRequest) {
|
||||
.hdr = (NeonRequest) {
|
||||
.tag = T_NeonNblocksRequest,
|
||||
.reqid = GENERATE_REQUEST_ID(),
|
||||
.lsn = end_recptr,
|
||||
.not_modified_since = end_recptr,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user