chore(pageserver): categorize basebackup errors (#7523)

close https://github.com/neondatabase/neon/issues/7391

## Summary of changes

Categorize basebackup error into two types: server error and client
error. This makes it easier to set up alerts.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z
2024-05-01 12:31:59 -04:00
committed by GitHub
parent 26e6ff8ba6
commit 5558457c84
2 changed files with 166 additions and 59 deletions

View File

@@ -10,7 +10,7 @@
//! This module is responsible for creation of such tarball
//! from data stored in object storage.
//!
use anyhow::{anyhow, bail, ensure, Context};
use anyhow::{anyhow, Context};
use bytes::{BufMut, Bytes, BytesMut};
use fail::fail_point;
use pageserver_api::key::{key_to_slru_block, Key};
@@ -38,6 +38,14 @@ use postgres_ffi::PG_TLI;
use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
use utils::lsn::Lsn;
#[derive(Debug, thiserror::Error)]
pub enum BasebackupError {
#[error("basebackup pageserver error {0:#}")]
Server(#[from] anyhow::Error),
#[error("basebackup client error {0:#}")]
Client(#[source] io::Error),
}
/// Create basebackup with non-rel data in it.
/// Only include relational data if 'full_backup' is true.
///
@@ -53,7 +61,7 @@ pub async fn send_basebackup_tarball<'a, W>(
prev_lsn: Option<Lsn>,
full_backup: bool,
ctx: &'a RequestContext,
) -> anyhow::Result<()>
) -> Result<(), BasebackupError>
where
W: AsyncWrite + Send + Sync + Unpin,
{
@@ -92,8 +100,10 @@ where
// Consolidate the derived and the provided prev_lsn values
let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
if backup_prev != Lsn(0) {
ensure!(backup_prev == provided_prev_lsn);
if backup_prev != Lsn(0) && backup_prev != provided_prev_lsn {
return Err(BasebackupError::Server(anyhow!(
"backup_prev {backup_prev} != provided_prev_lsn {provided_prev_lsn}"
)));
}
provided_prev_lsn
} else {
@@ -159,15 +169,26 @@ where
}
}
async fn add_block(&mut self, key: &Key, block: Bytes) -> anyhow::Result<()> {
async fn add_block(&mut self, key: &Key, block: Bytes) -> Result<(), BasebackupError> {
let (kind, segno, _) = key_to_slru_block(*key)?;
match kind {
SlruKind::Clog => {
ensure!(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8);
if !(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8) {
return Err(BasebackupError::Server(anyhow!(
"invalid SlruKind::Clog record: block.len()={}",
block.len()
)));
}
}
SlruKind::MultiXactMembers | SlruKind::MultiXactOffsets => {
ensure!(block.len() == BLCKSZ as usize);
if block.len() != BLCKSZ as usize {
return Err(BasebackupError::Server(anyhow!(
"invalid {:?} record: block.len()={}",
kind,
block.len()
)));
}
}
}
@@ -194,12 +215,15 @@ where
Ok(())
}
async fn flush(&mut self) -> anyhow::Result<()> {
async fn flush(&mut self) -> Result<(), BasebackupError> {
let nblocks = self.buf.len() / BLCKSZ as usize;
let (kind, segno) = self.current_segment.take().unwrap();
let segname = format!("{}/{:>04X}", kind.to_str(), segno);
let header = new_tar_header(&segname, self.buf.len() as u64)?;
self.ar.append(&header, self.buf.as_slice()).await?;
self.ar
.append(&header, self.buf.as_slice())
.await
.map_err(BasebackupError::Client)?;
self.total_blocks += nblocks;
debug!("Added to basebackup slru {} relsize {}", segname, nblocks);
@@ -209,7 +233,7 @@ where
Ok(())
}
async fn finish(mut self) -> anyhow::Result<()> {
async fn finish(mut self) -> Result<(), BasebackupError> {
let res = if self.current_segment.is_none() || self.buf.is_empty() {
Ok(())
} else {
@@ -226,7 +250,7 @@ impl<'a, W> Basebackup<'a, W>
where
W: AsyncWrite + Send + Sync + Unpin,
{
async fn send_tarball(mut self) -> anyhow::Result<()> {
async fn send_tarball(mut self) -> Result<(), BasebackupError> {
// TODO include checksum
let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
@@ -262,7 +286,8 @@ where
let slru_partitions = self
.timeline
.get_slru_keyspace(Version::Lsn(self.lsn), self.ctx)
.await?
.await
.map_err(|e| BasebackupError::Server(e.into()))?
.partition(
self.timeline.get_shard_identity(),
Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
@@ -271,10 +296,15 @@ where
let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);
for part in slru_partitions.parts {
let blocks = self.timeline.get_vectored(part, self.lsn, self.ctx).await?;
let blocks = self
.timeline
.get_vectored(part, self.lsn, self.ctx)
.await
.map_err(|e| BasebackupError::Server(e.into()))?;
for (key, block) in blocks {
slru_builder.add_block(&key, block?).await?;
let block = block.map_err(|e| BasebackupError::Server(e.into()))?;
slru_builder.add_block(&key, block).await?;
}
}
slru_builder.finish().await?;
@@ -282,8 +312,11 @@ where
let mut min_restart_lsn: Lsn = Lsn::MAX;
// Create tablespace directories
for ((spcnode, dbnode), has_relmap_file) in
self.timeline.list_dbdirs(self.lsn, self.ctx).await?
for ((spcnode, dbnode), has_relmap_file) in self
.timeline
.list_dbdirs(self.lsn, self.ctx)
.await
.map_err(|e| BasebackupError::Server(e.into()))?
{
self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;
@@ -292,7 +325,8 @@ where
let rels = self
.timeline
.list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
.await?;
.await
.map_err(|e| BasebackupError::Server(e.into()))?;
for &rel in rels.iter() {
// Send init fork as main fork to provide well formed empty
// contents of UNLOGGED relations. Postgres copies it in
@@ -315,7 +349,12 @@ where
}
}
for (path, content) in self.timeline.list_aux_files(self.lsn, self.ctx).await? {
for (path, content) in self
.timeline
.list_aux_files(self.lsn, self.ctx)
.await
.map_err(|e| BasebackupError::Server(e.into()))?
{
if path.starts_with("pg_replslot") {
let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
let restart_lsn = Lsn(u64::from_le_bytes(
@@ -346,34 +385,41 @@ where
for xid in self
.timeline
.list_twophase_files(self.lsn, self.ctx)
.await?
.await
.map_err(|e| BasebackupError::Server(e.into()))?
{
self.add_twophase_file(xid).await?;
}
fail_point!("basebackup-before-control-file", |_| {
bail!("failpoint basebackup-before-control-file")
Err(BasebackupError::Server(anyhow!(
"failpoint basebackup-before-control-file"
)))
});
// Generate pg_control and bootstrap WAL segment.
self.add_pgcontrol_file().await?;
self.ar.finish().await?;
self.ar.finish().await.map_err(BasebackupError::Client)?;
debug!("all tarred up!");
Ok(())
}
/// Add contents of relfilenode `src`, naming it as `dst`.
async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> anyhow::Result<()> {
async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> Result<(), BasebackupError> {
let nblocks = self
.timeline
.get_rel_size(src, Version::Lsn(self.lsn), self.ctx)
.await?;
.await
.map_err(|e| BasebackupError::Server(e.into()))?;
// If the relation is empty, create an empty file
if nblocks == 0 {
let file_name = dst.to_segfile_name(0);
let header = new_tar_header(&file_name, 0)?;
self.ar.append(&header, &mut io::empty()).await?;
self.ar
.append(&header, &mut io::empty())
.await
.map_err(BasebackupError::Client)?;
return Ok(());
}
@@ -388,13 +434,17 @@ where
let img = self
.timeline
.get_rel_page_at_lsn(src, blknum, Version::Lsn(self.lsn), self.ctx)
.await?;
.await
.map_err(|e| BasebackupError::Server(e.into()))?;
segment_data.extend_from_slice(&img[..]);
}
let file_name = dst.to_segfile_name(seg as u32);
let header = new_tar_header(&file_name, segment_data.len() as u64)?;
self.ar.append(&header, segment_data.as_slice()).await?;
self.ar
.append(&header, segment_data.as_slice())
.await
.map_err(BasebackupError::Client)?;
seg += 1;
startblk = endblk;
@@ -414,20 +464,22 @@ where
spcnode: u32,
dbnode: u32,
has_relmap_file: bool,
) -> anyhow::Result<()> {
) -> Result<(), BasebackupError> {
let relmap_img = if has_relmap_file {
let img = self
.timeline
.get_relmap_file(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
.await?;
.await
.map_err(|e| BasebackupError::Server(e.into()))?;
ensure!(
img.len()
== dispatch_pgversion!(
self.timeline.pg_version,
pgv::bindings::SIZEOF_RELMAPFILE
)
);
if img.len()
!= dispatch_pgversion!(self.timeline.pg_version, pgv::bindings::SIZEOF_RELMAPFILE)
{
return Err(BasebackupError::Server(anyhow!(
"img.len() != SIZE_OF_RELMAPFILE, img.len()={}",
img.len(),
)));
}
Some(img)
} else {
@@ -440,14 +492,20 @@ where
ver => format!("{ver}\x0A"),
};
let header = new_tar_header("PG_VERSION", pg_version_str.len() as u64)?;
self.ar.append(&header, pg_version_str.as_bytes()).await?;
self.ar
.append(&header, pg_version_str.as_bytes())
.await
.map_err(BasebackupError::Client)?;
info!("timeline.pg_version {}", self.timeline.pg_version);
if let Some(img) = relmap_img {
// filenode map for global tablespace
let header = new_tar_header("global/pg_filenode.map", img.len() as u64)?;
self.ar.append(&header, &img[..]).await?;
self.ar
.append(&header, &img[..])
.await
.map_err(BasebackupError::Client)?;
} else {
warn!("global/pg_filenode.map is missing");
}
@@ -466,18 +524,26 @@ where
&& self
.timeline
.list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
.await?
.await
.map_err(|e| BasebackupError::Server(e.into()))?
.is_empty()
{
return Ok(());
}
// User defined tablespaces are not supported
ensure!(spcnode == DEFAULTTABLESPACE_OID);
if spcnode != DEFAULTTABLESPACE_OID {
return Err(BasebackupError::Server(anyhow!(
"spcnode != DEFAULTTABLESPACE_OID, spcnode={spcnode}"
)));
}
// Append dir path for each database
let path = format!("base/{}", dbnode);
let header = new_tar_header_dir(&path)?;
self.ar.append(&header, &mut io::empty()).await?;
self.ar
.append(&header, &mut io::empty())
.await
.map_err(BasebackupError::Client)?;
if let Some(img) = relmap_img {
let dst_path = format!("base/{}/PG_VERSION", dbnode);
@@ -487,11 +553,17 @@ where
ver => format!("{ver}\x0A"),
};
let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?;
self.ar.append(&header, pg_version_str.as_bytes()).await?;
self.ar
.append(&header, pg_version_str.as_bytes())
.await
.map_err(BasebackupError::Client)?;
let relmap_path = format!("base/{}/pg_filenode.map", dbnode);
let header = new_tar_header(&relmap_path, img.len() as u64)?;
self.ar.append(&header, &img[..]).await?;
self.ar
.append(&header, &img[..])
.await
.map_err(BasebackupError::Client)?;
}
};
Ok(())
@@ -500,11 +572,12 @@ where
//
// Extract twophase state files
//
async fn add_twophase_file(&mut self, xid: TransactionId) -> anyhow::Result<()> {
async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> {
let img = self
.timeline
.get_twophase_file(xid, self.lsn, self.ctx)
.await?;
.await
.map_err(|e| BasebackupError::Server(e.into()))?;
let mut buf = BytesMut::new();
buf.extend_from_slice(&img[..]);
@@ -512,7 +585,10 @@ where
buf.put_u32_le(crc);
let path = format!("pg_twophase/{:>08X}", xid);
let header = new_tar_header(&path, buf.len() as u64)?;
self.ar.append(&header, &buf[..]).await?;
self.ar
.append(&header, &buf[..])
.await
.map_err(BasebackupError::Client)?;
Ok(())
}
@@ -521,24 +597,28 @@ where
// Add generated pg_control file and bootstrap WAL segment.
// Also send zenith.signal file with extra bootstrap data.
//
async fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
async fn add_pgcontrol_file(&mut self) -> Result<(), BasebackupError> {
// add zenith.signal file
let mut zenith_signal = String::new();
if self.prev_record_lsn == Lsn(0) {
if self.lsn == self.timeline.get_ancestor_lsn() {
write!(zenith_signal, "PREV LSN: none")?;
write!(zenith_signal, "PREV LSN: none")
.map_err(|e| BasebackupError::Server(e.into()))?;
} else {
write!(zenith_signal, "PREV LSN: invalid")?;
write!(zenith_signal, "PREV LSN: invalid")
.map_err(|e| BasebackupError::Server(e.into()))?;
}
} else {
write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?;
write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)
.map_err(|e| BasebackupError::Server(e.into()))?;
}
self.ar
.append(
&new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
zenith_signal.as_bytes(),
)
.await?;
.await
.map_err(BasebackupError::Client)?;
let checkpoint_bytes = self
.timeline
@@ -560,7 +640,10 @@ where
//send pg_control
let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
self.ar.append(&header, &pg_control_bytes[..]).await?;
self.ar
.append(&header, &pg_control_bytes[..])
.await
.map_err(BasebackupError::Client)?;
//send wal segment
let segno = self.lsn.segment_number(WAL_SEGMENT_SIZE);
@@ -575,8 +658,16 @@ where
self.lsn,
)
.map_err(|e| anyhow!(e).context("Failed generating wal segment"))?;
ensure!(wal_seg.len() == WAL_SEGMENT_SIZE);
self.ar.append(&header, &wal_seg[..]).await?;
if wal_seg.len() != WAL_SEGMENT_SIZE {
return Err(BasebackupError::Server(anyhow!(
"wal_seg.len() != WAL_SEGMENT_SIZE, wal_seg.len()={}",
wal_seg.len()
)));
}
self.ar
.append(&header, &wal_seg[..])
.await
.map_err(BasebackupError::Client)?;
Ok(())
}
}

View File

@@ -48,6 +48,7 @@ use utils::{
use crate::auth::check_permission;
use crate::basebackup;
use crate::basebackup::BasebackupError;
use crate::config::PageServerConf;
use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
@@ -1236,6 +1237,13 @@ impl PageServerHandler {
where
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
{
fn map_basebackup_error(err: BasebackupError) -> QueryError {
match err {
BasebackupError::Client(e) => QueryError::Disconnected(ConnectionError::Io(e)),
BasebackupError::Server(e) => QueryError::Other(e),
}
}
let started = std::time::Instant::now();
// check that the timeline exists
@@ -1261,7 +1269,8 @@ impl PageServerHandler {
let lsn_awaited_after = started.elapsed();
// switch client to COPYOUT
pgb.write_message_noflush(&BeMessage::CopyOutResponse)?;
pgb.write_message_noflush(&BeMessage::CopyOutResponse)
.map_err(QueryError::Disconnected)?;
self.flush_cancellable(pgb, &timeline.cancel).await?;
// Send a tarball of the latest layer on the timeline. Compress if not
@@ -1276,7 +1285,8 @@ impl PageServerHandler {
full_backup,
ctx,
)
.await?;
.await
.map_err(map_basebackup_error)?;
} else {
let mut writer = pgb.copyout_writer();
if gzip {
@@ -1297,9 +1307,13 @@ impl PageServerHandler {
full_backup,
ctx,
)
.await?;
.await
.map_err(map_basebackup_error)?;
// shutdown the encoder to ensure the gzip footer is written
encoder.shutdown().await?;
encoder
.shutdown()
.await
.map_err(|e| QueryError::Disconnected(ConnectionError::Io(e)))?;
} else {
basebackup::send_basebackup_tarball(
&mut writer,
@@ -1309,11 +1323,13 @@ impl PageServerHandler {
full_backup,
ctx,
)
.await?;
.await
.map_err(map_basebackup_error)?;
}
}
pgb.write_message_noflush(&BeMessage::CopyDone)?;
pgb.write_message_noflush(&BeMessage::CopyDone)
.map_err(QueryError::Disconnected)?;
self.flush_cancellable(pgb, &timeline.cancel).await?;
let basebackup_after = started