diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index 53abd8bfb9..58b18dae7d 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -10,7 +10,7 @@ //! This module is responsible for creation of such tarball //! from data stored in object storage. //! -use anyhow::{anyhow, bail, ensure, Context}; +use anyhow::{anyhow, Context}; use bytes::{BufMut, Bytes, BytesMut}; use fail::fail_point; use pageserver_api::key::{key_to_slru_block, Key}; @@ -38,6 +38,14 @@ use postgres_ffi::PG_TLI; use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE}; use utils::lsn::Lsn; +#[derive(Debug, thiserror::Error)] +pub enum BasebackupError { + #[error("basebackup pageserver error {0:#}")] + Server(#[from] anyhow::Error), + #[error("basebackup client error {0:#}")] + Client(#[source] io::Error), +} + /// Create basebackup with non-rel data in it. /// Only include relational data if 'full_backup' is true. /// @@ -53,7 +61,7 @@ pub async fn send_basebackup_tarball<'a, W>( prev_lsn: Option, full_backup: bool, ctx: &'a RequestContext, -) -> anyhow::Result<()> +) -> Result<(), BasebackupError> where W: AsyncWrite + Send + Sync + Unpin, { @@ -92,8 +100,10 @@ where // Consolidate the derived and the provided prev_lsn values let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn { - if backup_prev != Lsn(0) { - ensure!(backup_prev == provided_prev_lsn); + if backup_prev != Lsn(0) && backup_prev != provided_prev_lsn { + return Err(BasebackupError::Server(anyhow!( + "backup_prev {backup_prev} != provided_prev_lsn {provided_prev_lsn}" + ))); } provided_prev_lsn } else { @@ -159,15 +169,26 @@ where } } - async fn add_block(&mut self, key: &Key, block: Bytes) -> anyhow::Result<()> { + async fn add_block(&mut self, key: &Key, block: Bytes) -> Result<(), BasebackupError> { let (kind, segno, _) = key_to_slru_block(*key)?; match kind { SlruKind::Clog => { - ensure!(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8); + if !(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8) { + return Err(BasebackupError::Server(anyhow!( + "invalid SlruKind::Clog record: block.len()={}", + block.len() + ))); + } } SlruKind::MultiXactMembers | SlruKind::MultiXactOffsets => { - ensure!(block.len() == BLCKSZ as usize); + if block.len() != BLCKSZ as usize { + return Err(BasebackupError::Server(anyhow!( + "invalid {:?} record: block.len()={}", + kind, + block.len() + ))); + } } } @@ -194,12 +215,15 @@ where Ok(()) } - async fn flush(&mut self) -> anyhow::Result<()> { + async fn flush(&mut self) -> Result<(), BasebackupError> { let nblocks = self.buf.len() / BLCKSZ as usize; let (kind, segno) = self.current_segment.take().unwrap(); let segname = format!("{}/{:>04X}", kind.to_str(), segno); let header = new_tar_header(&segname, self.buf.len() as u64)?; - self.ar.append(&header, self.buf.as_slice()).await?; + self.ar + .append(&header, self.buf.as_slice()) + .await + .map_err(BasebackupError::Client)?; self.total_blocks += nblocks; debug!("Added to basebackup slru {} relsize {}", segname, nblocks); @@ -209,7 +233,7 @@ where Ok(()) } - async fn finish(mut self) -> anyhow::Result<()> { + async fn finish(mut self) -> Result<(), BasebackupError> { let res = if self.current_segment.is_none() || self.buf.is_empty() { Ok(()) } else { @@ -226,7 +250,7 @@ impl<'a, W> Basebackup<'a, W> where W: AsyncWrite + Send + Sync + Unpin, { - async fn send_tarball(mut self) -> anyhow::Result<()> { + async fn send_tarball(mut self) -> Result<(), BasebackupError> { // TODO include checksum let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup; @@ -262,7 +286,8 @@ where let slru_partitions = self .timeline .get_slru_keyspace(Version::Lsn(self.lsn), self.ctx) - .await? + .await + .map_err(|e| BasebackupError::Server(e.into()))? .partition( self.timeline.get_shard_identity(), Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64, @@ -271,10 +296,15 @@ where let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar); for part in slru_partitions.parts { - let blocks = self.timeline.get_vectored(part, self.lsn, self.ctx).await?; + let blocks = self + .timeline + .get_vectored(part, self.lsn, self.ctx) + .await + .map_err(|e| BasebackupError::Server(e.into()))?; for (key, block) in blocks { - slru_builder.add_block(&key, block?).await?; + let block = block.map_err(|e| BasebackupError::Server(e.into()))?; + slru_builder.add_block(&key, block).await?; } } slru_builder.finish().await?; @@ -282,8 +312,11 @@ where let mut min_restart_lsn: Lsn = Lsn::MAX; // Create tablespace directories - for ((spcnode, dbnode), has_relmap_file) in - self.timeline.list_dbdirs(self.lsn, self.ctx).await? + for ((spcnode, dbnode), has_relmap_file) in self + .timeline + .list_dbdirs(self.lsn, self.ctx) + .await + .map_err(|e| BasebackupError::Server(e.into()))? { self.add_dbdir(spcnode, dbnode, has_relmap_file).await?; @@ -292,7 +325,8 @@ where let rels = self .timeline .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx) - .await?; + .await + .map_err(|e| BasebackupError::Server(e.into()))?; for &rel in rels.iter() { // Send init fork as main fork to provide well formed empty // contents of UNLOGGED relations. Postgres copies it in @@ -315,7 +349,12 @@ where } } - for (path, content) in self.timeline.list_aux_files(self.lsn, self.ctx).await? { + for (path, content) in self + .timeline + .list_aux_files(self.lsn, self.ctx) + .await + .map_err(|e| BasebackupError::Server(e.into()))? + { if path.starts_with("pg_replslot") { let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN; let restart_lsn = Lsn(u64::from_le_bytes( @@ -346,34 +385,41 @@ where for xid in self .timeline .list_twophase_files(self.lsn, self.ctx) - .await? + .await + .map_err(|e| BasebackupError::Server(e.into()))? { self.add_twophase_file(xid).await?; } fail_point!("basebackup-before-control-file", |_| { - bail!("failpoint basebackup-before-control-file") + Err(BasebackupError::Server(anyhow!( + "failpoint basebackup-before-control-file" + ))) }); // Generate pg_control and bootstrap WAL segment. self.add_pgcontrol_file().await?; - self.ar.finish().await?; + self.ar.finish().await.map_err(BasebackupError::Client)?; debug!("all tarred up!"); Ok(()) } /// Add contents of relfilenode `src`, naming it as `dst`. - async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> anyhow::Result<()> { + async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> Result<(), BasebackupError> { let nblocks = self .timeline .get_rel_size(src, Version::Lsn(self.lsn), self.ctx) - .await?; + .await + .map_err(|e| BasebackupError::Server(e.into()))?; // If the relation is empty, create an empty file if nblocks == 0 { let file_name = dst.to_segfile_name(0); let header = new_tar_header(&file_name, 0)?; - self.ar.append(&header, &mut io::empty()).await?; + self.ar + .append(&header, &mut io::empty()) + .await + .map_err(BasebackupError::Client)?; return Ok(()); } @@ -388,13 +434,17 @@ where let img = self .timeline .get_rel_page_at_lsn(src, blknum, Version::Lsn(self.lsn), self.ctx) - .await?; + .await + .map_err(|e| BasebackupError::Server(e.into()))?; segment_data.extend_from_slice(&img[..]); } let file_name = dst.to_segfile_name(seg as u32); let header = new_tar_header(&file_name, segment_data.len() as u64)?; - self.ar.append(&header, segment_data.as_slice()).await?; + self.ar + .append(&header, segment_data.as_slice()) + .await + .map_err(BasebackupError::Client)?; seg += 1; startblk = endblk; @@ -414,20 +464,22 @@ where spcnode: u32, dbnode: u32, has_relmap_file: bool, - ) -> anyhow::Result<()> { + ) -> Result<(), BasebackupError> { let relmap_img = if has_relmap_file { let img = self .timeline .get_relmap_file(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx) - .await?; + .await + .map_err(|e| BasebackupError::Server(e.into()))?; - ensure!( - img.len() - == dispatch_pgversion!( - self.timeline.pg_version, - pgv::bindings::SIZEOF_RELMAPFILE - ) - ); + if img.len() + != dispatch_pgversion!(self.timeline.pg_version, pgv::bindings::SIZEOF_RELMAPFILE) + { + return Err(BasebackupError::Server(anyhow!( + "img.len() != SIZE_OF_RELMAPFILE, img.len()={}", + img.len(), + ))); + } Some(img) } else { @@ -440,14 +492,20 @@ where ver => format!("{ver}\x0A"), }; let header = new_tar_header("PG_VERSION", pg_version_str.len() as u64)?; - self.ar.append(&header, pg_version_str.as_bytes()).await?; + self.ar + .append(&header, pg_version_str.as_bytes()) + .await + .map_err(BasebackupError::Client)?; info!("timeline.pg_version {}", self.timeline.pg_version); if let Some(img) = relmap_img { // filenode map for global tablespace let header = new_tar_header("global/pg_filenode.map", img.len() as u64)?; - self.ar.append(&header, &img[..]).await?; + self.ar + .append(&header, &img[..]) + .await + .map_err(BasebackupError::Client)?; } else { warn!("global/pg_filenode.map is missing"); } @@ -466,18 +524,26 @@ where && self .timeline .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx) - .await? + .await + .map_err(|e| BasebackupError::Server(e.into()))? .is_empty() { return Ok(()); } // User defined tablespaces are not supported - ensure!(spcnode == DEFAULTTABLESPACE_OID); + if spcnode != DEFAULTTABLESPACE_OID { + return Err(BasebackupError::Server(anyhow!( + "spcnode != DEFAULTTABLESPACE_OID, spcnode={spcnode}" + ))); + } // Append dir path for each database let path = format!("base/{}", dbnode); let header = new_tar_header_dir(&path)?; - self.ar.append(&header, &mut io::empty()).await?; + self.ar + .append(&header, &mut io::empty()) + .await + .map_err(BasebackupError::Client)?; if let Some(img) = relmap_img { let dst_path = format!("base/{}/PG_VERSION", dbnode); @@ -487,11 +553,17 @@ where ver => format!("{ver}\x0A"), }; let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?; - self.ar.append(&header, pg_version_str.as_bytes()).await?; + self.ar + .append(&header, pg_version_str.as_bytes()) + .await + .map_err(BasebackupError::Client)?; let relmap_path = format!("base/{}/pg_filenode.map", dbnode); let header = new_tar_header(&relmap_path, img.len() as u64)?; - self.ar.append(&header, &img[..]).await?; + self.ar + .append(&header, &img[..]) + .await + .map_err(BasebackupError::Client)?; } }; Ok(()) @@ -500,11 +572,12 @@ where // // Extract twophase state files // - async fn add_twophase_file(&mut self, xid: TransactionId) -> anyhow::Result<()> { + async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> { let img = self .timeline .get_twophase_file(xid, self.lsn, self.ctx) - .await?; + .await + .map_err(|e| BasebackupError::Server(e.into()))?; let mut buf = BytesMut::new(); buf.extend_from_slice(&img[..]); @@ -512,7 +585,10 @@ where buf.put_u32_le(crc); let path = format!("pg_twophase/{:>08X}", xid); let header = new_tar_header(&path, buf.len() as u64)?; - self.ar.append(&header, &buf[..]).await?; + self.ar + .append(&header, &buf[..]) + .await + .map_err(BasebackupError::Client)?; Ok(()) } @@ -521,24 +597,28 @@ where // Add generated pg_control file and bootstrap WAL segment. // Also send zenith.signal file with extra bootstrap data. // - async fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> { + async fn add_pgcontrol_file(&mut self) -> Result<(), BasebackupError> { // add zenith.signal file let mut zenith_signal = String::new(); if self.prev_record_lsn == Lsn(0) { if self.lsn == self.timeline.get_ancestor_lsn() { - write!(zenith_signal, "PREV LSN: none")?; + write!(zenith_signal, "PREV LSN: none") + .map_err(|e| BasebackupError::Server(e.into()))?; } else { - write!(zenith_signal, "PREV LSN: invalid")?; + write!(zenith_signal, "PREV LSN: invalid") + .map_err(|e| BasebackupError::Server(e.into()))?; } } else { - write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?; + write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn) + .map_err(|e| BasebackupError::Server(e.into()))?; } self.ar .append( &new_tar_header("zenith.signal", zenith_signal.len() as u64)?, zenith_signal.as_bytes(), ) - .await?; + .await + .map_err(BasebackupError::Client)?; let checkpoint_bytes = self .timeline @@ -560,7 +640,10 @@ where //send pg_control let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?; - self.ar.append(&header, &pg_control_bytes[..]).await?; + self.ar + .append(&header, &pg_control_bytes[..]) + .await + .map_err(BasebackupError::Client)?; //send wal segment let segno = self.lsn.segment_number(WAL_SEGMENT_SIZE); @@ -575,8 +658,16 @@ where self.lsn, ) .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?; - ensure!(wal_seg.len() == WAL_SEGMENT_SIZE); - self.ar.append(&header, &wal_seg[..]).await?; + if wal_seg.len() != WAL_SEGMENT_SIZE { + return Err(BasebackupError::Server(anyhow!( + "wal_seg.len() != WAL_SEGMENT_SIZE, wal_seg.len()={}", + wal_seg.len() + ))); + } + self.ar + .append(&header, &wal_seg[..]) + .await + .map_err(BasebackupError::Client)?; Ok(()) } } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 96d2397c94..f6b251283c 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -48,6 +48,7 @@ use utils::{ use crate::auth::check_permission; use crate::basebackup; +use crate::basebackup::BasebackupError; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::import_datadir::import_wal_from_tar; @@ -1236,6 +1237,13 @@ impl PageServerHandler { where IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, { + fn map_basebackup_error(err: BasebackupError) -> QueryError { + match err { + BasebackupError::Client(e) => QueryError::Disconnected(ConnectionError::Io(e)), + BasebackupError::Server(e) => QueryError::Other(e), + } + } + let started = std::time::Instant::now(); // check that the timeline exists @@ -1261,7 +1269,8 @@ impl PageServerHandler { let lsn_awaited_after = started.elapsed(); // switch client to COPYOUT - pgb.write_message_noflush(&BeMessage::CopyOutResponse)?; + pgb.write_message_noflush(&BeMessage::CopyOutResponse) + .map_err(QueryError::Disconnected)?; self.flush_cancellable(pgb, &timeline.cancel).await?; // Send a tarball of the latest layer on the timeline. Compress if not @@ -1276,7 +1285,8 @@ impl PageServerHandler { full_backup, ctx, ) - .await?; + .await + .map_err(map_basebackup_error)?; } else { let mut writer = pgb.copyout_writer(); if gzip { @@ -1297,9 +1307,13 @@ impl PageServerHandler { full_backup, ctx, ) - .await?; + .await + .map_err(map_basebackup_error)?; // shutdown the encoder to ensure the gzip footer is written - encoder.shutdown().await?; + encoder + .shutdown() + .await + .map_err(|e| QueryError::Disconnected(ConnectionError::Io(e)))?; } else { basebackup::send_basebackup_tarball( &mut writer, @@ -1309,11 +1323,13 @@ impl PageServerHandler { full_backup, ctx, ) - .await?; + .await + .map_err(map_basebackup_error)?; } } - pgb.write_message_noflush(&BeMessage::CopyDone)?; + pgb.write_message_noflush(&BeMessage::CopyDone) + .map_err(QueryError::Disconnected)?; self.flush_cancellable(pgb, &timeline.cancel).await?; let basebackup_after = started