From 54aa31980597742f28b022bd0a729b97d910f0b7 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Sat, 30 Dec 2023 00:31:19 +0300 Subject: [PATCH] Don't split WAL record across two XLogData's when sending from safekeepers. The protocol demands this; not following it makes the standby complain about corrupted WAL in various ways. https://neondb.slack.com/archives/C05L7D1JAUS/p1703774799114719 closes https://github.com/neondatabase/cloud/issues/9057 --- safekeeper/src/send_wal.rs | 22 +++++++++++++++------- safekeeper/src/wal_storage.rs | 3 +++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index bd1d306968..9a5657a40d 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -529,12 +529,19 @@ impl WalSender<'_, IO> { ); // try to send as much as available, capped by MAX_SEND_SIZE - let mut send_size = self - .end_pos - .checked_sub(self.start_pos) - .context("reading wal without waiting for it first")? - .0 as usize; - send_size = min(send_size, self.send_buf.len()); + let mut chunk_end_pos = self.start_pos + MAX_SEND_SIZE as u64; + // if we went behind available WAL, back off + if chunk_end_pos >= self.end_pos { + chunk_end_pos = self.end_pos; + } else { + // If sending not up to end pos, round down to page boundary to + // avoid breaking WAL record not at page boundary, as protocol + // demands. See walsender.c (XLogSendPhysical). + chunk_end_pos = chunk_end_pos + .checked_sub(chunk_end_pos.block_offset()) + .unwrap(); + } + let send_size = (chunk_end_pos.0 - self.start_pos.0) as usize; let send_buf = &mut self.send_buf[..send_size]; let send_size: usize; { @@ -545,7 +552,8 @@ impl WalSender<'_, IO> { } else { None }; - // read wal into buffer + // Read WAL into buffer. send_size can be additionally capped to + // segment boundary here. send_size = self.wal_reader.read(send_buf).await? 
}; let send_buf = &send_buf[..send_size]; diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index fa44b24258..e7538f805c 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -565,6 +565,9 @@ impl WalReader { }) } + /// Read WAL at current position into provided buf, returns number of bytes + /// read. It can be smaller than buf size only if segment boundary is + /// reached. pub async fn read(&mut self, buf: &mut [u8]) -> Result { // If this timeline is new, we may not have a full segment yet, so // we pad the first bytes of the timeline's first WAL segment with 0s