From 1ad6e186bcb72f096eebddf022e08f01cc99d1aa Mon Sep 17 00:00:00 2001
From: Arseny Sher
Date: Tue, 27 Dec 2022 12:34:48 +0400
Subject: [PATCH] Refuse ProposerElected if it is going to truncate correct WAL.

Prevents commit_lsn monotonicity violation (otherwise harmless).

closes https://github.com/neondatabase/neon/issues/3069
---
 safekeeper/src/safekeeper.rs | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs
index 2c13f81476..a70ae247b7 100644
--- a/safekeeper/src/safekeeper.rs
+++ b/safekeeper/src/safekeeper.rs
@@ -727,6 +727,24 @@ where
             return Ok(None);
         }
 
+        // This might happen in a rare race when another (old) connection from
+        // the same walproposer writes + flushes WAL after this connection
+        // already sent flush_lsn in VoteRequest. It is generally safe to
+        // proceed, but to prevent commit_lsn surprisingly going down we should
+        // either refuse the session (simpler) or skip the part we already have
+        // from the stream (can be implemented).
+        if msg.term == self.get_epoch() && self.flush_lsn() > msg.start_streaming_at {
+            bail!("refusing ProposerElected which is going to overwrite correct WAL: term={}, flush_lsn={}, start_streaming_at={}; restarting the handshake should help",
+                msg.term, self.flush_lsn(), msg.start_streaming_at)
+        }
+        // Otherwise this shouldn't happen.
+        assert!(
+            msg.start_streaming_at >= self.inmem.commit_lsn,
+            "attempt to truncate committed data: start_streaming_at={}, commit_lsn={}",
+            msg.start_streaming_at,
+            self.inmem.commit_lsn
+        );
+
         // TODO: cross check divergence point, check if msg.start_streaming_at corresponds to
         // intersection of our history and history from msg
 