diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index dbe0034de2..b3e006ab05 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -938,8 +938,9 @@ where } trace!( - "processed AppendRequest of len {}, end_lsn={:?}, commit_lsn={:?}, truncate_lsn={:?}, flushed={:?}", + "processed AppendRequest of len {}, begin_lsn={}, end_lsn={:?}, commit_lsn={:?}, truncate_lsn={:?}, flushed={:?}", msg.wal_data.len(), + msg.h.begin_lsn, msg.h.end_lsn, msg.h.commit_lsn, msg.h.truncate_lsn, diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index 89c2e98a94..c477fe5c7b 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -98,7 +98,19 @@ pub struct PhysicalStorage { /// Also can be ahead of record_lsn, if happen to be in the middle of a WAL record. write_lsn: Lsn, - /// The LSN of the last WAL record written to disk. Still can be not fully flushed. + /// The LSN of the last WAL record written to disk. Still can be not fully + /// flushed. + /// + /// Note: Normally it (and flush_record_lsn) is <= write_lsn, but after xlog + /// switch ingest the reverse is true because we don't bump write_lsn up to + /// the next segment: WAL stream from the compute doesn't have the gap and + /// for simplicity / as a sanity check we disallow any non-sequential + /// writes, so write zeros as is. + /// + /// Similar effect is in theory possible due to LSN alignment: if record + /// ends at *2, decoder will report end lsn as *8 even though we haven't + /// written these zeros yet. In practice compute likely never sends + /// non-aligned chunks of data. write_record_lsn: Lsn, /// The LSN of the last WAL record flushed to disk. @@ -440,11 +452,12 @@ impl Storage for PhysicalStorage { .with_label_values(&["truncate_wal"]) .start_timer(); - // Streaming must not create a hole, so truncate cannot be called on non-written lsn - if self.write_lsn != Lsn(0) && end_pos > self.write_lsn { + // Streaming must not create a hole, so truncate cannot be called on + // non-written lsn. + if self.write_record_lsn != Lsn(0) && end_pos > self.write_record_lsn { bail!( - "truncate_wal called on non-written WAL, write_lsn={}, end_pos={}", - self.write_lsn, + "truncate_wal called on non-written WAL, write_record_lsn={}, end_pos={}", + self.write_record_lsn, end_pos ); } diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 3785651aed..5672e836ee 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1057,6 +1057,24 @@ def test_restart_endpoint(neon_env_builder: NeonEnvBuilder): endpoint.start() +# Try restarting endpoint immediately after xlog switch. +# https://github.com/neondatabase/neon/issues/8911 +def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start() + + endpoint = env.endpoints.create_start("main") + + endpoint.safe_psql("create table t (i int)") + + endpoint.safe_psql("SELECT pg_switch_wal()") + + # we want immediate shutdown to have endpoint restart on xlog switch record, + # so prevent shutdown checkpoint. + endpoint.stop(mode="immediate") + endpoint = env.endpoints.create_start("main") + endpoint.safe_psql("SELECT 'works'") + + # Context manager which logs passed time on exit. class DurationLogger: def __init__(self, desc):