safekeeper: fix endpoint restart immediately after xlog switch.

Check that truncation point is not from the future by comparing it with
write_record_lsn, not write_lsn, and explain that xlog switch changes
their normal order.

ref https://github.com/neondatabase/neon/issues/8911
This commit is contained in:
Arseny Sher
2024-09-06 15:23:57 +03:00
committed by Arseny Sher
parent cbcd4058ed
commit e287f36a05
3 changed files with 38 additions and 6 deletions

View File

@@ -938,8 +938,9 @@ where
}
trace!(
"processed AppendRequest of len {}, end_lsn={:?}, commit_lsn={:?}, truncate_lsn={:?}, flushed={:?}",
"processed AppendRequest of len {}, begin_lsn={}, end_lsn={:?}, commit_lsn={:?}, truncate_lsn={:?}, flushed={:?}",
msg.wal_data.len(),
msg.h.begin_lsn,
msg.h.end_lsn,
msg.h.commit_lsn,
msg.h.truncate_lsn,

View File

@@ -98,7 +98,19 @@ pub struct PhysicalStorage {
/// Also can be ahead of record_lsn, if happen to be in the middle of a WAL record.
write_lsn: Lsn,
/// The LSN of the last WAL record written to disk. Still can be not fully flushed.
/// The LSN of the last WAL record written to disk. Still can be not fully
/// flushed.
///
/// Note: Normally it (and flush_record_lsn) is <= write_lsn, but after xlog
/// switch ingest the reverse is true because we don't bump write_lsn up to
/// the next segment: WAL stream from the compute doesn't have the gap and
/// for simplicity / as a sanity check we disallow any non-sequential
/// writes, so write zeros as is.
///
/// Similar effect is in theory possible due to LSN alignment: if record
/// ends at *2, decoder will report end lsn as *8 even though we haven't
/// written these zeros yet. In practice compute likely never sends
/// non-aligned chunks of data.
write_record_lsn: Lsn,
/// The LSN of the last WAL record flushed to disk.
@@ -440,11 +452,12 @@ impl Storage for PhysicalStorage {
.with_label_values(&["truncate_wal"])
.start_timer();
// Streaming must not create a hole, so truncate cannot be called on non-written lsn
if self.write_lsn != Lsn(0) && end_pos > self.write_lsn {
// Streaming must not create a hole, so truncate cannot be called on
// non-written lsn.
if self.write_record_lsn != Lsn(0) && end_pos > self.write_record_lsn {
bail!(
"truncate_wal called on non-written WAL, write_lsn={}, end_pos={}",
self.write_lsn,
"truncate_wal called on non-written WAL, write_record_lsn={}, end_pos={}",
self.write_record_lsn,
end_pos
);
}

View File

@@ -1057,6 +1057,24 @@ def test_restart_endpoint(neon_env_builder: NeonEnvBuilder):
endpoint.start()
# Try restarting endpoint immediately after xlog switch.
# https://github.com/neondatabase/neon/issues/8911
def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()
endpoint = env.endpoints.create_start("main")
endpoint.safe_psql("create table t (i int)")
endpoint.safe_psql("SELECT pg_switch_wal()")
# we want immediate shutdown to have endpoint restart on xlog switch record,
# so prevent shutdown checkpoint.
endpoint.stop(mode="immediate")
endpoint = env.endpoints.create_start("main")
endpoint.safe_psql("SELECT 'works'")
# Context manager which logs passed time on exit.
class DurationLogger:
def __init__(self, desc):