From a092127b17770502cb3896ba80fb966165f23507 Mon Sep 17 00:00:00 2001 From: Arthur Petukhovsky Date: Thu, 18 Jan 2024 21:55:24 +0300 Subject: [PATCH] Fix truncateLsn initialization (#6396) In https://github.com/neondatabase/neon/commit/7f828890cf602d8f99fc4e772b94a6230a34db17 we changed the logic for persisting control_files. Previously, it was updated if `peer_horizon_lsn` jumped more than one segment, which caused `peer_horizon_lsn` to be initialized on disk as soon as the safekeeper received its first `AppendRequest`. That change caused an issue with `truncateLsn`, which can now sometimes be zero. This PR fixes it: `truncateLsn`/`peer_horizon_lsn` can never be zero once we know `timeline_start_lsn`. Closes https://github.com/neondatabase/neon/issues/6248 --- pgxn/neon/walproposer.c | 13 +++++++------ safekeeper/src/safekeeper.rs | 5 +++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 1f7c473e7d..171af7d2aa 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -959,8 +959,8 @@ DetermineEpochStartLsn(WalProposer *wp) } /* - * If propEpochStartLsn is 0 everywhere, we are bootstrapping -- nothing - * was committed yet. Start streaming then from the basebackup LSN. + * If propEpochStartLsn is 0, it means flushLsn is 0 everywhere, we are bootstrapping + * and nothing was committed yet. Start streaming then from the basebackup LSN. */ if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers) { @@ -973,12 +973,13 @@ DetermineEpochStartLsn(WalProposer *wp) } /* - * If propEpochStartLsn is not 0, at least one msg with WAL was sent to - * some connected safekeeper; it must have carried truncateLsn pointing to - * the first record. + * Safekeepers are setting truncateLsn after timelineStartLsn is known, so it + * should never be zero at this point, if we know timelineStartLsn. + * + * timelineStartLsn can be zero only on the first syncSafekeepers run.
*/ Assert((wp->truncateLsn != InvalidXLogRecPtr) || - (wp->config->syncSafekeepers && wp->truncateLsn == wp->propEpochStartLsn)); + (wp->config->syncSafekeepers && wp->truncateLsn == wp->timelineStartLsn)); /* * We will be generating WAL since propEpochStartLsn, so we should set diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index f63e8576ad..d66db9b652 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -742,6 +742,11 @@ where state.timeline_start_lsn ); } + if state.peer_horizon_lsn == Lsn(0) { + // Update peer_horizon_lsn as soon as we know where timeline starts. + // It means that peer_horizon_lsn cannot be zero after we know timeline_start_lsn. + state.peer_horizon_lsn = msg.timeline_start_lsn; + } if state.local_start_lsn == Lsn(0) { state.local_start_lsn = msg.start_streaming_at; info!("setting local_start_lsn to {:?}", state.local_start_lsn);