Remove initialization of an empty safekeeper starting from truncateLsn.

It has caveats, such as creating a half-empty segment which can't be
offloaded. Instead, we'll pursue the pull_timeline approach, seeding the new
state from some peer.
This commit is contained in:
Arseny Sher
2024-01-02 11:24:52 +03:00
committed by Arseny Sher
parent 17b256679b
commit 65b4e6e7d6
2 changed files with 6 additions and 30 deletions

View File

@@ -1091,34 +1091,10 @@ SendProposerElected(Safekeeper *sk)
{
/* safekeeper is empty or no common point, start from the beginning */
sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
if (sk->startStreamingAt < wp->truncateLsn)
{
/*
* There's a gap between the WAL starting point and a truncateLsn,
* which can't appear in a normal working cluster. That gap means
* that all safekeepers reported that they have persisted WAL up
* to the truncateLsn before, but now current safekeeper tells
* otherwise.
*
* Also we have a special condition here, which is empty
* safekeeper with no history. In combination with a gap, that can
* happen when we introduce a new safekeeper to the cluster. This
* is a rare case, which is triggered manually for now, and should
* be treated with care.
*/
/*
* truncateLsn will not change without ack from current
* safekeeper, and it's aligned to the WAL record, so we can
* safely start streaming from this point.
*/
sk->startStreamingAt = wp->truncateLsn;
wp_log(WARNING, "empty safekeeper joined cluster as %s:%s, historyStart=%X/%X, sk->startStreamingAt=%X/%X",
sk->host, sk->port, LSN_FORMAT_ARGS(wp->propTermHistory.entries[0].lsn),
LSN_FORMAT_ARGS(sk->startStreamingAt));
}
wp_log(LOG, "no common point with sk %s:%s, streaming since first term at %X/%X, timelineStartLsn=%X/%X, termHistory.n_entries=%u" ,
sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), LSN_FORMAT_ARGS(wp->timelineStartLsn), wp->propTermHistory.n_entries);
/* wp->timelineStartLsn == InvalidXLogRecPtr can be only when timeline is created manually (test_s3_wal_replay) */
Assert(sk->startStreamingAt == wp->timelineStartLsn || wp->timelineStartLsn == InvalidXLogRecPtr);
}
else
{
@@ -1141,7 +1117,7 @@ SendProposerElected(Safekeeper *sk)
}
}
Assert(sk->startStreamingAt >= wp->truncateLsn && sk->startStreamingAt <= wp->availableLsn);
Assert(sk->startStreamingAt <= wp->availableLsn);
msg.tag = 'e';
msg.term = wp->propTerm;

View File

@@ -566,7 +566,7 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder):
f"Pageserver last_record_lsn={pageserver_lsn}; flush_lsn={last_lsn}; lag before replay is {lag / 1024}kb"
)
endpoint.stop_and_destroy()
endpoint.stop()
timeline_delete_wait_completed(ps_http, tenant_id, timeline_id)
# Also delete and manually create timeline on safekeepers -- this tests