safekeeper: remove local WAL files ignoring peer_horizon_lsn. (#8900)

If a peer safekeeper needs a garbage-collected segment, it will now be fetched
from S3 using on-demand WAL download. This reduces the danger of running out of disk space when a safekeeper fails.
This commit is contained in:
Arseny Sher
2024-10-04 19:07:39 +03:00
committed by GitHub
parent 2d248aea6f
commit eae4470bb6
2 changed files with 20 additions and 12 deletions

View File

@@ -2,21 +2,29 @@ use utils::lsn::Lsn;
use crate::timeline_manager::StateSnapshot;
/// Get oldest LSN we still need to keep. We hold WAL till it is consumed
/// by all of 1) pageserver (remote_consistent_lsn) 2) peers 3) s3
/// offloading.
/// While it is safe to use inmem values for determining horizon,
/// we use persistent to make possible normal states less surprising.
/// All segments covering LSNs before horizon_lsn can be removed.
/// Get oldest LSN we still need to keep.
///
/// We hold WAL till it is consumed by
/// 1) pageserver (remote_consistent_lsn)
/// 2) s3 offloading.
/// 3) Additionally we must store WAL since last local commit_lsn because
/// that's where we start looking for last WAL record on start.
///
/// If some peer safekeeper misses data it will fetch it from the remote
/// storage. While it is safe to use inmem values for determining horizon, we
/// use persistent to make possible normal states less surprising. All segments
/// covering LSNs before horizon_lsn can be removed.
pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn {
use std::cmp::min;
let mut horizon_lsn = min(
state.cfile_remote_consistent_lsn,
state.cfile_peer_horizon_lsn,
);
let mut horizon_lsn = state.cfile_remote_consistent_lsn;
// we don't want to remove WAL that is not yet offloaded to s3
horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn);
// Min by local commit_lsn to be able to begin reading WAL from somewhere on
// sk start. Technically we don't allow local commit_lsn to be higher than
// flush_lsn, but let's be double safe by including it as well.
horizon_lsn = min(horizon_lsn, state.cfile_commit_lsn);
horizon_lsn = min(horizon_lsn, state.flush_lsn);
if let Some(extra_horizon_lsn) = extra_horizon_lsn {
horizon_lsn = min(horizon_lsn, extra_horizon_lsn);
}

View File

@@ -47,7 +47,7 @@ pub(crate) struct StateSnapshot {
pub(crate) remote_consistent_lsn: Lsn,
// persistent control file values
pub(crate) cfile_peer_horizon_lsn: Lsn,
pub(crate) cfile_commit_lsn: Lsn,
pub(crate) cfile_remote_consistent_lsn: Lsn,
pub(crate) cfile_backup_lsn: Lsn,
@@ -70,7 +70,7 @@ impl StateSnapshot {
commit_lsn: state.inmem.commit_lsn,
backup_lsn: state.inmem.backup_lsn,
remote_consistent_lsn: state.inmem.remote_consistent_lsn,
cfile_peer_horizon_lsn: state.peer_horizon_lsn,
cfile_commit_lsn: state.commit_lsn,
cfile_remote_consistent_lsn: state.remote_consistent_lsn,
cfile_backup_lsn: state.backup_lsn,
flush_lsn: read_guard.sk.flush_lsn(),