Clarify the meaning of StandbyReply LSNs:

write_lsn - The last LSN received and processed by pageserver's walreceiver.
flush_lsn - same as write_lsn. At the pageserver it doesn't guarantee data persistence, but that's fine: we rely on safekeepers.
apply_lsn - The LSN at which pageserver guaranteed persistence of all received data (disk_consistent_lsn).
This commit is contained in:
anastasia
2021-11-10 00:12:08 +03:00
committed by lubennikovaav
parent 5bad2deff8
commit c7f3b4e62c
3 changed files with 22 additions and 12 deletions

View File

@@ -321,14 +321,24 @@ fn walreceiver_main(
};
if let Some(last_lsn) = status_update {
// TODO: More thought should go into what values are sent here.
let last_lsn = PgLsn::from(u64::from(last_lsn));
// We are using disk consistent LSN as `write_lsn`, i.e. LSN at which page server
// may guarantee persistence of all received data. Safekeeper is not free to remove
// WAL preceding `write_lsn`: it should not be requested by this page server.
let write_lsn = PgLsn::from(u64::from(timeline.get_disk_consistent_lsn()));
let flush_lsn = last_lsn;
let apply_lsn = PgLsn::from(0);
// The last LSN we processed. It is not guaranteed to survive pageserver crash.
let write_lsn = last_lsn;
// This value doesn't guarantee data durability, but it's ok.
// In setup with WAL service, pageserver durability is guaranteed by safekeepers.
// In setup without WAL service, we just don't care.
let flush_lsn = write_lsn;
// `disk_consistent_lsn` is the LSN at which page server guarantees persistence of all received data.
// Depending on the setup we receive WAL directly from Compute Node or
// from a WAL service.
//
// Senders use the feedback to determine if we are caught up:
// - Safekeepers are free to remove WAL preceding `apply_lsn`,
// as it will never be requested by this page server.
// - Compute Node uses `apply_lsn` to calculate the lag for the back pressure mechanism
// (delaying WAL inserts to avoid lagging pageserver responses and WAL overflow).
let apply_lsn = PgLsn::from(u64::from(timeline.get_disk_consistent_lsn()));
let ts = SystemTime::now();
const NO_REPLY: u8 = 0;
physical_stream.standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY)?;

View File

@@ -55,9 +55,9 @@ impl HotStandbyFeedback {
/// Standby status update
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StandbyReply {
pub write_lsn: Lsn, // disk consistent lSN
pub flush_lsn: Lsn, // LSN committedby quorum
pub apply_lsn: Lsn, // not used
pub write_lsn: Lsn, // not used
pub flush_lsn: Lsn, // not used
pub apply_lsn: Lsn, // pageserver's disk consistent LSN
pub reply_ts: TimestampTz,
pub reply_requested: bool,
}
@@ -115,7 +115,7 @@ impl ReplicationConn {
Some(STANDBY_STATUS_UPDATE_TAG_BYTE) => {
let reply = StandbyReply::des(&m[1..])
.context("failed to deserialize StandbyReply")?;
state.disk_consistent_lsn = reply.write_lsn;
state.disk_consistent_lsn = reply.apply_lsn;
timeline.update_replica_state(replica, Some(state));
}
_ => warn!("unexpected message {:?}", msg),