mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-06 04:52:55 +00:00
Use last record LSN as flush position reported by safekeepers to walproposer to prevent moving VCL backward on compute node restart
This commit is contained in:
@@ -27,8 +27,8 @@ use crate::replication::HotStandbyFeedback;
|
||||
use crate::send_wal::SendWalHandler;
|
||||
use crate::timeline::{Timeline, TimelineTools};
|
||||
use crate::WalAcceptorConf;
|
||||
use postgres_ffi::xlog_utils::{TimeLineID, XLogFileName, MAX_SEND_SIZE, XLOG_BLCKSZ};
|
||||
use pageserver::waldecoder::WalStreamDecoder;
|
||||
use postgres_ffi::xlog_utils::{TimeLineID, XLogFileName, MAX_SEND_SIZE, XLOG_BLCKSZ};
|
||||
|
||||
pub const SK_MAGIC: u32 = 0xcafeceefu32;
|
||||
pub const SK_FORMAT_VERSION: u32 = 1;
|
||||
@@ -85,8 +85,6 @@ pub struct SafeKeeperInfo {
|
||||
pub commit_lsn: Lsn,
|
||||
/// locally flushed part of WAL
|
||||
pub flush_lsn: Lsn,
|
||||
/// locally flushed last record LSN
|
||||
pub safe_lsn: Lsn,
|
||||
/// minimal LSN which may be needed for recovery of some safekeeper: min(commit_lsn) for all safekeepers
|
||||
pub restart_lsn: Lsn,
|
||||
}
|
||||
@@ -113,7 +111,6 @@ impl SafeKeeperInfo {
|
||||
},
|
||||
commit_lsn: Lsn(0), /* part of WAL acknowledged by quorum */
|
||||
flush_lsn: Lsn(0), /* locally flushed part of WAL */
|
||||
safe_lsn: Lsn(0), /* locally flushed last record LSN */
|
||||
restart_lsn: Lsn(0), /* minimal LSN which may be needed for recovery of some safekeeper */
|
||||
}
|
||||
}
|
||||
@@ -139,7 +136,6 @@ struct SafeKeeperRequest {
|
||||
struct SafeKeeperResponse {
|
||||
epoch: u64,
|
||||
flush_lsn: Lsn,
|
||||
safe_lsn: Lsn,
|
||||
hs_feedback: HotStandbyFeedback,
|
||||
}
|
||||
|
||||
@@ -280,7 +276,6 @@ impl<'pg> ReceiveWalConn<'pg> {
|
||||
/* Calculate WAL end based on local data */
|
||||
let (flush_lsn, timeline_id) = this_timeline.find_end_of_wal(&swh.conf.data_dir, true);
|
||||
my_info.flush_lsn = flush_lsn;
|
||||
my_info.safe_lsn = flush_lsn; // end_of_wal actually returns last record LSN
|
||||
my_info.server.timeline = timeline_id;
|
||||
|
||||
info!(
|
||||
@@ -328,11 +323,14 @@ impl<'pg> ReceiveWalConn<'pg> {
|
||||
}
|
||||
|
||||
info!(
|
||||
"Start accepting WAL for timeline {} tenant {} address {:?} flush_lsn={} safe_lsn={}",
|
||||
server_info.timeline_id, server_info.tenant_id, self.peer_addr, my_info.flush_lsn, my_info.safe_lsn
|
||||
"Start accepting WAL for timeline {} tenant {} address {:?} flush_lsn={}",
|
||||
server_info.timeline_id,
|
||||
server_info.tenant_id,
|
||||
self.peer_addr,
|
||||
my_info.flush_lsn,
|
||||
);
|
||||
let mut last_rec_lsn = Lsn(0);
|
||||
let mut decoder = WalStreamDecoder::new(last_rec_lsn, false);
|
||||
let mut last_rec_lsn = Lsn(0);
|
||||
let mut decoder = WalStreamDecoder::new(last_rec_lsn, false);
|
||||
|
||||
// Main loop
|
||||
loop {
|
||||
@@ -355,47 +353,52 @@ impl<'pg> ReceiveWalConn<'pg> {
|
||||
let end_pos = req.end_lsn;
|
||||
let rec_size = end_pos.checked_sub(start_pos).unwrap().0 as usize;
|
||||
assert!(rec_size <= MAX_SEND_SIZE);
|
||||
if rec_size != 0 {
|
||||
debug!(
|
||||
"received for {} bytes between {} and {}",
|
||||
rec_size, start_pos, end_pos,
|
||||
);
|
||||
if rec_size != 0 {
|
||||
debug!(
|
||||
"received for {} bytes between {} and {}",
|
||||
rec_size, start_pos, end_pos,
|
||||
);
|
||||
|
||||
/* Receive message body (from the rest of the message) */
|
||||
let mut buf = Vec::with_capacity(rec_size);
|
||||
msg_reader.read_to_end(&mut buf)?;
|
||||
assert_eq!(buf.len(), rec_size);
|
||||
/* Receive message body (from the rest of the message) */
|
||||
let mut buf = Vec::with_capacity(rec_size);
|
||||
msg_reader.read_to_end(&mut buf)?;
|
||||
assert_eq!(buf.len(), rec_size);
|
||||
|
||||
if decoder.available() != start_pos {
|
||||
info!("Restart decoder from {} to {}", decoder.available(), start_pos);
|
||||
decoder = WalStreamDecoder::new(start_pos, false);
|
||||
}
|
||||
decoder.feed_bytes(&buf);
|
||||
//while let Ok(Some((lsn,_rec))) = decoder.poll_decode() {
|
||||
loop {
|
||||
match decoder.poll_decode() {
|
||||
Err(e) => info!("Decode error {}", e),
|
||||
Ok(None) => info!("Decode end"),
|
||||
Ok(Some((lsn,_rec))) => {
|
||||
last_rec_lsn = lsn;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
last_rec_lsn = Lsn((last_rec_lsn.0 + 7) & !7); // align record start position on 8
|
||||
info!("Receive WAL {}..{} last_rec_lsn={}", start_pos, end_pos, last_rec_lsn);
|
||||
if decoder.available() != start_pos {
|
||||
info!(
|
||||
"Restart decoder from {} to {}",
|
||||
decoder.available(),
|
||||
start_pos
|
||||
);
|
||||
decoder = WalStreamDecoder::new(start_pos, false);
|
||||
}
|
||||
decoder.feed_bytes(&buf);
|
||||
loop {
|
||||
match decoder.poll_decode() {
|
||||
Err(e) => info!("Decode error {}", e),
|
||||
Ok(None) => {},
|
||||
Ok(Some((lsn, _rec))) => {
|
||||
last_rec_lsn = lsn;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
info!(
|
||||
"Receive WAL {}..{} last_rec_lsn={}",
|
||||
start_pos, end_pos, last_rec_lsn
|
||||
);
|
||||
|
||||
/* Save message in file */
|
||||
Self::write_wal_file(
|
||||
swh,
|
||||
start_pos,
|
||||
timeline_id,
|
||||
this_timeline.get(),
|
||||
wal_seg_size,
|
||||
&buf,
|
||||
)?;
|
||||
}
|
||||
/* Save message in file */
|
||||
Self::write_wal_file(
|
||||
swh,
|
||||
start_pos,
|
||||
timeline_id,
|
||||
this_timeline.get(),
|
||||
wal_seg_size,
|
||||
&buf,
|
||||
)?;
|
||||
}
|
||||
my_info.restart_lsn = req.restart_lsn;
|
||||
my_info.commit_lsn = req.commit_lsn;
|
||||
|
||||
@@ -410,11 +413,8 @@ impl<'pg> ReceiveWalConn<'pg> {
|
||||
my_info.epoch = prop.epoch; /* bump epoch */
|
||||
sync_control_file = true;
|
||||
}
|
||||
if end_pos > my_info.flush_lsn {
|
||||
my_info.flush_lsn = end_pos;
|
||||
}
|
||||
if last_rec_lsn > my_info.safe_lsn {
|
||||
my_info.safe_lsn = last_rec_lsn;
|
||||
if last_rec_lsn > my_info.flush_lsn {
|
||||
my_info.flush_lsn = last_rec_lsn;
|
||||
}
|
||||
/*
|
||||
* Update restart LSN in control file.
|
||||
@@ -434,7 +434,6 @@ impl<'pg> ReceiveWalConn<'pg> {
|
||||
let resp = SafeKeeperResponse {
|
||||
epoch: my_info.epoch,
|
||||
flush_lsn: my_info.flush_lsn,
|
||||
safe_lsn: my_info.safe_lsn,
|
||||
hs_feedback: this_timeline.get().get_hs_feedback(),
|
||||
};
|
||||
self.write_msg(&resp)?;
|
||||
@@ -443,10 +442,15 @@ impl<'pg> ReceiveWalConn<'pg> {
|
||||
* Ping wal sender that new data is available.
|
||||
* FlushLSN (end_pos) can be smaller than commitLSN in case we are at catching-up safekeeper.
|
||||
*/
|
||||
info!("Notify WAL senders min({}, {})={}", req.commit_lsn, my_info.safe_lsn, min(req.commit_lsn, my_info.safe_lsn));
|
||||
trace!(
|
||||
"Notify WAL senders min({}, {})={}",
|
||||
req.commit_lsn,
|
||||
my_info.flush_lsn,
|
||||
min(req.commit_lsn, my_info.flush_lsn)
|
||||
);
|
||||
this_timeline
|
||||
.get()
|
||||
.notify_wal_senders(min(req.commit_lsn, my_info.safe_lsn));
|
||||
.notify_wal_senders(min(req.commit_lsn, my_info.flush_lsn));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -217,7 +217,12 @@ impl ReplicationConn {
|
||||
data: &file_buf,
|
||||
}))?;
|
||||
|
||||
info!("Sent WAL to page server {}..{}, end_pos={}", start_pos, start_pos + send_size as u64, end_pos);
|
||||
debug!(
|
||||
"Sent WAL to page server {}..{}, end_pos={}",
|
||||
start_pos,
|
||||
start_pos + send_size as u64,
|
||||
end_pos
|
||||
);
|
||||
|
||||
start_pos += send_size as u64;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user