mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 14:02:55 +00:00
Set proper xl_prev in basebackup, when possible.
In a passing fix two minor issues with basabackup: * check that we can't create branches with pre-initdb LSN's * normalize branch LSN's that are pointing to the segment boundary patch by @knizhnik closes #506
This commit is contained in:
committed by
Stas Kelvich
parent
45c09c1cdd
commit
b227c63edf
@@ -47,28 +47,45 @@ impl<'a> Basebackup<'a> {
|
||||
timeline: &'a Arc<dyn Timeline>,
|
||||
req_lsn: Option<Lsn>,
|
||||
) -> Basebackup<'a> {
|
||||
// current_prev may be zero if we are at the start of timeline branched from old lsn
|
||||
let RecordLsn {
|
||||
last: lsn,
|
||||
prev: prev_record_lsn,
|
||||
} = if let Some(lsn) = req_lsn {
|
||||
// FIXME: that wouldn't work since we don't know prev for old LSN's.
|
||||
// Probably it is better to avoid using prev in compute node start
|
||||
// at all and accept the fact that first WAL record in the timeline would
|
||||
// have zero as prev. https://github.com/zenithdb/zenith/issues/506
|
||||
RecordLsn {
|
||||
last: lsn,
|
||||
prev: lsn,
|
||||
last: current_last,
|
||||
prev: current_prev,
|
||||
} = timeline.get_last_record_rlsn();
|
||||
|
||||
// Compute postgres doesn't have any previous WAL files, but the first record that this
|
||||
// postgres is going to write need to have LSN of previous record (xl_prev). So we are
|
||||
// writing prev_lsn to "zenith.signal" file so that postgres can read it during the start.
|
||||
// In some cases we don't know prev_lsn (branch or basebackup @old_lsn) so pass Lsn(0)
|
||||
// instead and embrace the wrong xl_prev in this situations.
|
||||
let (backup_prev, backup_lsn) = if let Some(req_lsn) = req_lsn {
|
||||
if req_lsn > current_last {
|
||||
// FIXME: now wait_lsn() is inside of list_nonrels() so we don't have a way
|
||||
// to get it from there. It is better to wait just here.
|
||||
(Lsn(0), req_lsn)
|
||||
} else if req_lsn < current_last {
|
||||
// we don't know prev already. We don't currently use basebackup@old_lsn
|
||||
// but may use it for read only replicas in future
|
||||
(Lsn(0), req_lsn)
|
||||
} else {
|
||||
// we are exactly at req_lsn and know prev
|
||||
(current_prev, req_lsn)
|
||||
}
|
||||
} else {
|
||||
// Atomically get last and prev LSN's
|
||||
timeline.get_last_record_rlsn()
|
||||
// None in req_lsn means that we are branching from the latest LSN
|
||||
(current_prev, current_last)
|
||||
};
|
||||
|
||||
info!(
|
||||
"taking basebackup lsn={}, prev_lsn={}",
|
||||
backup_prev, backup_lsn
|
||||
);
|
||||
|
||||
Basebackup {
|
||||
ar: Builder::new(write),
|
||||
timeline,
|
||||
lsn,
|
||||
prev_record_lsn,
|
||||
lsn: backup_lsn,
|
||||
prev_record_lsn: backup_prev,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -236,7 +253,7 @@ impl<'a> Basebackup<'a> {
|
||||
XLOG_SIZE_OF_XLOG_LONG_PHD as u32,
|
||||
pg_constants::WAL_SEGMENT_SIZE,
|
||||
);
|
||||
checkpoint.redo = self.lsn.0 + self.lsn.calc_padding(8u32);
|
||||
checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0;
|
||||
|
||||
//reset some fields we don't want to preserve
|
||||
//TODO Check this.
|
||||
@@ -249,9 +266,14 @@ impl<'a> Basebackup<'a> {
|
||||
pg_control.state = pg_constants::DB_SHUTDOWNED;
|
||||
|
||||
// add zenith.signal file
|
||||
let xl_prev = if self.prev_record_lsn == Lsn(0) {
|
||||
0xBAD0 // magic value to indicate that we don't know prev_lsn
|
||||
} else {
|
||||
self.prev_record_lsn.0
|
||||
};
|
||||
self.ar.append(
|
||||
&new_tar_header("zenith.signal", 8)?,
|
||||
&self.prev_record_lsn.0.to_le_bytes()[..],
|
||||
&xl_prev.to_le_bytes()[..],
|
||||
)?;
|
||||
|
||||
//send pg_control
|
||||
|
||||
@@ -264,15 +264,22 @@ pub(crate) fn create_branch(
|
||||
}
|
||||
|
||||
let mut startpoint = parse_point_in_time(conf, startpoint_str, tenantid)?;
|
||||
|
||||
let timeline = repo.get_timeline(startpoint.timelineid)?;
|
||||
if startpoint.lsn == Lsn(0) {
|
||||
// Find end of WAL on the old timeline
|
||||
let end_of_wal = repo
|
||||
.get_timeline(startpoint.timelineid)?
|
||||
.get_last_record_lsn();
|
||||
let end_of_wal = timeline.get_last_record_lsn();
|
||||
info!("branching at end of WAL: {}", end_of_wal);
|
||||
startpoint.lsn = end_of_wal;
|
||||
}
|
||||
startpoint.lsn = startpoint.lsn.align();
|
||||
if timeline.get_start_lsn() > startpoint.lsn {
|
||||
anyhow::bail!(
|
||||
"invalid startpoint {} for the branch {}: less than timeline start {}",
|
||||
startpoint.lsn,
|
||||
branchname,
|
||||
timeline.get_start_lsn()
|
||||
);
|
||||
}
|
||||
|
||||
// create a new timeline directory for it
|
||||
let newtli = create_timeline(conf, Some(startpoint), tenantid)?;
|
||||
|
||||
@@ -150,12 +150,24 @@ impl Repository for LayeredRepository {
|
||||
fn branch_timeline(&self, src: ZTimelineId, dst: ZTimelineId, start_lsn: Lsn) -> Result<()> {
|
||||
let src_timeline = self.get_timeline(src)?;
|
||||
|
||||
let RecordLsn {
|
||||
last: src_last,
|
||||
prev: src_prev,
|
||||
} = src_timeline.get_last_record_rlsn();
|
||||
|
||||
// Use src_prev from the source timeline only if we branched at the last record.
|
||||
let dst_prev = if src_last == start_lsn {
|
||||
Some(src_prev)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Create the metadata file, noting the ancestor of the new timeline.
|
||||
// There is initially no data in it, but all the read-calls know to look
|
||||
// into the ancestor.
|
||||
let metadata = TimelineMetadata {
|
||||
disk_consistent_lsn: start_lsn,
|
||||
prev_record_lsn: Some(src_timeline.get_prev_record_lsn()), // FIXME not atomic with start_lsn
|
||||
prev_record_lsn: dst_prev,
|
||||
ancestor_timeline: Some(src),
|
||||
ancestor_lsn: start_lsn,
|
||||
};
|
||||
@@ -782,6 +794,14 @@ impl Timeline for LayeredTimeline {
|
||||
fn get_last_record_rlsn(&self) -> RecordLsn {
|
||||
self.last_record_lsn.load()
|
||||
}
|
||||
|
||||
fn get_start_lsn(&self) -> Lsn {
|
||||
if let Some(ancestor) = self.ancestor_timeline.as_ref() {
|
||||
ancestor.get_start_lsn()
|
||||
} else {
|
||||
self.ancestor_lsn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LayeredTimeline {
|
||||
|
||||
@@ -146,6 +146,7 @@ pub trait Timeline: Send + Sync {
|
||||
/// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
|
||||
fn get_last_record_lsn(&self) -> Lsn;
|
||||
fn get_prev_record_lsn(&self) -> Lsn;
|
||||
fn get_start_lsn(&self) -> Lsn;
|
||||
|
||||
///
|
||||
/// Flush to disk all data that was written with the put_* functions
|
||||
|
||||
@@ -26,6 +26,7 @@ use std::fs::{self, File};
|
||||
use std::io::prelude::*;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
pub const XLOG_FNAME_LEN: usize = 24;
|
||||
pub const XLOG_BLCKSZ: usize = 8192;
|
||||
@@ -89,6 +90,21 @@ pub fn IsPartialXLogFileName(fname: &str) -> bool {
|
||||
fname.ends_with(".partial") && IsXLogFileName(&fname[0..fname.len() - 8])
|
||||
}
|
||||
|
||||
/// If LSN points to the beginning of the page, then shift it to first record,
|
||||
/// otherwise align on 8-bytes boundary (required for WAL records)
|
||||
pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {
|
||||
if lsn.0 % XLOG_BLCKSZ as u64 == 0 {
|
||||
let hdr_size = if lsn.0 % seg_sz as u64 == 0 {
|
||||
XLOG_SIZE_OF_XLOG_LONG_PHD
|
||||
} else {
|
||||
XLOG_SIZE_OF_XLOG_SHORT_PHD
|
||||
};
|
||||
lsn + hdr_size as u64
|
||||
} else {
|
||||
lsn.align()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_current_timestamp() -> TimestampTz {
|
||||
const UNIX_EPOCH_JDATE: u64 = 2440588; /* == date2j(1970, 1, 1) */
|
||||
const POSTGRES_EPOCH_JDATE: u64 = 2451545; /* == date2j(2000, 1, 1) */
|
||||
@@ -416,7 +432,6 @@ mod tests {
|
||||
use super::*;
|
||||
use regex::Regex;
|
||||
use std::{env, process::Command, str::FromStr};
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
// Run find_end_of_wal against file in test_wal dir
|
||||
// Ensure that it finds last record correctly
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import subprocess
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
|
||||
|
||||
@@ -74,3 +75,18 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
# All the rows are visible on the main branch
|
||||
main_cur.execute('SELECT count(*) FROM foo')
|
||||
assert main_cur.fetchone() == (200100, )
|
||||
|
||||
# Check bad lsn's for branching
|
||||
|
||||
# branch at segment boundary
|
||||
zenith_cli.run(["branch", "test_branch_segment_boundary", "test_branch_behind@0/3000000"])
|
||||
pg = postgres.create_start("test_branch_segment_boundary")
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute('SELECT 1')
|
||||
assert cur.fetchone() == (1, )
|
||||
|
||||
# branch at pre-initdb lsn
|
||||
try:
|
||||
zenith_cli.run(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"])
|
||||
except subprocess.CalledProcessError:
|
||||
print("Branch creation with pre-initdb LSN failed (as expected)")
|
||||
|
||||
2
vendor/postgres
vendored
2
vendor/postgres
vendored
Submodule vendor/postgres updated: 909c606355...3da8bd673b
Reference in New Issue
Block a user