mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-27 01:50:38 +00:00
pageserver - reject and backup future layer files
If a layer file is found with an LSN after the timeline's `disk_consistent_lsn`, it is renamed with a `.{num}.old` suffix (to avoid conflicts with new layer files) and a warning is logged.
This commit is contained in:
committed by
Patrick Insinger
parent
538c2a2a3e
commit
7095a5d551
@@ -223,6 +223,7 @@ impl LayeredRepository {
|
||||
Some(timeline) => Ok(timeline.clone()),
|
||||
None => {
|
||||
let metadata = Self::load_metadata(self.conf, timelineid, self.tenantid)?;
|
||||
let disk_consistent_lsn = metadata.disk_consistent_lsn;
|
||||
|
||||
// Recurse to look up the ancestor timeline.
|
||||
//
|
||||
@@ -247,7 +248,7 @@ impl LayeredRepository {
|
||||
)?;
|
||||
|
||||
// List the layers on disk, and load them into the layer map
|
||||
timeline.load_layer_map()?;
|
||||
timeline.load_layer_map(disk_consistent_lsn)?;
|
||||
|
||||
// needs to be after load_layer_map
|
||||
timeline.init_current_logical_size()?;
|
||||
@@ -1048,7 +1049,7 @@ impl LayeredTimeline {
|
||||
///
|
||||
/// Scan the timeline directory to populate the layer map
|
||||
///
|
||||
fn load_layer_map(&self) -> anyhow::Result<()> {
|
||||
fn load_layer_map(&self, disk_consistent_lsn: Lsn) -> anyhow::Result<()> {
|
||||
info!(
|
||||
"loading layer map for timeline {} into memory",
|
||||
self.timelineid
|
||||
@@ -1057,8 +1058,20 @@ impl LayeredTimeline {
|
||||
let (imgfilenames, mut deltafilenames) =
|
||||
filename::list_files(self.conf, self.timelineid, self.tenantid)?;
|
||||
|
||||
let timeline_path = self.conf.timeline_path(&self.timelineid, &self.tenantid);
|
||||
|
||||
// First create ImageLayer structs for each image file.
|
||||
for filename in imgfilenames.iter() {
|
||||
if filename.lsn > disk_consistent_lsn {
|
||||
warn!(
|
||||
"found future image layer {} on timeline {}",
|
||||
filename, self.timelineid
|
||||
);
|
||||
|
||||
rename_to_backup(timeline_path.join(filename.to_string()))?;
|
||||
continue;
|
||||
}
|
||||
|
||||
let layer = ImageLayer::new(self.conf, self.timelineid, self.tenantid, filename);
|
||||
|
||||
info!(
|
||||
@@ -1076,6 +1089,17 @@ impl LayeredTimeline {
|
||||
deltafilenames.sort();
|
||||
|
||||
for filename in deltafilenames.iter() {
|
||||
ensure!(filename.start_lsn < filename.end_lsn);
|
||||
if filename.end_lsn > disk_consistent_lsn {
|
||||
warn!(
|
||||
"found future delta layer {} on timeline {}",
|
||||
filename, self.timelineid
|
||||
);
|
||||
|
||||
rename_to_backup(timeline_path.join(filename.to_string()))?;
|
||||
continue;
|
||||
}
|
||||
|
||||
let predecessor = layers.get(&filename.seg, filename.start_lsn);
|
||||
|
||||
let predecessor_str: String = if let Some(prec) = &predecessor {
|
||||
@@ -1929,3 +1953,23 @@ fn layer_ptr_eq(l1: &dyn Layer, l2: &dyn Layer) -> bool {
|
||||
// see here for more https://github.com/rust-lang/rust/issues/46139
|
||||
std::ptr::eq(l1_ptr as *const (), l2_ptr as *const ())
|
||||
}
|
||||
|
||||
/// Add a suffix to a layer file's name: .{num}.old
|
||||
/// Uses the first available num (starts at 0)
|
||||
fn rename_to_backup(path: PathBuf) -> anyhow::Result<()> {
|
||||
let filename = path.file_name().unwrap().to_str().unwrap();
|
||||
let mut new_path = path.clone();
|
||||
|
||||
for i in 0u32.. {
|
||||
new_path.set_file_name(format!("{}.{}.old", filename, i));
|
||||
if !new_path.exists() {
|
||||
std::fs::rename(&path, &new_path)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"couldn't find an unused backup number for {:?}",
|
||||
path
|
||||
))
|
||||
}
|
||||
|
||||
@@ -290,7 +290,11 @@ pub fn list_files(
|
||||
deltafiles.push(deltafilename);
|
||||
} else if let Some(imgfilename) = ImageFileName::from_str(fname) {
|
||||
imgfiles.push(imgfilename);
|
||||
} else if fname == "wal" || fname == "metadata" || fname == "ancestor" {
|
||||
} else if fname == "wal"
|
||||
|| fname == "metadata"
|
||||
|| fname == "ancestor"
|
||||
|| fname.ends_with(".old")
|
||||
{
|
||||
// ignore these
|
||||
} else {
|
||||
warn!("unrecognized filename in timeline dir: {}", fname);
|
||||
|
||||
@@ -759,6 +759,70 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Layer files whose names carry LSNs (8000 / 8008) that a freshly created,
/// empty timeline is expected not to have reached must be moved aside to
/// `<name>.<num>.old` when the timeline is loaded, using the first free
/// `<num>` each time.
#[test]
fn future_layerfiles() -> Result<()> {
    const TEST_NAME: &str = "future_layerfiles";

    // Create a repository with a single empty timeline, then close it again
    // so that it can be reopened from disk below.
    let timelineid = ZTimelineId::from_str("11223344556677881122334455667788").unwrap();
    {
        let repo = get_test_repo(TEST_NAME)?;
        repo.create_empty_timeline(timelineid)?;
    }

    // The test repo must contain exactly one tenant directory; its name is
    // the tenant id needed to reopen the repository.
    let tenants_root = PageServerConf::test_repo_dir(TEST_NAME).join("tenants");
    let mut tenant_entries = std::fs::read_dir(tenants_root)?;
    let tenant_dir = tenant_entries.next().unwrap().unwrap().path();
    assert!(tenant_dir.is_dir());
    let tenantid = ZTenantId::from_str(tenant_dir.file_name().unwrap().to_str().unwrap())?;
    assert!(tenant_entries.next().is_none());

    let timeline_dir = tenant_dir.join("timelines").join(timelineid.to_string());

    // Creates an empty file in the timeline directory; it must not exist yet.
    let touch_layer = |filename: &str| -> std::io::Result<()> {
        let target = timeline_dir.join(filename);
        assert!(!target.exists());
        std::fs::write(&target, b"")
    };

    // Reopens the repository from disk and loads the timeline, which is
    // what scans the timeline directory for layer files.
    let reopen_timeline = || -> Result<()> {
        let reopened = load_test_repo(TEST_NAME, tenantid)?;
        reopened.get_timeline(timelineid).unwrap();
        drop(reopened);
        Ok(())
    };

    // The original file must be gone and its numbered backup must exist.
    let assert_backed_up = |filename: &str, num: u32| {
        assert!(!timeline_dir.join(filename).exists());
        assert!(timeline_dir
            .join(format!("{}.{}.old", filename, num))
            .exists());
    };

    let image_filename = format!("pg_control_0_{:016X}", 8000);
    let delta_filename = format!("pg_control_0_{:016X}_{:016X}", 8000, 8008);

    // Round 0 produces the `.0.old` backups; round 1 must keep those and
    // add `.1.old` next to them.
    for round in 0u32..2 {
        touch_layer(&image_filename)?;
        touch_layer(&delta_filename)?;

        reopen_timeline()?;

        for num in 0..=round {
            assert_backed_up(&image_filename, num);
            assert_backed_up(&delta_filename, num);
        }
    }

    Ok(())
}
|
||||
|
||||
// Mock WAL redo manager that doesn't do much
|
||||
struct TestRedoManager {}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user