From 5ecf0664ccc01abd5130c26aabdeb2d2a481964a Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Mon, 29 Nov 2021 14:35:24 +0200
Subject: [PATCH] Fix off-by-one error in check for future delta layers.

This doesn't show up at the moment, because we never create a delta
layer with end-LSN equal to the last LSN; we always create an image
layer at that LSN instead. For example, if the latest processed LSN is
100, we would create a delta layer with end LSN 100 (exclusive), and an
image layer at 100. But that's just how InMemoryLayer::write_to_disk
happens to work at the moment; there's no fundamental reason it needs
to always create that image layer.

I noticed this bug when I tried to change the logic in
InMemoryLayer::write_to_disk to only create an image layer after a few
delta layers.
---
 pageserver/src/layered_repository.rs |  7 +-
 pageserver/src/repository.rs         | 98 +++++++++++++++++++++-------
 2 files changed, 81 insertions(+), 24 deletions(-)

diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs
index 7ef63d2f25..0173c3ca5a 100644
--- a/pageserver/src/layered_repository.rs
+++ b/pageserver/src/layered_repository.rs
@@ -973,7 +973,12 @@ impl LayeredTimeline {
 
         for filename in &deltafilenames {
             ensure!(filename.start_lsn < filename.end_lsn);
-            if filename.end_lsn > disk_consistent_lsn {
+            // The end-LSN is exclusive, while disk_consistent_lsn is
+            // inclusive. For example, if disk_consistent_lsn is 100, it is
+            // OK for a delta layer to have end LSN 101, but if the end LSN
+            // is 102, then it might not have been fully flushed to disk
+            // before crash.
+            if filename.end_lsn > disk_consistent_lsn + 1 {
                 warn!(
                     "found future delta layer {} on timeline {}",
                     filename, self.timelineid
diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs
index b9596d376d..7057dc9b3b 100644
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -980,7 +980,12 @@ mod tests {
         let harness = RepoHarness::create(TEST_NAME)?;
         let repo = harness.load();
 
-        repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
+        // Create a timeline with disk_consistent_lsn = 0x8000
+        let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0x8000))?;
+        let writer = tline.writer();
+        writer.advance_last_record_lsn(Lsn(0x8000));
+        drop(writer);
+        repo.checkpoint_iteration(CheckpointConfig::Forced)?;
         drop(repo);
 
         let timeline_path = harness.timeline_path(&TIMELINE_ID);
@@ -994,38 +999,85 @@ mod tests {
             Ok(())
         };
 
-        let image_filename = format!("pg_control_0_{:016X}", 8000);
-        let delta_filename = format!("pg_control_0_{:016X}_{:016X}", 8000, 8008);
-
-        make_empty_file(&image_filename)?;
-        make_empty_file(&delta_filename)?;
-
-        let new_repo = harness.load();
-        new_repo.get_timeline(TIMELINE_ID).unwrap();
-        drop(new_repo);
-
-        let check_old = |filename: &str, num: u32| {
+        // Helper function to check that a relation file exists, and a corresponding
+        // .0.old file does not.
+        let assert_exists = |filename: &str| {
             let path = timeline_path.join(filename);
-            assert!(!path.exists());
+            assert!(path.exists(), "file {} was removed", filename);
 
-            let backup_path = timeline_path.join(format!("{}.{}.old", filename, num));
-            assert!(backup_path.exists());
+            // Check that there is no .old file
+            let backup_path = timeline_path.join(format!("{}.0.old", filename));
+            assert!(
+                !backup_path.exists(),
+                "unexpected backup file {}",
+                backup_path.display()
+            );
         };
 
-        check_old(&image_filename, 0);
-        check_old(&delta_filename, 0);
+        // Helper function to check that a relation file does *not* exist, and a corresponding
+        // .<num>.old file does.
+        let assert_is_renamed = |filename: &str, num: u32| {
+            let path = timeline_path.join(filename);
+            assert!(
+                !path.exists(),
+                "file {} was not removed as expected",
+                filename
+            );
 
-        make_empty_file(&image_filename)?;
-        make_empty_file(&delta_filename)?;
+            let backup_path = timeline_path.join(format!("{}.{}.old", filename, num));
+            assert!(
+                backup_path.exists(),
+                "backup file {} was not created",
+                backup_path.display()
+            );
+        };
+
+        // These files are considered to be in the future and will be renamed out
+        // of the way.
+        let future_filenames = vec![
+            format!("pg_control_0_{:016X}", 0x8001),
+            format!("pg_control_0_{:016X}_{:016X}", 0x8001, 0x8008),
+        ];
+        // But these are not:
+        let past_filenames = vec![
+            format!("pg_control_0_{:016X}", 0x8000),
+            format!("pg_control_0_{:016X}_{:016X}", 0x7000, 0x8001),
+        ];
+
+        for filename in future_filenames.iter().chain(past_filenames.iter()) {
+            make_empty_file(filename)?;
+        }
+
+        // Load the timeline. This will cause the files in the "future" to be renamed
+        // away.
+        let new_repo = harness.load();
+        new_repo.get_timeline(TIMELINE_ID).unwrap();
+        drop(new_repo);
+
+        for filename in future_filenames.iter() {
+            assert_is_renamed(filename, 0);
+        }
+        for filename in past_filenames.iter() {
+            assert_exists(filename);
+        }
+
+        // Create the future files again, and load again. They should be renamed to
+        // *.1.old this time.
+        for filename in future_filenames.iter() {
+            make_empty_file(filename)?;
+        }
 
         let new_repo = harness.load();
         new_repo.get_timeline(TIMELINE_ID).unwrap();
         drop(new_repo);
 
-        check_old(&image_filename, 0);
-        check_old(&delta_filename, 0);
-        check_old(&image_filename, 1);
-        check_old(&delta_filename, 1);
+        for filename in future_filenames.iter() {
+            assert_is_renamed(filename, 0);
+            assert_is_renamed(filename, 1);
+        }
+        for filename in past_filenames.iter() {
+            assert_exists(filename);
+        }
 
         Ok(())
     }
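
The boundary condition the patch fixes can be shown in isolation. The
following is a minimal sketch, not part of the patch: it assumes a
simplified u64 newtype standing in for the real Lsn type, and
`is_future_delta_layer` is a hypothetical helper mirroring the
corrected comparison in layered_repository.rs above.

    // Standalone sketch (not pageserver code) of the exclusive-end vs.
    // inclusive-LSN boundary. `Lsn` is a simplified u64 newtype and
    // `is_future_delta_layer` a hypothetical helper, both illustrative.
    #[derive(Clone, Copy)]
    struct Lsn(u64);

    fn is_future_delta_layer(end_lsn: Lsn, disk_consistent_lsn: Lsn) -> bool {
        // end_lsn is exclusive and disk_consistent_lsn is inclusive, so
        // a layer ending at disk_consistent_lsn + 1 is fully on disk.
        end_lsn.0 > disk_consistent_lsn.0 + 1
    }

    fn main() {
        let disk_consistent_lsn = Lsn(100);
        // End LSN 101 (exclusive) covers records up to LSN 100 inclusive:
        // fully flushed, so the layer must be kept.
        assert!(!is_future_delta_layer(Lsn(101), disk_consistent_lsn));
        // End LSN 102 would cover LSN 101, past the last LSN known to be
        // on disk. The old `> disk_consistent_lsn` check also (wrongly)
        // flagged the 101 case.
        assert!(is_future_delta_layer(Lsn(102), disk_consistent_lsn));
    }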