From a2df9d650e82ffe524e4de915f566e399e8f7e40 Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Wed, 14 Sep 2022 18:34:30 +0300 Subject: [PATCH] Handle backwards-compatibility of TimelineMetadata. This commit bumps TimelineMetadata format version and makes it independent from STORAGE_FORMAT_VERSION. --- pageserver/src/lib.rs | 6 +- pageserver/src/tenant/metadata.rs | 161 ++++++++++++++++++++++++++---- 2 files changed, 148 insertions(+), 19 deletions(-) diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 0bd5e242d3..7937f72de7 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -31,9 +31,11 @@ use crate::task_mgr::TaskKind; /// Current storage format version /// -/// This is embedded in the metadata file, and also in the header of all the -/// layer files. If you make any backwards-incompatible changes to the storage +/// This is embedded in the header of all the layer files. +/// If you make any backwards-incompatible changes to the storage /// format, bump this! +/// Note that TimelineMetadata uses its own version number to track +/// backwards-compatible changes to the metadata format. pub const STORAGE_FORMAT_VERSION: u16 = 3; pub const DEFAULT_PG_VERSION: u32 = 14; diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs index 41790b4d11..6d18153b4c 100644 --- a/pageserver/src/tenant/metadata.rs +++ b/pageserver/src/tenant/metadata.rs @@ -20,7 +20,12 @@ use utils::{ use crate::config::PageServerConf; use crate::virtual_file::VirtualFile; -use crate::STORAGE_FORMAT_VERSION; + +/// Use special format number to enable backward compatibility. +const METADATA_FORMAT_VERSION: u16 = 4; + +/// Previous supported format versions. +const METADATA_OLD_FORMAT_VERSION: u16 = 3; /// We assume that a write of up to METADATA_MAX_SIZE bytes is atomic. /// @@ -34,19 +39,19 @@ const METADATA_MAX_SIZE: usize = 512; #[derive(Debug, Clone, PartialEq, Eq)] pub struct TimelineMetadata { hdr: TimelineMetadataHeader, - body: TimelineMetadataBody, + body: TimelineMetadataBodyV2, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] struct TimelineMetadataHeader { checksum: u32, // CRC of serialized metadata body size: u16, // size of serialized metadata - format_version: u16, // storage format version (used for compatibility checks) + format_version: u16, // metadata format version (used for compatibility checks) } const METADATA_HDR_SIZE: usize = std::mem::size_of::(); #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -struct TimelineMetadataBody { +struct TimelineMetadataBodyV2 { disk_consistent_lsn: Lsn, // This is only set if we know it. We track it in memory when the page // server is running, but we only track the value corresponding to @@ -66,6 +71,26 @@ struct TimelineMetadataBody { pg_version: u32, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct TimelineMetadataBodyV1 { + disk_consistent_lsn: Lsn, + // This is only set if we know it. We track it in memory when the page + // server is running, but we only track the value corresponding to + // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a + // lot. We only store it in the metadata file when we flush *all* the + // in-memory data so that 'last_record_lsn' is the same as + // 'disk_consistent_lsn'. That's OK, because after page server restart, as + // soon as we reprocess at least one record, we will have a valid + // 'prev_record_lsn' value in memory again. This is only really needed when + // doing a clean shutdown, so that there is no more WAL beyond + // 'disk_consistent_lsn' + prev_record_lsn: Option, + ancestor_timeline: Option, + ancestor_lsn: Lsn, + latest_gc_cutoff_lsn: Lsn, + initdb_lsn: Lsn, +} + impl TimelineMetadata { pub fn new( disk_consistent_lsn: Lsn, @@ -80,9 +105,9 @@ impl TimelineMetadata { hdr: TimelineMetadataHeader { checksum: 0, size: 0, - format_version: STORAGE_FORMAT_VERSION, + format_version: METADATA_FORMAT_VERSION, }, - body: TimelineMetadataBody { + body: TimelineMetadataBodyV2 { disk_consistent_lsn, prev_record_lsn, ancestor_timeline, @@ -94,16 +119,43 @@ impl TimelineMetadata { } } + fn upgrade_timeline_metadata(metadata_bytes: &[u8]) -> anyhow::Result { + let mut hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?; + + // backward compatible only up to this version + ensure!( + hdr.format_version == METADATA_OLD_FORMAT_VERSION, + "unsupported metadata format version {}", + hdr.format_version + ); + + let metadata_size = hdr.size as usize; + + let body: TimelineMetadataBodyV1 = + TimelineMetadataBodyV1::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; + + let body = TimelineMetadataBodyV2 { + disk_consistent_lsn: body.disk_consistent_lsn, + prev_record_lsn: body.prev_record_lsn, + ancestor_timeline: body.ancestor_timeline, + ancestor_lsn: body.ancestor_lsn, + latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn, + initdb_lsn: body.initdb_lsn, + pg_version: 14, // All timelines created before this version had pg_version 14 + }; + + hdr.format_version = METADATA_FORMAT_VERSION; + + Ok(Self { hdr, body }) + } + pub fn from_bytes(metadata_bytes: &[u8]) -> anyhow::Result { ensure!( metadata_bytes.len() == METADATA_MAX_SIZE, "metadata bytes size is wrong" ); let hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?; - ensure!( - hdr.format_version == STORAGE_FORMAT_VERSION, - "format version mismatch" - ); + let metadata_size = hdr.size as usize; ensure!( metadata_size <= METADATA_MAX_SIZE, @@ -114,13 +166,20 @@ impl TimelineMetadata { hdr.checksum == calculated_checksum, "metadata checksum mismatch" ); - let body = TimelineMetadataBody::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; - ensure!( - body.disk_consistent_lsn.is_aligned(), - "disk_consistent_lsn is not aligned" - ); - Ok(TimelineMetadata { hdr, body }) + if hdr.format_version != METADATA_FORMAT_VERSION { + // If metadata has the old format, + // upgrade it and return the result + TimelineMetadata::upgrade_timeline_metadata(&metadata_bytes) + } else { + let body = + TimelineMetadataBodyV2::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; + ensure!( + body.disk_consistent_lsn.is_aligned(), + "disk_consistent_lsn is not aligned" + ); + Ok(TimelineMetadata { hdr, body }) + } } pub fn to_bytes(&self) -> anyhow::Result> { @@ -128,7 +187,7 @@ impl TimelineMetadata { let metadata_size = METADATA_HDR_SIZE + body_bytes.len(); let hdr = TimelineMetadataHeader { size: metadata_size as u16, - format_version: STORAGE_FORMAT_VERSION, + format_version: METADATA_FORMAT_VERSION, checksum: crc32c::crc32c(&body_bytes), }; let hdr_bytes = hdr.ser()?; @@ -235,4 +294,72 @@ mod tests { "Metadata that was serialized to bytes and deserialized back should not change" ); } + + // Generate old version metadata and read it with current code. + // Ensure that it is upgraded correctly + #[test] + fn test_metadata_upgrade() { + #[derive(Debug, Clone, PartialEq, Eq)] + struct TimelineMetadataV1 { + hdr: TimelineMetadataHeader, + body: TimelineMetadataBodyV1, + } + + let metadata_v1 = TimelineMetadataV1 { + hdr: TimelineMetadataHeader { + checksum: 0, + size: 0, + format_version: METADATA_OLD_FORMAT_VERSION, + }, + body: TimelineMetadataBodyV1 { + disk_consistent_lsn: Lsn(0x200), + prev_record_lsn: Some(Lsn(0x100)), + ancestor_timeline: Some(TIMELINE_ID), + ancestor_lsn: Lsn(0), + latest_gc_cutoff_lsn: Lsn(0), + initdb_lsn: Lsn(0), + }, + }; + + impl TimelineMetadataV1 { + pub fn to_bytes(&self) -> anyhow::Result> { + let body_bytes = self.body.ser()?; + let metadata_size = METADATA_HDR_SIZE + body_bytes.len(); + let hdr = TimelineMetadataHeader { + size: metadata_size as u16, + format_version: METADATA_OLD_FORMAT_VERSION, + checksum: crc32c::crc32c(&body_bytes), + }; + let hdr_bytes = hdr.ser()?; + let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE]; + metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes); + metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes); + Ok(metadata_bytes) + } + } + + let metadata_bytes = metadata_v1 + .to_bytes() + .expect("Should serialize correct metadata to bytes"); + + // This should deserialize to the latest version format + let deserialized_metadata = TimelineMetadata::from_bytes(&metadata_bytes) + .expect("Should deserialize its own bytes"); + + let expected_metadata = TimelineMetadata::new( + Lsn(0x200), + Some(Lsn(0x100)), + Some(TIMELINE_ID), + Lsn(0), + Lsn(0), + Lsn(0), + 14, // All timelines created before this version had pg_version 14 + ); + + assert_eq!( + deserialized_metadata.body, expected_metadata.body, + "Metadata of the old version {} should be upgraded to the latest version {}", + METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION + ); + } }