From 21db1bc2f0b295433fc2ba70e47c03bade1fda01 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Mon, 22 Apr 2024 15:51:00 -0400 Subject: [PATCH] metadata v3 Signed-off-by: Alex Chi Z --- pageserver/src/tenant/metadata.rs | 179 ++++++++++++++++++++++++------ 1 file changed, 145 insertions(+), 34 deletions(-) diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs index a64b5ae704..49c41777cc 100644 --- a/pageserver/src/tenant/metadata.rs +++ b/pageserver/src/tenant/metadata.rs @@ -14,10 +14,11 @@ use utils::bin_ser::SerializeError; use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn}; /// Use special format number to enable backward compatibility. -const METADATA_FORMAT_VERSION: u16 = 4; +const METADATA_FORMAT_VERSION: u16 = 5; /// Previous supported format versions. -const METADATA_OLD_FORMAT_VERSION: u16 = 3; +const METADATA_OLD_FORMAT_VERSION_V2: u16 = 4; +const METADATA_OLD_FORMAT_VERSION_V1: u16 = 3; /// We assume that a write of up to METADATA_MAX_SIZE bytes is atomic. /// @@ -31,7 +32,7 @@ const METADATA_MAX_SIZE: usize = 512; #[derive(Debug, Clone, PartialEq, Eq)] pub struct TimelineMetadata { hdr: TimelineMetadataHeader, - body: TimelineMetadataBodyV2, + body: TimelineMetadataBodyV3, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -42,6 +43,28 @@ struct TimelineMetadataHeader { } const METADATA_HDR_SIZE: usize = std::mem::size_of::(); +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +struct TimelineMetadataBodyV3 { + disk_consistent_lsn: Lsn, + // This is only set if we know it. We track it in memory when the page + // server is running, but we only track the value corresponding to + // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a + // lot. We only store it in the metadata file when we flush *all* the + // in-memory data so that 'last_record_lsn' is the same as + // 'disk_consistent_lsn'. That's OK, because after page server restart, as + // soon as we reprocess at least one record, we will have a valid + // 'prev_record_lsn' value in memory again. This is only really needed when + // doing a clean shutdown, so that there is no more WAL beyond + // 'disk_consistent_lsn' + prev_record_lsn: Option, + ancestor_timeline: Option, + ancestor_lsn: Lsn, + latest_gc_cutoff_lsn: Lsn, + initdb_lsn: Lsn, + pg_version: u32, + aux_file_v2: bool, +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] struct TimelineMetadataBodyV2 { disk_consistent_lsn: Lsn, @@ -61,9 +84,6 @@ struct TimelineMetadataBodyV2 { latest_gc_cutoff_lsn: Lsn, initdb_lsn: Lsn, pg_version: u32, - #[serde(default)] - #[serde(skip_serializing_if = "Option::is_none")] - aux_file_v2: Option, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -104,7 +124,7 @@ impl TimelineMetadata { size: 0, format_version: METADATA_FORMAT_VERSION, }, - body: TimelineMetadataBodyV2 { + body: TimelineMetadataBodyV3 { disk_consistent_lsn, prev_record_lsn, ancestor_timeline, @@ -112,7 +132,7 @@ impl TimelineMetadata { latest_gc_cutoff_lsn, initdb_lsn, pg_version, - aux_file_v2: if aux_file_v2 { Some(true) } else { None }, + aux_file_v2, }, } } @@ -121,30 +141,51 @@ impl TimelineMetadata { let mut hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?; // backward compatible only up to this version - ensure!( - hdr.format_version == METADATA_OLD_FORMAT_VERSION, - "unsupported metadata format version {}", - hdr.format_version - ); + let body = match hdr.format_version { + METADATA_OLD_FORMAT_VERSION_V2 => { + let metadata_size = hdr.size as usize; - let metadata_size = hdr.size as usize; + let body: TimelineMetadataBodyV2 = + TimelineMetadataBodyV2::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; - let body: TimelineMetadataBodyV1 = - TimelineMetadataBodyV1::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; + let body = TimelineMetadataBodyV3 { + disk_consistent_lsn: body.disk_consistent_lsn, + prev_record_lsn: body.prev_record_lsn, + ancestor_timeline: body.ancestor_timeline, + ancestor_lsn: body.ancestor_lsn, + latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn, + initdb_lsn: body.initdb_lsn, + pg_version: body.pg_version, + aux_file_v2: false, + }; - let body = TimelineMetadataBodyV2 { - disk_consistent_lsn: body.disk_consistent_lsn, - prev_record_lsn: body.prev_record_lsn, - ancestor_timeline: body.ancestor_timeline, - ancestor_lsn: body.ancestor_lsn, - latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn, - initdb_lsn: body.initdb_lsn, - pg_version: 14, // All timelines created before this version had pg_version 14 - aux_file_v2: None, + hdr.format_version = METADATA_FORMAT_VERSION; + body + } + METADATA_OLD_FORMAT_VERSION_V1 => { + let metadata_size = hdr.size as usize; + + let body: TimelineMetadataBodyV1 = + TimelineMetadataBodyV1::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; + + let body = TimelineMetadataBodyV3 { + disk_consistent_lsn: body.disk_consistent_lsn, + prev_record_lsn: body.prev_record_lsn, + ancestor_timeline: body.ancestor_timeline, + ancestor_lsn: body.ancestor_lsn, + latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn, + initdb_lsn: body.initdb_lsn, + pg_version: 14, // All timelines created before this version had pg_version 14 + aux_file_v2: false, + }; + + hdr.format_version = METADATA_FORMAT_VERSION; + body + } + _ => { + anyhow::bail!("unsupported metadata format version {}", hdr.format_version); + } }; - - hdr.format_version = METADATA_FORMAT_VERSION; - Ok(Self { hdr, body }) } @@ -172,7 +213,7 @@ impl TimelineMetadata { TimelineMetadata::upgrade_timeline_metadata(metadata_bytes) } else { let body = - TimelineMetadataBodyV2::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; + TimelineMetadataBodyV3::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?; ensure!( body.disk_consistent_lsn.is_aligned(), "disk_consistent_lsn is not aligned" @@ -227,7 +268,7 @@ impl TimelineMetadata { } pub fn aux_file_v2(&self) -> bool { - self.body.aux_file_v2.unwrap_or(false) + self.body.aux_file_v2 } // Checksums make it awkward to build a valid instance by hand. This helper @@ -252,7 +293,7 @@ impl TimelineMetadata { self.body.disk_consistent_lsn = update.disk_consistent_lsn; self.body.prev_record_lsn = update.prev_record_lsn; self.body.latest_gc_cutoff_lsn = update.latest_gc_cutoff_lsn; - self.body.aux_file_v2 = if update.aux_file_v2 { Some(true) } else { None }; + self.body.aux_file_v2 = update.aux_file_v2; } } @@ -348,7 +389,7 @@ mod tests { hdr: TimelineMetadataHeader { checksum: 0, size: 0, - format_version: METADATA_OLD_FORMAT_VERSION, + format_version: METADATA_OLD_FORMAT_VERSION_V1, }, body: TimelineMetadataBodyV1 { disk_consistent_lsn: Lsn(0x200), @@ -366,7 +407,7 @@ mod tests { let metadata_size = METADATA_HDR_SIZE + body_bytes.len(); let hdr = TimelineMetadataHeader { size: metadata_size as u16, - format_version: METADATA_OLD_FORMAT_VERSION, + format_version: METADATA_OLD_FORMAT_VERSION_V1, checksum: crc32c::crc32c(&body_bytes), }; let hdr_bytes = hdr.ser()?; @@ -399,7 +440,77 @@ mod tests { assert_eq!( deserialized_metadata.body, expected_metadata.body, "Metadata of the old version {} should be upgraded to the latest version {}", - METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION + METADATA_OLD_FORMAT_VERSION_V1, METADATA_FORMAT_VERSION + ); + } + + // Generate old version metadata and read it with current code. + // Ensure that it is upgraded correctly + #[test] + fn test_metadata_upgrade_v2() { + #[derive(Debug, Clone, PartialEq, Eq)] + struct TimelineMetadataV2 { + hdr: TimelineMetadataHeader, + body: TimelineMetadataBodyV2, + } + + let metadata_v2 = TimelineMetadataV2 { + hdr: TimelineMetadataHeader { + checksum: 0, + size: 0, + format_version: METADATA_OLD_FORMAT_VERSION_V2, + }, + body: TimelineMetadataBodyV2 { + disk_consistent_lsn: Lsn(0x200), + prev_record_lsn: Some(Lsn(0x100)), + ancestor_timeline: Some(TIMELINE_ID), + ancestor_lsn: Lsn(0), + latest_gc_cutoff_lsn: Lsn(0), + initdb_lsn: Lsn(0), + pg_version: 16, + }, + }; + + impl TimelineMetadataV2 { + pub fn to_bytes(&self) -> anyhow::Result> { + let body_bytes = self.body.ser()?; + let metadata_size = METADATA_HDR_SIZE + body_bytes.len(); + let hdr = TimelineMetadataHeader { + size: metadata_size as u16, + format_version: METADATA_OLD_FORMAT_VERSION_V2, + checksum: crc32c::crc32c(&body_bytes), + }; + let hdr_bytes = hdr.ser()?; + let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE]; + metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes); + metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes); + Ok(metadata_bytes) + } + } + + let metadata_bytes = metadata_v2 + .to_bytes() + .expect("Should serialize correct metadata to bytes"); + + // This should deserialize to the latest version format + let deserialized_metadata = TimelineMetadata::from_bytes(&metadata_bytes) + .expect("Should deserialize its own bytes"); + + let expected_metadata = TimelineMetadata::new( + Lsn(0x200), + Some(Lsn(0x100)), + Some(TIMELINE_ID), + Lsn(0), + Lsn(0), + Lsn(0), + 16, + false, + ); + + assert_eq!( + deserialized_metadata.body, expected_metadata.body, + "Metadata of the old version {} should be upgraded to the latest version {}", + METADATA_OLD_FORMAT_VERSION_V2, METADATA_FORMAT_VERSION ); }