From 9bdb14c1c0b3ef1082ce68f1c54d4547393da362 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:27:48 -0500 Subject: [PATCH] fix(pageserver): ensure initial image layers have correct key ranges (#10374) ## Problem Discovered during the relation dir refactor work. If we do not create images as in this patch, we would get two sets of image layers: ``` 0000...METADATA_KEYS 0000...REL_KEYS ``` They overlap at the same LSN and would cause data loss for relation keys. This doesn't happen in prod because initial image layer generation is never called, but it is better to fix it now to avoid future issues with the reldir refactors. ## Summary of changes * Consolidate the `create_image_layers` calls into a single one. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline.rs | 43 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index f7227efeba..741b214a73 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3781,36 +3781,35 @@ impl Timeline { return Err(FlushLayerError::Cancelled); } - let mut layers_to_upload = Vec::new(); - layers_to_upload.extend( - self.create_image_layers( - &rel_partition, - self.initdb_lsn, - ImageLayerCreationMode::Initial, - ctx, - ) - .await?, - ); + // Ensure that we have a single call to `create_image_layers` with a combined dense keyspace. + // So that the key ranges don't overlap. 
+ let mut partitions = KeyPartitioning::default(); + partitions.parts.extend(rel_partition.parts); if !metadata_partition.parts.is_empty() { assert_eq!( metadata_partition.parts.len(), 1, "currently sparse keyspace should only contain a single metadata keyspace" ); - layers_to_upload.extend( - self.create_image_layers( - // Safety: create_image_layers treat sparse keyspaces differently that it does not scan - // every single key within the keyspace, and therefore, it's safe to force converting it - // into a dense keyspace before calling this function. - &metadata_partition.into_dense(), - self.initdb_lsn, - ImageLayerCreationMode::Initial, - ctx, - ) - .await?, - ); + // Safety: create_image_layers treat sparse keyspaces differently that it does not scan + // every single key within the keyspace, and therefore, it's safe to force converting it + // into a dense keyspace before calling this function. + partitions + .parts + .extend(metadata_partition.into_dense().parts); } + let mut layers_to_upload = Vec::new(); + layers_to_upload.extend( + self.create_image_layers( + &partitions, + self.initdb_lsn, + ImageLayerCreationMode::Initial, + ctx, + ) + .await?, + ); + (layers_to_upload, None) } else { // Normal case, write out a L0 delta layer file.