diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 6f34696724..71589a0581 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -656,22 +656,40 @@ impl RemoteTimelineClient { // from latest_files, but not yet scheduled for deletion. Use a closure // to syntactically forbid ? or bail! calls here. let no_bail_here = || { - for name in names { - if upload_queue.latest_files.remove(name).is_some() { - upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1; - } - } + // Decorate our list of names with each name's generation, dropping + // makes that are unexpectedly missing from our metadata. + let with_generations: Vec<_> = names + .iter() + .map(|name| { + // Remove from latest_files, learning the file's remote generation in the process + let meta = upload_queue.latest_files.remove(name); + + if meta.is_none() { + // This is unexpected: latest_files is meant to be kept up to + // date. We can't delete the layer if we have forgotten what + // generation it was in. + warn!("Deleting layer {name} not found in latest_files list"); + None + } else { + upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1; + let generation = meta.map(|m| m.generation).flatten(); + Some((name, generation)) + } + }) + .filter_map(|i| i) + .collect(); if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 { self.schedule_index_upload(upload_queue, metadata); } // schedule the actual deletions - for name in names { + for (name, generation) in with_generations { let op = UploadOp::Delete(Delete { file_kind: RemoteOpFileKind::Layer, layer_file_name: name.clone(), scheduled_from_timeline_delete: false, + generation: generation, }); self.calls_unfinished_metric_begin(&op); upload_queue.queued_operations.push_back(op); @@ -828,12 +846,14 @@ impl RemoteTimelineClient { .reserve(stopped.upload_queue_for_deletion.latest_files.len()); // schedule the actual deletions - for name in stopped.upload_queue_for_deletion.latest_files.keys() { + for (name, meta) in &stopped.upload_queue_for_deletion.latest_files { let op = UploadOp::Delete(Delete { file_kind: RemoteOpFileKind::Layer, layer_file_name: name.clone(), scheduled_from_timeline_delete: true, + generation: meta.generation, }); + self.calls_unfinished_metric_begin(&op); stopped .upload_queue_for_deletion @@ -1060,7 +1080,7 @@ impl RemoteTimelineClient { let upload_result: anyhow::Result<()> = match &task.op { UploadOp::UploadLayer(ref layer_file_name, ref layer_metadata) => { - let mut path = self + let path = self .conf .timeline_path(&self.tenant_id, &self.timeline_id) .join(layer_file_name.file_name()); @@ -1120,7 +1140,7 @@ impl RemoteTimelineClient { .conf .timeline_path(&self.tenant_id, &self.timeline_id) .join(delete.layer_file_name.file_name()); - delete::delete_layer(self.conf, &self.storage_impl, path) + delete::delete_layer(self.conf, &self.storage_impl, path, delete.generation) .measure_remote_op( self.tenant_id, self.timeline_id, @@ -1420,17 +1440,21 @@ pub fn remote_index_path( pub fn remote_path( conf: &PageServerConf, local_path: &Path, - generation: Generation, + generation: Option, ) -> anyhow::Result { let stripped = local_path .strip_prefix(&conf.workdir) .context("Failed to strip workdir prefix")?; - let suffixed = format!( - "{0}{1}", - stripped.to_string_lossy(), - generation.get_suffix() - ); + let suffixed = if let Some(generation) = generation { + format!( + "{0}{1}", + stripped.to_string_lossy(), + generation.get_suffix() + ) + } else { + stripped.to_string_lossy().to_string() + }; RemotePath::new(&PathBuf::from(suffixed)).with_context(|| { format!( diff --git a/pageserver/src/tenant/remote_timeline_client/delete.rs b/pageserver/src/tenant/remote_timeline_client/delete.rs index 912f213f15..d441789092 100644 --- a/pageserver/src/tenant/remote_timeline_client/delete.rs +++ b/pageserver/src/tenant/remote_timeline_client/delete.rs @@ -14,15 +14,14 @@ pub(super) async fn delete_layer<'a>( conf: &'static PageServerConf, storage: &'a GenericRemoteStorage, local_layer_path: &'a Path, + generation: Option, ) -> anyhow::Result<()> { fail::fail_point!("before-delete-layer", |_| { anyhow::bail!("failpoint before-delete-layer") }); debug!("Deleting layer from remote storage: {local_layer_path:?}",); - // FIXME: once we start writing out keys with generations, this will - // need updating (or, in the deletion queue branch, it is already gone) - let path_to_delete = remote_path(conf, local_layer_path, Generation::placeholder())?; + let path_to_delete = remote_path(conf, local_layer_path, generation)?; // We don't want to print an error if the delete failed if the file has // already been deleted. Thankfully, in this situation S3 already diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index 3a51ea035d..fe195a1243 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -58,7 +58,7 @@ pub(super) async fn upload_timeline_layer<'a>( bail!("failpoint before-upload-layer") }); - let storage_path = remote_path(conf, source_path, generation)?; + let storage_path = remote_path(conf, source_path, Some(generation))?; let source_file_res = fs::File::open(&source_path).await; let source_file = match source_file_res { Ok(source_file) => source_file, diff --git a/pageserver/src/tenant/upload_queue.rs b/pageserver/src/tenant/upload_queue.rs index 92202cb3eb..696fe12e7c 100644 --- a/pageserver/src/tenant/upload_queue.rs +++ b/pageserver/src/tenant/upload_queue.rs @@ -1,6 +1,7 @@ use crate::metrics::RemoteOpFileKind; use super::storage_layer::LayerFileName; +use super::Generation; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::remote_timeline_client::index::IndexPart; use crate::tenant::remote_timeline_client::index::LayerFileMetadata; @@ -205,6 +206,7 @@ pub(crate) struct Delete { pub(crate) file_kind: RemoteOpFileKind, pub(crate) layer_file_name: LayerFileName, pub(crate) scheduled_from_timeline_delete: bool, + pub(crate) generation: Option, } #[derive(Debug)] @@ -238,9 +240,10 @@ impl std::fmt::Display for UploadOp { } UploadOp::Delete(delete) => write!( f, - "Delete(path: {}, scheduled_from_timeline_delete: {})", + "Delete(path: {}, scheduled_from_timeline_delete: {}, gen: {})", delete.layer_file_name.file_name(), - delete.scheduled_from_timeline_delete + delete.scheduled_from_timeline_delete, + delete.generation.unwrap_or(Generation::none()) ), UploadOp::Barrier(_) => write!(f, "Barrier"), }