From 6a2d616f4dfa5b7ea89e799e4341cf9ee794f4a1 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 24 Oct 2023 11:13:49 +0100 Subject: [PATCH] pageserver: on failed upload, remove layer from latest_files --- .../src/tenant/remote_timeline_client.rs | 24 +++++++++++++++++-- .../tenant/remote_timeline_client/upload.rs | 11 ++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 1a3ec1b4c6..68cfec738a 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1097,7 +1097,7 @@ impl RemoteTimelineClient { .timeline_path(&self.tenant_id, &self.timeline_id) .join(layer_file_name.file_name()); - upload::upload_timeline_layer( + let result = upload::upload_timeline_layer( self.conf, &self.storage_impl, &path, @@ -1111,7 +1111,27 @@ impl RemoteTimelineClient { RemoteOpKind::Upload, Arc::clone(&self.metrics), ) - .await + .await; + + if let Ok(outcome) = &result { + match outcome { + upload::UploadOutcome::NotFound => { + // Layer uploads can be no-ops if the file is not found on local disk. In this case, we must + // update our remote metadata to reflect that the file doesn't exist (it was added to `latest_files`) + // when scheduled. + let mut guard = self.upload_queue.lock().unwrap(); + if let Ok(upload_queue) = guard.initialized_mut() { + upload_queue.latest_files.remove(layer_file_name); + upload_queue + .latest_files_changes_since_metadata_upload_scheduled += 1; + } + } + upload::UploadOutcome::Uploaded => { + // Success, no special handling required. + } + } + } + result.map(|_| ()) } UploadOp::UploadMetadata(ref index_part, _lsn) => { let mention_having_future_layers = if cfg!(feature = "testing") { diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index 03ba137566..63bed87f71 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -45,6 +45,11 @@ pub(super) async fn upload_index_part<'a>( .with_context(|| format!("upload index part for '{tenant_id} / {timeline_id}'")) } +pub(super) enum UploadOutcome { + Uploaded, + NotFound, +} + /// Attempts to upload given layer files. /// No extra checks for overlapping files is made and any files that are already present remotely will be overwritten, if submitted during the upload. /// @@ -55,7 +60,7 @@ pub(super) async fn upload_timeline_layer<'a>( source_path: &'a Utf8Path, known_metadata: &'a LayerFileMetadata, generation: Generation, -) -> anyhow::Result<()> { +) -> anyhow::Result { fail_point!("before-upload-layer", |_| { bail!("failpoint before-upload-layer") }); @@ -73,7 +78,7 @@ pub(super) async fn upload_timeline_layer<'a>( // something worse, like when a file is scheduled for upload before // it has been written to disk yet. info!(path = %source_path, "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more."); - return Ok(()); + return Ok(UploadOutcome::NotFound); } Err(e) => { Err(e).with_context(|| format!("open a source file for layer {source_path:?}"))? @@ -99,5 +104,5 @@ pub(super) async fn upload_timeline_layer<'a>( .await .with_context(|| format!("upload layer from local path '{source_path}'"))?; - Ok(()) + Ok(UploadOutcome::Uploaded) }