diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index 32c86052ef..b42709868b 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -1,4 +1,5 @@ use std::collections::{HashMap, HashSet}; +use std::time::SystemTime; use itertools::Itertools; use pageserver::tenant::checks::check_valid_layermap; @@ -88,9 +89,14 @@ pub(crate) async fn branch_cleanup_and_check_errors( match s3_data.blob_data { BlobDataParseResult::Parsed { index_part, - index_part_generation: _index_part_generation, - s3_layers: _s3_layers, + index_part_generation: _, + s3_layers: _, + index_part_last_modified_time, + index_part_snapshot_time, } => { + // Ignore missing file error if index_part downloaded is different from the one when listing the layer files. + let ignore_error = index_part_snapshot_time < index_part_last_modified_time && !cfg!(debug_assertions); if !IndexPart::KNOWN_VERSIONS.contains(&index_part.version()) { result .errors @@ -171,7 +177,7 @@ pub(crate) async fn branch_cleanup_and_check_errors( is_l0, ); - if is_l0 { + if is_l0 || ignore_error { result.warnings.push(msg); } else { result.errors.push(msg); @@ -308,6 +314,8 @@ pub(crate) enum BlobDataParseResult { Parsed { index_part: Box<IndexPart>, index_part_generation: Generation, + index_part_last_modified_time: SystemTime, + index_part_snapshot_time: SystemTime, s3_layers: HashSet<(LayerName, Generation)>, }, /// The remains of an uncleanly deleted Timeline or aborted timeline creation(e.g.
an initdb archive only, or some layer without an index) @@ -484,9 +492,9 @@ async fn list_timeline_blobs_impl( } if let Some(index_part_object_key) = index_part_object.as_ref() { - let index_part_bytes = + let (index_part_bytes, index_part_last_modified_time) = match download_object_with_retries(remote_client, &index_part_object_key.key).await { - Ok(index_part_bytes) => index_part_bytes, + Ok(data) => data, Err(e) => { // It is possible that the branch gets deleted in-between we list the objects // and we download the index part file. @@ -500,7 +508,7 @@ async fn list_timeline_blobs_impl( )); } }; - + let index_part_snapshot_time = index_part_object_key.last_modified; match serde_json::from_slice(&index_part_bytes) { Ok(index_part) => { return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData { @@ -508,6 +516,8 @@ async fn list_timeline_blobs_impl( index_part: Box::new(index_part), index_part_generation, s3_layers, + index_part_last_modified_time, + index_part_snapshot_time, }, unused_index_keys: index_part_keys, unknown_keys, @@ -625,7 +635,7 @@ pub(crate) async fn list_tenant_manifests( let manifest_bytes = match download_object_with_retries(remote_client, &latest_listing_object.key).await { - Ok(bytes) => bytes, + Ok((bytes, _)) => bytes, Err(e) => { // It is possible that the tenant gets deleted in-between we list the objects // and we download the manifest file. 
diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index be526daaf0..224235098c 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -13,7 +13,7 @@ pub mod tenant_snapshot; use std::env; use std::fmt::Display; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, SystemTime}; use anyhow::Context; use aws_config::retry::{RetryConfigBuilder, RetryMode}; @@ -509,10 +509,11 @@ async fn list_objects_with_retries( panic!("MAX_RETRIES is not allowed to be 0"); } +/// Returns content, last modified time async fn download_object_with_retries( remote_client: &GenericRemoteStorage, key: &RemotePath, -) -> anyhow::Result<Vec<u8>> { +) -> anyhow::Result<(Vec<u8>, SystemTime)> { let cancel = CancellationToken::new(); for trial in 0..MAX_RETRIES { let mut buf = Vec::new(); @@ -535,7 +536,7 @@ { Ok(bytes_read) => { tracing::debug!("Downloaded {bytes_read} bytes for object {key}"); - return Ok(buf); + return Ok((buf, download.last_modified)); } Err(e) => { error!("Failed to stream object body for key {key}: {e}"); diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index d19b8a5f91..a997373375 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -450,6 +450,8 @@ async fn gc_ancestor( index_part: _, index_part_generation: _, s3_layers, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => s3_layers, BlobDataParseResult::Relic => { // Post-deletion tenant location: don't try and GC it. @@ -586,7 +588,9 @@ async fn gc_timeline( BlobDataParseResult::Parsed { index_part, index_part_generation, - s3_layers: _s3_layers, + s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => (index_part, *index_part_generation, data.unused_index_keys), BlobDataParseResult::Relic => { // Post-deletion tenant location: don't try and GC it.
diff --git a/storage_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs index c8de6e46b3..a31fb5b242 100644 --- a/storage_scrubber/src/scan_pageserver_metadata.rs +++ b/storage_scrubber/src/scan_pageserver_metadata.rs @@ -47,6 +47,8 @@ impl MetadataSummary { index_part, index_part_generation: _, s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } = &data.blob_data { *self @@ -195,7 +197,9 @@ pub async fn scan_pageserver_metadata( if let BlobDataParseResult::Parsed { index_part, index_part_generation, - s3_layers: _s3_layers, + s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } = &data.blob_data { if index_part.deleted_at.is_some() { @@ -318,9 +322,11 @@ pub async fn scan_pageserver_metadata( match &data.blob_data { BlobDataParseResult::Parsed { - index_part: _index_part, + index_part: _, index_part_generation: _index_part_generation, s3_layers, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => { tenant_objects.push(ttid, s3_layers.clone()); } diff --git a/storage_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs index 39e0b5c9b4..60e79fb859 100644 --- a/storage_scrubber/src/tenant_snapshot.rs +++ b/storage_scrubber/src/tenant_snapshot.rs @@ -268,6 +268,8 @@ impl SnapshotDownloader { index_part, index_part_generation, s3_layers: _, + index_part_last_modified_time: _, + index_part_snapshot_time: _, } => { self.download_timeline( ttid,