diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index e6663ef56f..8f928fd81b 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2251,7 +2251,7 @@ async fn tenant_scan_remote_handler( %timeline_id)) .await { - Ok((index_part, index_generation)) => { + Ok((index_part, index_generation, _index_mtime)) => { tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)", index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn()); generation = std::cmp::max(generation, index_generation); diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 5e9702bd3d..450084aca2 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -505,7 +505,7 @@ impl RemoteTimelineClient { }, ); - let (index_part, _index_generation) = download::download_index_part( + let (index_part, index_generation, index_last_modified) = download::download_index_part( &self.storage_impl, &self.tenant_shard_id, &self.timeline_id, @@ -519,6 +519,49 @@ impl RemoteTimelineClient { ) .await?; + // Defense in depth: monotonicity of generation numbers is an important correctness guarantee, so when we see a very + // old index, we do extra checks in case this is the result of backward time-travel of the generation number (e.g. + // in case of a bug in the service that issues generation numbers). Indices are allowed to be old, but we expect that + // when we load an old index we are loading the _latest_ index: if we are asked to load an old index and there is + // also a newer index available, that is surprising. + const INDEX_AGE_CHECKS_THRESHOLD: Duration = Duration::from_secs(14 * 24 * 3600); + let index_age = index_last_modified.elapsed().unwrap_or_else(|e| { + if e.duration() > Duration::from_secs(5) { + // We only warn if the S3 clock and our local clock are >5s out: because this is a low resolution + // timestamp, it is common to be out by at least 1 second. + tracing::warn!("Index has modification time in the future: {e}"); + } + Duration::ZERO + }); + if index_age > INDEX_AGE_CHECKS_THRESHOLD { + tracing::info!( + ?index_generation, + age = index_age.as_secs_f64(), + "Loaded an old index, checking for other indices..." + ); + + // Find the highest-generation index + let (_latest_index_part, latest_index_generation, latest_index_mtime) = + download::download_index_part( + &self.storage_impl, + &self.tenant_shard_id, + &self.timeline_id, + Generation::MAX, + cancel, + ) + .await?; + + if latest_index_generation > index_generation { + // Unexpected! Why are we loading such an old index if a more recent one exists? + tracing::warn!( + ?index_generation, + ?latest_index_generation, + ?latest_index_mtime, + "Found a newer index while loading an old one" + ); + } + } + if index_part.deleted_at.is_some() { Ok(MaybeDeletedIndexPart::Deleted(index_part)) } else { diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 692e4d3096..b5d4b0f0bb 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -6,6 +6,7 @@ use std::collections::HashSet; use std::future::Future; use std::str::FromStr; +use std::time::SystemTime; use anyhow::{anyhow, Context}; use camino::{Utf8Path, Utf8PathBuf}; @@ -343,10 +344,10 @@ async fn do_download_index_part( timeline_id: &TimelineId, index_generation: Generation, cancel: &CancellationToken, -) -> Result<(IndexPart, Generation), DownloadError> { +) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation); - let index_part_bytes = download_retry_forever( + let (index_part_bytes, index_part_mtime) = download_retry_forever( || async { let download = storage .download(&remote_path, &DownloadOpts::default(), cancel) @@ -359,7 +360,7 @@ async fn do_download_index_part( tokio::io::copy_buf(&mut stream, &mut bytes).await?; - Ok(bytes) + Ok((bytes, download.last_modified)) }, &format!("download {remote_path:?}"), cancel, @@ -370,7 +371,7 @@ async fn do_download_index_part( .with_context(|| format!("deserialize index part file at {remote_path:?}")) .map_err(DownloadError::Other)?; - Ok((index_part, index_generation)) + Ok((index_part, index_generation, index_part_mtime)) } /// index_part.json objects are suffixed with a generation number, so we cannot @@ -385,7 +386,7 @@ pub(crate) async fn download_index_part( timeline_id: &TimelineId, my_generation: Generation, cancel: &CancellationToken, -) -> Result<(IndexPart, Generation), DownloadError> { +) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { debug_assert_current_span_has_tenant_and_timeline_id(); if my_generation.is_none() {