From 24398bf0600223fb74fb3aa33ca4e4374209f84d Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 17 Oct 2024 19:02:24 +0100 Subject: [PATCH] pageserver: detect & warn on loading an old index which is probably the result of a bad generation (#9383) ## Problem The pageserver generally trusts the storage controller/control plane to give it valid generations. However, sometimes it should be obvious that a generation is bad, and for defense in depth we should detect that on the pageserver. This PR is part 1 of 2: 1. in this PR we detect and warn on such situations, but do not block starting up the tenant. Once we have confidence that the check is not firing unexpectedly in the field 2. part 2 of 2 will introduce a condition that refuses to start a tenant in this situtation, and a test for that (maybe, if we can figure out how to spoof an ancient mtime) Related: #6951 ## Summary of changes - When loading an index older than 2 weeks, log an INFO message noting that we will check for other indices - When loading an index older than 2 weeks _and_ a newer-generation index exists, log a warning. --- pageserver/src/http/routes.rs | 2 +- .../src/tenant/remote_timeline_client.rs | 45 ++++++++++++++++++- .../tenant/remote_timeline_client/download.rs | 11 ++--- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index e6663ef56f..8f928fd81b 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2251,7 +2251,7 @@ async fn tenant_scan_remote_handler( %timeline_id)) .await { - Ok((index_part, index_generation)) => { + Ok((index_part, index_generation, _index_mtime)) => { tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)", index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn()); generation = std::cmp::max(generation, index_generation); diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 5e9702bd3d..450084aca2 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -505,7 +505,7 @@ impl RemoteTimelineClient { }, ); - let (index_part, _index_generation) = download::download_index_part( + let (index_part, index_generation, index_last_modified) = download::download_index_part( &self.storage_impl, &self.tenant_shard_id, &self.timeline_id, @@ -519,6 +519,49 @@ impl RemoteTimelineClient { ) .await?; + // Defense in depth: monotonicity of generation numbers is an important correctness guarantee, so when we see a very + // old index, we do extra checks in case this is the result of backward time-travel of the generation number (e.g. + // in case of a bug in the service that issues generation numbers). Indices are allowed to be old, but we expect that + // when we load an old index we are loading the _latest_ index: if we are asked to load an old index and there is + // also a newer index available, that is surprising. + const INDEX_AGE_CHECKS_THRESHOLD: Duration = Duration::from_secs(14 * 24 * 3600); + let index_age = index_last_modified.elapsed().unwrap_or_else(|e| { + if e.duration() > Duration::from_secs(5) { + // We only warn if the S3 clock and our local clock are >5s out: because this is a low resolution + // timestamp, it is common to be out by at least 1 second. + tracing::warn!("Index has modification time in the future: {e}"); + } + Duration::ZERO + }); + if index_age > INDEX_AGE_CHECKS_THRESHOLD { + tracing::info!( + ?index_generation, + age = index_age.as_secs_f64(), + "Loaded an old index, checking for other indices..." + ); + + // Find the highest-generation index + let (_latest_index_part, latest_index_generation, latest_index_mtime) = + download::download_index_part( + &self.storage_impl, + &self.tenant_shard_id, + &self.timeline_id, + Generation::MAX, + cancel, + ) + .await?; + + if latest_index_generation > index_generation { + // Unexpected! Why are we loading such an old index if a more recent one exists? + tracing::warn!( + ?index_generation, + ?latest_index_generation, + ?latest_index_mtime, + "Found a newer index while loading an old one" + ); + } + } + if index_part.deleted_at.is_some() { Ok(MaybeDeletedIndexPart::Deleted(index_part)) } else { diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 692e4d3096..b5d4b0f0bb 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -6,6 +6,7 @@ use std::collections::HashSet; use std::future::Future; use std::str::FromStr; +use std::time::SystemTime; use anyhow::{anyhow, Context}; use camino::{Utf8Path, Utf8PathBuf}; @@ -343,10 +344,10 @@ async fn do_download_index_part( timeline_id: &TimelineId, index_generation: Generation, cancel: &CancellationToken, -) -> Result<(IndexPart, Generation), DownloadError> { +) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation); - let index_part_bytes = download_retry_forever( + let (index_part_bytes, index_part_mtime) = download_retry_forever( || async { let download = storage .download(&remote_path, &DownloadOpts::default(), cancel) @@ -359,7 +360,7 @@ async fn do_download_index_part( tokio::io::copy_buf(&mut stream, &mut bytes).await?; - Ok(bytes) + Ok((bytes, download.last_modified)) }, &format!("download {remote_path:?}"), cancel, @@ -370,7 +371,7 @@ async fn do_download_index_part( .with_context(|| format!("deserialize index part file at {remote_path:?}")) .map_err(DownloadError::Other)?; - Ok((index_part, index_generation)) + Ok((index_part, index_generation, index_part_mtime)) } /// index_part.json objects are suffixed with a generation number, so we cannot @@ -385,7 +386,7 @@ pub(crate) async fn download_index_part( timeline_id: &TimelineId, my_generation: Generation, cancel: &CancellationToken, -) -> Result<(IndexPart, Generation), DownloadError> { +) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { debug_assert_current_span_has_tenant_and_timeline_id(); if my_generation.is_none() {