mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-13 00:12:54 +00:00
pageserver: detect & warn on loading an old index which is probably the result of a bad generation (#9383)
## Problem The pageserver generally trusts the storage controller/control plane to give it valid generations. However, sometimes it should be obvious that a generation is bad, and for defense in depth we should detect that on the pageserver. This PR is part 1 of 2: 1. in this PR we detect and warn on such situations, but do not block starting up the tenant. Once we have confidence that the check is not firing unexpectedly in the field 2. part 2 of 2 will introduce a condition that refuses to start a tenant in this situtation, and a test for that (maybe, if we can figure out how to spoof an ancient mtime) Related: #6951 ## Summary of changes - When loading an index older than 2 weeks, log an INFO message noting that we will check for other indices - When loading an index older than 2 weeks _and_ a newer-generation index exists, log a warning.
This commit is contained in:
@@ -2251,7 +2251,7 @@ async fn tenant_scan_remote_handler(
|
||||
%timeline_id))
|
||||
.await
|
||||
{
|
||||
Ok((index_part, index_generation)) => {
|
||||
Ok((index_part, index_generation, _index_mtime)) => {
|
||||
tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)",
|
||||
index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn());
|
||||
generation = std::cmp::max(generation, index_generation);
|
||||
|
||||
@@ -505,7 +505,7 @@ impl RemoteTimelineClient {
|
||||
},
|
||||
);
|
||||
|
||||
let (index_part, _index_generation) = download::download_index_part(
|
||||
let (index_part, index_generation, index_last_modified) = download::download_index_part(
|
||||
&self.storage_impl,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
@@ -519,6 +519,49 @@ impl RemoteTimelineClient {
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Defense in depth: monotonicity of generation numbers is an important correctness guarantee, so when we see a very
|
||||
// old index, we do extra checks in case this is the result of backward time-travel of the generation number (e.g.
|
||||
// in case of a bug in the service that issues generation numbers). Indices are allowed to be old, but we expect that
|
||||
// when we load an old index we are loading the _latest_ index: if we are asked to load an old index and there is
|
||||
// also a newer index available, that is surprising.
|
||||
const INDEX_AGE_CHECKS_THRESHOLD: Duration = Duration::from_secs(14 * 24 * 3600);
|
||||
let index_age = index_last_modified.elapsed().unwrap_or_else(|e| {
|
||||
if e.duration() > Duration::from_secs(5) {
|
||||
// We only warn if the S3 clock and our local clock are >5s out: because this is a low resolution
|
||||
// timestamp, it is common to be out by at least 1 second.
|
||||
tracing::warn!("Index has modification time in the future: {e}");
|
||||
}
|
||||
Duration::ZERO
|
||||
});
|
||||
if index_age > INDEX_AGE_CHECKS_THRESHOLD {
|
||||
tracing::info!(
|
||||
?index_generation,
|
||||
age = index_age.as_secs_f64(),
|
||||
"Loaded an old index, checking for other indices..."
|
||||
);
|
||||
|
||||
// Find the highest-generation index
|
||||
let (_latest_index_part, latest_index_generation, latest_index_mtime) =
|
||||
download::download_index_part(
|
||||
&self.storage_impl,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
Generation::MAX,
|
||||
cancel,
|
||||
)
|
||||
.await?;
|
||||
|
||||
if latest_index_generation > index_generation {
|
||||
// Unexpected! Why are we loading such an old index if a more recent one exists?
|
||||
tracing::warn!(
|
||||
?index_generation,
|
||||
?latest_index_generation,
|
||||
?latest_index_mtime,
|
||||
"Found a newer index while loading an old one"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if index_part.deleted_at.is_some() {
|
||||
Ok(MaybeDeletedIndexPart::Deleted(index_part))
|
||||
} else {
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
use std::collections::HashSet;
|
||||
use std::future::Future;
|
||||
use std::str::FromStr;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
@@ -343,10 +344,10 @@ async fn do_download_index_part(
|
||||
timeline_id: &TimelineId,
|
||||
index_generation: Generation,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(IndexPart, Generation), DownloadError> {
|
||||
) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {
|
||||
let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);
|
||||
|
||||
let index_part_bytes = download_retry_forever(
|
||||
let (index_part_bytes, index_part_mtime) = download_retry_forever(
|
||||
|| async {
|
||||
let download = storage
|
||||
.download(&remote_path, &DownloadOpts::default(), cancel)
|
||||
@@ -359,7 +360,7 @@ async fn do_download_index_part(
|
||||
|
||||
tokio::io::copy_buf(&mut stream, &mut bytes).await?;
|
||||
|
||||
Ok(bytes)
|
||||
Ok((bytes, download.last_modified))
|
||||
},
|
||||
&format!("download {remote_path:?}"),
|
||||
cancel,
|
||||
@@ -370,7 +371,7 @@ async fn do_download_index_part(
|
||||
.with_context(|| format!("deserialize index part file at {remote_path:?}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
Ok((index_part, index_generation))
|
||||
Ok((index_part, index_generation, index_part_mtime))
|
||||
}
|
||||
|
||||
/// index_part.json objects are suffixed with a generation number, so we cannot
|
||||
@@ -385,7 +386,7 @@ pub(crate) async fn download_index_part(
|
||||
timeline_id: &TimelineId,
|
||||
my_generation: Generation,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(IndexPart, Generation), DownloadError> {
|
||||
) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
if my_generation.is_none() {
|
||||
|
||||
Reference in New Issue
Block a user