Files
neon/s3_scrubber/src/checks.rs
John Spray dfb0a6fdaf scrubber: handle initdb files, fix an issue with prefixes (#6079)
- The code for calculating the prefix in the bucket was expecting a
trailing slash (as it is in the tests), but that's an awkward
expectation to impose for use in the field: make the code more flexible
by only trimming a trailing character if it is indeed a slash.
- initdb archives were detected by the scrubber as malformed layer
files. Teach it to recognize and ignore them.
2023-12-12 16:53:08 +00:00

368 lines
14 KiB
Rust

use std::collections::HashSet;
use anyhow::Context;
use aws_sdk_s3::{types::ObjectIdentifier, Client};
use tracing::{error, info, warn};
use utils::generation::Generation;
use crate::cloud_admin_api::BranchData;
use crate::metadata_stream::stream_listing;
use crate::{download_object_with_retries, RootTarget};
use futures_util::{pin_mut, StreamExt};
use pageserver::tenant::remote_timeline_client::parse_remote_index_path;
use pageserver::tenant::storage_layer::LayerFileName;
use pageserver::tenant::IndexPart;
use remote_storage::RemotePath;
use utils::id::TenantTimelineId;
pub(crate) struct TimelineAnalysis {
/// Anomalies detected
pub(crate) errors: Vec<String>,
/// Healthy-but-noteworthy, like old-versioned structures that are readable but
/// worth reporting for awareness that we must not remove that old version decoding
/// yet.
pub(crate) warnings: Vec<String>,
/// Keys not referenced in metadata: candidates for removal, but NOT NECESSARILY: beware
/// of races between reading the metadata and reading the objects.
pub(crate) garbage_keys: Vec<String>,
}
impl TimelineAnalysis {
fn new() -> Self {
Self {
errors: Vec::new(),
warnings: Vec::new(),
garbage_keys: Vec::new(),
}
}
}
pub(crate) async fn branch_cleanup_and_check_errors(
id: &TenantTimelineId,
s3_root: &RootTarget,
s3_active_branch: Option<&BranchData>,
console_branch: Option<BranchData>,
s3_data: Option<S3TimelineBlobData>,
) -> TimelineAnalysis {
let mut result = TimelineAnalysis::new();
info!("Checking timeline {id}");
if let Some(s3_active_branch) = s3_active_branch {
info!(
"Checking console status for timeline for branch {:?}/{:?}",
s3_active_branch.project_id, s3_active_branch.id
);
match console_branch {
Some(_) => {result.errors.push(format!("Timeline has deleted branch data in the console (id = {:?}, project_id = {:?}), recheck whether it got removed during the check",
s3_active_branch.id, s3_active_branch.project_id))
},
None => {
result.errors.push(format!("Timeline has no branch data in the console (id = {:?}, project_id = {:?}), recheck whether it got removed during the check",
s3_active_branch.id, s3_active_branch.project_id))
}
};
}
match s3_data {
Some(s3_data) => {
result.garbage_keys.extend(s3_data.keys_to_remove);
match s3_data.blob_data {
BlobDataParseResult::Parsed {
index_part,
index_part_generation,
mut s3_layers,
} => {
if !IndexPart::KNOWN_VERSIONS.contains(&index_part.get_version()) {
result.errors.push(format!(
"index_part.json version: {}",
index_part.get_version()
))
}
if &index_part.get_version() != IndexPart::KNOWN_VERSIONS.last().unwrap() {
result.warnings.push(format!(
"index_part.json version is not latest: {}",
index_part.get_version()
))
}
if index_part.metadata.disk_consistent_lsn()
!= index_part.get_disk_consistent_lsn()
{
result.errors.push(format!(
"Mismatching disk_consistent_lsn in TimelineMetadata ({}) and in the index_part ({})",
index_part.metadata.disk_consistent_lsn(),
index_part.get_disk_consistent_lsn(),
))
}
if index_part.layer_metadata.is_empty() {
// not an error, can happen for branches with zero writes, but notice that
info!("index_part.json has no layers");
}
for (layer, metadata) in index_part.layer_metadata {
if metadata.file_size == 0 {
result.errors.push(format!(
"index_part.json contains a layer {} that has 0 size in its layer metadata", layer.file_name(),
))
}
let layer_map_key = (layer, metadata.generation);
if !s3_layers.remove(&layer_map_key) {
// FIXME: this will emit false positives if an index was
// uploaded concurrently with our scan. To make this check
// correct, we need to try sending a HEAD request for the
// layer we think is missing.
result.errors.push(format!(
"index_part.json contains a layer {}{} that is not present in remote storage",
layer_map_key.0.file_name(),
layer_map_key.1.get_suffix()
))
}
}
let orphan_layers: Vec<(LayerFileName, Generation)> = s3_layers
.into_iter()
.filter(|(_layer_name, gen)|
// A layer is only considered orphaned if it has a generation below
// the index. If the generation is >= the index, then the layer may
// be an upload from a running pageserver, or even an upload from
// a new generation that didn't upload an index yet.
//
// Even so, a layer that is not referenced by the index could just
// be something enqueued for deletion, so while this check is valid
// for indicating that a layer is garbage, it is not an indicator
// of a problem.
gen < &index_part_generation)
.collect();
if !orphan_layers.is_empty() {
// An orphan layer is not an error: it's arguably not even a warning, but it is helpful to report
// these as a hint that there is something worth cleaning up here.
result.warnings.push(format!(
"index_part.json does not contain layers from S3: {:?}",
orphan_layers
.iter()
.map(|(layer_name, gen)| format!(
"{}{}",
layer_name.file_name(),
gen.get_suffix()
))
.collect::<Vec<_>>(),
));
result.garbage_keys.extend(orphan_layers.iter().map(
|(layer_name, layer_gen)| {
let mut key = s3_root.timeline_root(id).prefix_in_bucket;
let delimiter = s3_root.delimiter();
if !key.ends_with(delimiter) {
key.push_str(delimiter);
}
key.push_str(&format!(
"{}{}",
&layer_name.file_name(),
layer_gen.get_suffix()
));
key
},
));
}
}
BlobDataParseResult::Relic => {}
BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
parse_errors
.into_iter()
.map(|error| format!("parse error: {error}")),
),
}
}
None => result
.errors
.push("Timeline has no data on S3 at all".to_string()),
}
if result.errors.is_empty() {
info!("No check errors found");
} else {
warn!("Timeline metadata errors: {0:?}", result.errors);
}
if !result.warnings.is_empty() {
warn!("Timeline metadata warnings: {0:?}", result.warnings);
}
if !result.garbage_keys.is_empty() {
error!(
"The following keys should be removed from S3: {0:?}",
result.garbage_keys
)
}
result
}
#[derive(Debug)]
pub(crate) struct S3TimelineBlobData {
pub(crate) blob_data: BlobDataParseResult,
pub(crate) keys_to_remove: Vec<String>,
}
#[derive(Debug)]
pub(crate) enum BlobDataParseResult {
Parsed {
index_part: IndexPart,
index_part_generation: Generation,
s3_layers: HashSet<(LayerFileName, Generation)>,
},
/// The remains of a deleted Timeline (i.e. an initdb archive only)
Relic,
Incorrect(Vec<String>),
}
fn parse_layer_object_name(name: &str) -> Result<(LayerFileName, Generation), String> {
match name.rsplit_once('-') {
// FIXME: this is gross, just use a regex?
Some((layer_filename, gen)) if gen.len() == 8 => {
let layer = layer_filename.parse::<LayerFileName>()?;
let gen =
Generation::parse_suffix(gen).ok_or("Malformed generation suffix".to_string())?;
Ok((layer, gen))
}
_ => Ok((name.parse::<LayerFileName>()?, Generation::none())),
}
}
pub(crate) async fn list_timeline_blobs(
s3_client: &Client,
id: TenantTimelineId,
s3_root: &RootTarget,
) -> anyhow::Result<S3TimelineBlobData> {
let mut s3_layers = HashSet::new();
let mut errors = Vec::new();
let mut keys_to_remove = Vec::new();
let mut timeline_dir_target = s3_root.timeline_root(&id);
timeline_dir_target.delimiter = String::new();
let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
let mut initdb_archive: bool = false;
let stream = stream_listing(s3_client, &timeline_dir_target);
pin_mut!(stream);
while let Some(obj) = stream.next().await {
let obj = obj?;
let key = obj.key();
let blob_name = key.strip_prefix(&timeline_dir_target.prefix_in_bucket);
match blob_name {
Some(name) if name.starts_with("index_part.json") => {
tracing::info!("Index key {key}");
index_parts.push(obj)
}
Some("initdb.tar.zst") => {
tracing::info!("initdb archive {key}");
initdb_archive = true;
}
Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
Ok((new_layer, gen)) => {
tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
s3_layers.insert((new_layer, gen));
}
Err(e) => {
tracing::info!("Error parsing key {maybe_layer_name}");
errors.push(
format!("S3 list response got an object with key {key} that is not a layer name: {e}"),
);
keys_to_remove.push(key.to_string());
}
},
None => {
tracing::info!("Peculiar key {}", key);
errors.push(format!("S3 list response got an object with odd key {key}"));
keys_to_remove.push(key.to_string());
}
}
}
if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
tracing::info!(
"Timeline is empty apart from initdb archive: expected post-deletion state."
);
return Ok(S3TimelineBlobData {
blob_data: BlobDataParseResult::Relic,
keys_to_remove: Vec::new(),
});
}
// Choose the index_part with the highest generation
let (index_part_object, index_part_generation) = match index_parts
.iter()
.filter_map(|k| {
let key = k.key();
// Stripping the index key to the last part, because RemotePath doesn't
// like absolute paths, and depending on prefix_in_bucket it's possible
// for the keys we read back to start with a slash.
let basename = key.rsplit_once('/').unwrap().1;
parse_remote_index_path(RemotePath::from_string(basename).unwrap()).map(|g| (k, g))
})
.max_by_key(|i| i.1)
.map(|(k, g)| (k.clone(), g))
{
Some((key, gen)) => (Some(key), gen),
None => {
// Legacy/missing case: one or zero index parts, which did not have a generation
(index_parts.pop(), Generation::none())
}
};
if index_part_object.is_none() {
errors.push("S3 list response got no index_part.json file".to_string());
}
if let Some(index_part_object_key) = index_part_object.as_ref().map(|object| object.key()) {
let index_part_bytes = download_object_with_retries(
s3_client,
&timeline_dir_target.bucket_name,
index_part_object_key,
)
.await
.context("index_part.json download")?;
match serde_json::from_slice(&index_part_bytes) {
Ok(index_part) => {
return Ok(S3TimelineBlobData {
blob_data: BlobDataParseResult::Parsed {
index_part,
index_part_generation,
s3_layers,
},
keys_to_remove,
})
}
Err(index_parse_error) => errors.push(format!(
"index_part.json body parsing error: {index_parse_error}"
)),
}
} else {
errors.push(format!(
"Index part object {index_part_object:?} has no key"
));
}
if errors.is_empty() {
errors.push(
"Unexpected: no errors did not lead to a successfully parsed blob return".to_string(),
);
}
Ok(S3TimelineBlobData {
blob_data: BlobDataParseResult::Incorrect(errors),
keys_to_remove,
})
}