mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-23 16:10:37 +00:00
This doesn't make the scrubber smart enough to understand that many shards can belong to the same tenant, but it does make it understand paths well enough to scrub the individual shards without treating them as malformed. This is a prerequisite for running tests with sharding enabled. Related: #5929
222 lines
6.6 KiB
Rust
222 lines
6.6 KiB
Rust
use std::collections::{HashMap, HashSet};
|
|
|
|
use crate::checks::{
|
|
branch_cleanup_and_check_errors, list_timeline_blobs, BlobDataParseResult, S3TimelineBlobData,
|
|
TimelineAnalysis,
|
|
};
|
|
use crate::metadata_stream::{stream_tenant_timelines, stream_tenants};
|
|
use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId};
|
|
use aws_sdk_s3::Client;
|
|
use futures_util::{pin_mut, StreamExt, TryStreamExt};
|
|
use histogram::Histogram;
|
|
use pageserver::tenant::IndexPart;
|
|
use serde::Serialize;
|
|
|
|
#[derive(Serialize)]
|
|
pub struct MetadataSummary {
|
|
count: usize,
|
|
with_errors: HashSet<TenantShardTimelineId>,
|
|
with_warnings: HashSet<TenantShardTimelineId>,
|
|
with_garbage: HashSet<TenantShardTimelineId>,
|
|
indices_by_version: HashMap<usize, usize>,
|
|
|
|
layer_count: MinMaxHisto,
|
|
timeline_size_bytes: MinMaxHisto,
|
|
layer_size_bytes: MinMaxHisto,
|
|
}
|
|
|
|
/// A histogram plus minimum and maximum tracking
|
|
#[derive(Serialize)]
|
|
struct MinMaxHisto {
|
|
#[serde(skip)]
|
|
histo: Histogram,
|
|
min: u64,
|
|
max: u64,
|
|
}
|
|
|
|
impl MinMaxHisto {
|
|
fn new() -> Self {
|
|
Self {
|
|
histo: histogram::Histogram::builder()
|
|
.build()
|
|
.expect("Bad histogram params"),
|
|
min: u64::MAX,
|
|
max: 0,
|
|
}
|
|
}
|
|
|
|
fn sample(&mut self, v: u64) -> Result<(), histogram::Error> {
|
|
self.min = std::cmp::min(self.min, v);
|
|
self.max = std::cmp::max(self.max, v);
|
|
let r = self.histo.increment(v, 1);
|
|
|
|
if r.is_err() {
|
|
tracing::warn!("Bad histogram sample: {v}");
|
|
}
|
|
|
|
r
|
|
}
|
|
|
|
fn oneline(&self) -> String {
|
|
let percentiles = match self.histo.percentiles(&[1.0, 10.0, 50.0, 90.0, 99.0]) {
|
|
Ok(p) => p,
|
|
Err(e) => return format!("No data: {}", e),
|
|
};
|
|
|
|
let percentiles: Vec<u64> = percentiles
|
|
.iter()
|
|
.map(|p| p.bucket().low() + p.bucket().high() / 2)
|
|
.collect();
|
|
|
|
format!(
|
|
"min {}, 1% {}, 10% {}, 50% {}, 90% {}, 99% {}, max {}",
|
|
self.min,
|
|
percentiles[0],
|
|
percentiles[1],
|
|
percentiles[2],
|
|
percentiles[3],
|
|
percentiles[4],
|
|
self.max,
|
|
)
|
|
}
|
|
}
|
|
|
|
impl MetadataSummary {
|
|
fn new() -> Self {
|
|
Self {
|
|
count: 0,
|
|
with_errors: HashSet::new(),
|
|
with_warnings: HashSet::new(),
|
|
with_garbage: HashSet::new(),
|
|
indices_by_version: HashMap::new(),
|
|
layer_count: MinMaxHisto::new(),
|
|
timeline_size_bytes: MinMaxHisto::new(),
|
|
layer_size_bytes: MinMaxHisto::new(),
|
|
}
|
|
}
|
|
|
|
fn update_histograms(&mut self, index_part: &IndexPart) -> Result<(), histogram::Error> {
|
|
self.layer_count
|
|
.sample(index_part.layer_metadata.len() as u64)?;
|
|
let mut total_size: u64 = 0;
|
|
for meta in index_part.layer_metadata.values() {
|
|
total_size += meta.file_size;
|
|
self.layer_size_bytes.sample(meta.file_size)?;
|
|
}
|
|
self.timeline_size_bytes.sample(total_size)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn update_data(&mut self, data: &S3TimelineBlobData) {
|
|
self.count += 1;
|
|
if let BlobDataParseResult::Parsed {
|
|
index_part,
|
|
index_part_generation: _,
|
|
s3_layers: _,
|
|
} = &data.blob_data
|
|
{
|
|
*self
|
|
.indices_by_version
|
|
.entry(index_part.get_version())
|
|
.or_insert(0) += 1;
|
|
|
|
if let Err(e) = self.update_histograms(index_part) {
|
|
// Value out of range? Warn that the results are untrustworthy
|
|
tracing::warn!(
|
|
"Error updating histograms, summary stats may be wrong: {}",
|
|
e
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn update_analysis(&mut self, id: &TenantShardTimelineId, analysis: &TimelineAnalysis) {
|
|
if !analysis.errors.is_empty() {
|
|
self.with_errors.insert(*id);
|
|
}
|
|
|
|
if !analysis.warnings.is_empty() {
|
|
self.with_warnings.insert(*id);
|
|
}
|
|
}
|
|
|
|
/// Long-form output for printing at end of a scan
|
|
pub fn summary_string(&self) -> String {
|
|
let version_summary: String = itertools::join(
|
|
self.indices_by_version
|
|
.iter()
|
|
.map(|(k, v)| format!("{k}: {v}")),
|
|
", ",
|
|
);
|
|
|
|
format!(
|
|
"Timelines: {0}
|
|
With errors: {1}
|
|
With warnings: {2}
|
|
With garbage: {3}
|
|
Index versions: {version_summary}
|
|
Timeline size bytes: {4}
|
|
Layer size bytes: {5}
|
|
Timeline layer count: {6}
|
|
",
|
|
self.count,
|
|
self.with_errors.len(),
|
|
self.with_warnings.len(),
|
|
self.with_garbage.len(),
|
|
self.timeline_size_bytes.oneline(),
|
|
self.layer_size_bytes.oneline(),
|
|
self.layer_count.oneline(),
|
|
)
|
|
}
|
|
|
|
pub fn is_fatal(&self) -> bool {
|
|
!self.with_errors.is_empty()
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.count == 0
|
|
}
|
|
}
|
|
|
|
/// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.
|
|
pub async fn scan_metadata(bucket_config: BucketConfig) -> anyhow::Result<MetadataSummary> {
|
|
let (s3_client, target) = init_remote(bucket_config, NodeKind::Pageserver)?;
|
|
|
|
let tenants = stream_tenants(&s3_client, &target);
|
|
|
|
// How many tenants to process in parallel. We need to be mindful of pageservers
|
|
// accessing the same per tenant prefixes, so use a lower setting than pageservers.
|
|
const CONCURRENCY: usize = 32;
|
|
|
|
// Generate a stream of TenantTimelineId
|
|
let timelines = tenants.map_ok(|t| stream_tenant_timelines(&s3_client, &target, t));
|
|
let timelines = timelines.try_buffer_unordered(CONCURRENCY);
|
|
let timelines = timelines.try_flatten();
|
|
|
|
// Generate a stream of S3TimelineBlobData
|
|
async fn report_on_timeline(
|
|
s3_client: &Client,
|
|
target: &RootTarget,
|
|
ttid: TenantShardTimelineId,
|
|
) -> anyhow::Result<(TenantShardTimelineId, S3TimelineBlobData)> {
|
|
let data = list_timeline_blobs(s3_client, ttid, target).await?;
|
|
Ok((ttid, data))
|
|
}
|
|
let timelines = timelines.map_ok(|ttid| report_on_timeline(&s3_client, &target, ttid));
|
|
let timelines = timelines.try_buffer_unordered(CONCURRENCY);
|
|
|
|
let mut summary = MetadataSummary::new();
|
|
pin_mut!(timelines);
|
|
while let Some(i) = timelines.next().await {
|
|
let (ttid, data) = i?;
|
|
summary.update_data(&data);
|
|
|
|
let analysis = branch_cleanup_and_check_errors(&ttid, &target, None, None, Some(data));
|
|
|
|
summary.update_analysis(&ttid, &analysis);
|
|
}
|
|
|
|
Ok(summary)
|
|
}
|