feat(pageserver): add metrics for aux file size (#7623)

ref https://github.com/neondatabase/neon/issues/7443

## Summary of changes

This pull request adds a size estimator for aux files. Each timeline
stores a cached `isize` for the estimated total size of aux files. It
gets reset on basebackup, and gets updated for each aux file
modification. TODO: print a warning when it exceeds the size.

The size metrics is not accurate. Race between `on_basebackup` and other
functions could create a negative basebackup size, but the chance is
rare. Anyways, this does not impose any extra I/Os to the storage as
everything is computed in-memory.

The aux files are only stored on shard 0. As basebackups are only
generated on shard 0, only shard 0 will report this metrics.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z
2024-05-13 11:33:41 -04:00
committed by GitHub
parent 4d8a10af1c
commit 7f51764001
5 changed files with 128 additions and 24 deletions

View File

@@ -1,3 +1,6 @@
use std::sync::Arc;
use ::metrics::IntGauge;
use bytes::{Buf, BufMut, Bytes};
use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE};
use tracing::warn;
@@ -140,6 +143,55 @@ pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result<Vec<u8>> {
Ok(encoded)
}
/// An estimation of the size of aux files.
pub struct AuxFileSizeEstimator {
aux_file_size_gauge: IntGauge,
size: Arc<std::sync::Mutex<Option<isize>>>,
}
impl AuxFileSizeEstimator {
pub fn new(aux_file_size_gauge: IntGauge) -> Self {
Self {
aux_file_size_gauge,
size: Arc::new(std::sync::Mutex::new(None)),
}
}
pub fn on_base_backup(&self, new_size: usize) {
let mut guard = self.size.lock().unwrap();
*guard = Some(new_size as isize);
self.report(new_size as isize);
}
pub fn on_add(&self, file_size: usize) {
let mut guard = self.size.lock().unwrap();
if let Some(size) = &mut *guard {
*size += file_size as isize;
self.report(*size);
}
}
pub fn on_remove(&self, file_size: usize) {
let mut guard = self.size.lock().unwrap();
if let Some(size) = &mut *guard {
*size -= file_size as isize;
self.report(*size);
}
}
pub fn on_update(&self, old_size: usize, new_size: usize) {
let mut guard = self.size.lock().unwrap();
if let Some(size) = &mut *guard {
*size += new_size as isize - old_size as isize;
self.report(*size);
}
}
pub fn report(&self, size: isize) {
self.aux_file_size_gauge.set(size as i64);
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -585,6 +585,15 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
.expect("failed to define current logical size metric")
});
static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
"pageserver_aux_file_estimated_size",
"The size of all aux files for a timeline in aux file v2 store.",
&["tenant_id", "shard_id", "timeline_id"]
)
.expect("failed to define a metric")
});
pub(crate) mod initial_logical_size {
use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
use once_cell::sync::Lazy;
@@ -2115,6 +2124,7 @@ pub(crate) struct TimelineMetrics {
resident_physical_size_gauge: UIntGauge,
/// copy of LayeredTimeline.current_logical_size
pub current_logical_size_gauge: UIntGauge,
pub aux_file_size_gauge: IntGauge,
pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
pub evictions: IntCounter,
pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
@@ -2187,6 +2197,9 @@ impl TimelineMetrics {
let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
let aux_file_size_gauge = AUX_FILE_SIZE
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
.unwrap();
// TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
let directory_entries_count_gauge_closure = {
let tenant_shard_id = *tenant_shard_id;
@@ -2224,6 +2237,7 @@ impl TimelineMetrics {
last_record_gauge,
resident_physical_size_gauge,
current_logical_size_gauge,
aux_file_size_gauge,
directory_entries_count_gauge,
evictions,
evictions_with_low_residence_duration: std::sync::RwLock::new(
@@ -2264,6 +2278,7 @@ impl TimelineMetrics {
let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
}
let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
self.evictions_with_low_residence_duration
.write()

View File

@@ -699,13 +699,17 @@ impl Timeline {
.await
.context("scan")?;
let mut result = HashMap::new();
let mut sz = 0;
for (_, v) in kv {
let v = v.context("get value")?;
let v = aux_file::decode_file_value_bytes(&v).context("value decode")?;
for (fname, content) in v {
sz += fname.len();
sz += content.len();
result.insert(fname, content);
}
}
self.aux_file_size_estimator.on_base_backup(sz);
Ok(result)
}
@@ -1474,23 +1478,45 @@ impl<'a> DatadirModification<'a> {
Err(PageReconstructError::MissingKey(_)) => None,
Err(e) => return Err(e.into()),
};
let files = if let Some(ref old_val) = old_val {
let files: Vec<(&str, &[u8])> = if let Some(ref old_val) = old_val {
aux_file::decode_file_value(old_val)?
} else {
Vec::new()
};
let new_files = if content.is_empty() {
files
.into_iter()
.filter(|(p, _)| &path != p)
.collect::<Vec<_>>()
} else {
files
.into_iter()
.filter(|(p, _)| &path != p)
.chain(std::iter::once((path, content)))
.collect::<Vec<_>>()
};
let mut other_files = Vec::with_capacity(files.len());
let mut modifying_file = None;
for file @ (p, content) in files {
if path == p {
assert!(
modifying_file.is_none(),
"duplicated entries found for {}",
path
);
modifying_file = Some(content);
} else {
other_files.push(file);
}
}
let mut new_files = other_files;
match (modifying_file, content.is_empty()) {
(Some(old_content), false) => {
self.tline
.aux_file_size_estimator
.on_update(old_content.len(), content.len());
new_files.push((path, content));
}
(Some(old_content), true) => {
self.tline
.aux_file_size_estimator
.on_remove(old_content.len());
// not adding the file key to the final `new_files` vec.
}
(None, false) => {
self.tline.aux_file_size_estimator.on_add(content.len());
new_files.push((path, content));
}
(None, true) => anyhow::bail!("removing non-existing aux file: {}", path),
}
let new_val = aux_file::encode_file_value(&new_files)?;
self.put(key, Value::Image(new_val.into()));
}

View File

@@ -61,9 +61,12 @@ use std::{
};
use crate::tenant::timeline::init::LocalLayerFileMetadata;
use crate::tenant::{
layer_map::{LayerMap, SearchResult},
metadata::TimelineMetadata,
use crate::{
aux_file::AuxFileSizeEstimator,
tenant::{
layer_map::{LayerMap, SearchResult},
metadata::TimelineMetadata,
},
};
use crate::{
context::{DownloadBehavior, RequestContext},
@@ -409,6 +412,8 @@ pub struct Timeline {
/// Keep aux directory cache to avoid it's reconstruction on each update
pub(crate) aux_files: tokio::sync::Mutex<AuxFilesState>,
pub(crate) aux_file_size_estimator: AuxFileSizeEstimator,
}
pub struct WalReceiverInfo {
@@ -2161,6 +2166,16 @@ impl Timeline {
};
Arc::new_cyclic(|myself| {
let metrics = TimelineMetrics::new(
&tenant_shard_id,
&timeline_id,
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
"mtime",
evictions_low_residence_duration_metric_threshold,
),
);
let aux_file_metrics = metrics.aux_file_size_gauge.clone();
let mut result = Timeline {
conf,
tenant_conf,
@@ -2192,14 +2207,7 @@ impl Timeline {
ancestor_timeline: ancestor,
ancestor_lsn: metadata.ancestor_lsn(),
metrics: TimelineMetrics::new(
&tenant_shard_id,
&timeline_id,
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
"mtime",
evictions_low_residence_duration_metric_threshold,
),
),
metrics,
query_metrics: crate::metrics::SmgrQueryTimePerTimeline::new(
&tenant_shard_id,
@@ -2263,6 +2271,8 @@ impl Timeline {
dir: None,
n_deltas: 0,
}),
aux_file_size_estimator: AuxFileSizeEstimator::new(aux_file_metrics),
};
result.repartition_threshold =
result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE;

View File

@@ -149,6 +149,7 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
"pageserver_storage_operations_seconds_sum_total",
"pageserver_evictions_total",
"pageserver_evictions_with_low_residence_duration_total",
"pageserver_aux_file_estimated_size",
*PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS,
# "pageserver_directory_entries_count", -- only used if above a certain threshold
# "pageserver_broken_tenants_count" -- used only for broken