feat(pageserver): more statistics for gc-compaction (#11103)

## Problem

part of https://github.com/neondatabase/neon/issues/9114

## Summary of changes

* Add timers for each phase of the gc-compaction.
* Add a final ratio computation to directly show the garbage collection
ratio in the logs.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2025-03-06 11:44:00 -05:00
committed by GitHub
parent 4b77807de9
commit 11334a2cdb

View File

@@ -7,6 +7,7 @@
use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque};
use std::ops::{Deref, Range};
use std::sync::Arc;
use std::time::Instant;
use super::layer_manager::LayerManager;
use super::{
@@ -724,17 +725,41 @@ struct CompactionStatisticsNumSize {
#[derive(Debug, Serialize, Default)]
pub struct CompactionStatistics {
/// Delta layer visited (maybe compressed, physical size)
delta_layer_visited: CompactionStatisticsNumSize,
/// Image layer visited (maybe compressed, physical size)
image_layer_visited: CompactionStatisticsNumSize,
/// Delta layer produced (maybe compressed, physical size)
delta_layer_produced: CompactionStatisticsNumSize,
/// Image layer produced (maybe compressed, physical size)
image_layer_produced: CompactionStatisticsNumSize,
num_delta_layer_discarded: usize,
num_image_layer_discarded: usize,
/// Delta layer discarded (maybe compressed, physical size of the layer being discarded instead of the original layer)
delta_layer_discarded: CompactionStatisticsNumSize,
/// Image layer discarded (maybe compressed, physical size of the layer being discarded instead of the original layer)
image_layer_discarded: CompactionStatisticsNumSize,
num_unique_keys_visited: usize,
/// Delta visited (uncompressed, original size)
wal_keys_visited: CompactionStatisticsNumSize,
/// Image visited (uncompressed, original size)
image_keys_visited: CompactionStatisticsNumSize,
/// Delta produced (uncompressed, original size)
wal_produced: CompactionStatisticsNumSize,
/// Image produced (uncompressed, original size)
image_produced: CompactionStatisticsNumSize,
// Time spent in each phase
time_acquire_lock_secs: f64,
time_analyze_secs: f64,
time_download_layer_secs: f64,
time_main_loop_secs: f64,
time_final_phase_secs: f64,
time_total_secs: f64,
// Summary
/// Ratio of the key-value size before/after gc-compaction.
uncompressed_size_ratio: f64,
/// Ratio of the physical size before/after gc-compaction.
physical_size_ratio: f64,
}
impl CompactionStatistics {
@@ -784,11 +809,13 @@ impl CompactionStatistics {
self.image_produced.num += 1;
self.image_produced.size += val.len() as u64 + Self::estimated_size_of_key() as u64;
}
fn discard_delta_layer(&mut self) {
self.num_delta_layer_discarded += 1;
fn discard_delta_layer(&mut self, original_size: u64) {
self.delta_layer_discarded.num += 1;
self.delta_layer_discarded.size += original_size;
}
fn discard_image_layer(&mut self) {
self.num_image_layer_discarded += 1;
fn discard_image_layer(&mut self, original_size: u64) {
self.image_layer_discarded.num += 1;
self.image_layer_discarded.size += original_size;
}
fn produce_delta_layer(&mut self, size: u64) {
self.delta_layer_produced.num += 1;
@@ -798,6 +825,19 @@ impl CompactionStatistics {
self.image_layer_produced.num += 1;
self.image_layer_produced.size += size;
}
fn finalize(&mut self) {
let original_key_value_size = self.image_keys_visited.size + self.wal_keys_visited.size;
let produced_key_value_size = self.image_produced.size + self.wal_produced.size;
self.uncompressed_size_ratio =
original_key_value_size as f64 / (produced_key_value_size as f64 + 1.0); // avoid div by 0
let original_physical_size = self.image_layer_visited.size + self.delta_layer_visited.size;
let produced_physical_size = self.image_layer_produced.size
+ self.delta_layer_produced.size
+ self.image_layer_discarded.size
+ self.delta_layer_discarded.size; // Also include the discarded layers to make the ratio accurate
self.physical_size_ratio =
original_physical_size as f64 / (produced_physical_size as f64 + 1.0); // avoid div by 0
}
}
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
@@ -2595,6 +2635,9 @@ impl Timeline {
// with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc.
// Note that we already acquired the compaction lock when the outer `compact` function gets called.
let timer = Instant::now();
let begin_timer = timer;
let gc_lock = async {
tokio::select! {
guard = self.gc_lock.lock() => Ok(guard),
@@ -2602,6 +2645,9 @@ impl Timeline {
}
};
let time_acquire_lock = timer.elapsed();
let timer = Instant::now();
let gc_lock = crate::timed(
gc_lock,
"acquires gc lock",
@@ -2791,6 +2837,9 @@ impl Timeline {
has_data_below,
);
let time_analyze = timer.elapsed();
let timer = Instant::now();
for layer in &job_desc.selected_layers {
debug!("read layer: {}", layer.layer_desc().key());
}
@@ -2893,6 +2942,9 @@ impl Timeline {
.context("failed to create filter iterator")
.map_err(CompactionError::Other)?;
let time_download_layer = timer.elapsed();
let timer = Instant::now();
// Step 2: Produce images+deltas.
let mut accumulated_values = Vec::new();
let mut last_key: Option<Key> = None;
@@ -3114,6 +3166,9 @@ impl Timeline {
.map_err(CompactionError::Other)?;
// end: move the above part to the loop body
let time_main_loop = timer.elapsed();
let timer = Instant::now();
let mut rewrote_delta_layers = Vec::new();
for (key, writers) in delta_layer_rewriters {
if let Some(delta_writer_before) = writers.before {
@@ -3178,6 +3233,13 @@ impl Timeline {
let mut keep_layers = HashSet::new();
let produced_delta_layers_len = produced_delta_layers.len();
let produced_image_layers_len = produced_image_layers.len();
let layer_selection_by_key = job_desc
.selected_layers
.iter()
.map(|l| (l.layer_desc().key(), l.layer_desc().clone()))
.collect::<HashMap<_, _>>();
for action in produced_delta_layers {
match action {
BatchWriterResult::Produced(layer) => {
@@ -3191,8 +3253,16 @@ impl Timeline {
if cfg!(debug_assertions) {
info!("discarded delta layer: {}", l);
}
if let Some(layer_desc) = layer_selection_by_key.get(&l) {
stat.discard_delta_layer(layer_desc.file_size());
} else {
tracing::warn!(
"discarded delta layer not in layer_selection: {}, produced a layer outside of the compaction key range?",
l
);
stat.discard_delta_layer(0);
}
keep_layers.insert(l);
stat.discard_delta_layer();
}
}
}
@@ -3201,6 +3271,9 @@ impl Timeline {
"produced rewritten delta layer: {}",
layer.layer_desc().key()
);
// For now, we include rewritten delta layer size in the "produce_delta_layer". We could
// make it a separate statistics in the future.
stat.produce_delta_layer(layer.layer_desc().file_size());
}
compact_to.extend(rewrote_delta_layers);
for action in produced_image_layers {
@@ -3212,8 +3285,16 @@ impl Timeline {
}
BatchWriterResult::Discarded(l) => {
debug!("discarded image layer: {}", l);
if let Some(layer_desc) = layer_selection_by_key.get(&l) {
stat.discard_image_layer(layer_desc.file_size());
} else {
tracing::warn!(
"discarded image layer not in layer_selection: {}, produced a layer outside of the compaction key range?",
l
);
stat.discard_image_layer(0);
}
keep_layers.insert(l);
stat.discard_image_layer();
}
}
}
@@ -3261,6 +3342,16 @@ impl Timeline {
layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key()));
let time_final_phase = timer.elapsed();
stat.time_final_phase_secs = time_final_phase.as_secs_f64();
stat.time_main_loop_secs = time_main_loop.as_secs_f64();
stat.time_acquire_lock_secs = time_acquire_lock.as_secs_f64();
stat.time_download_layer_secs = time_download_layer.as_secs_f64();
stat.time_analyze_secs = time_analyze.as_secs_f64();
stat.time_total_secs = begin_timer.elapsed().as_secs_f64();
stat.finalize();
info!(
"gc-compaction statistics: {}",
serde_json::to_string(&stat)