pageserver: add amortized read amp metrics (#11093)

## Problem

In a batch, `pageserver_layers_per_read_global` counts all layer visits
towards every read in the batch, since this directly affects the
observed latency of the read. However, this doesn't give a good picture
of the amortized read amplification due to batching.

## Summary of changes

Add two more global read amp metrics:

* `pageserver_layers_per_read_batch_global`: number of layers visited
per batch.
* `pageserver_layers_per_read_amortized_global`: number of layers
divided by reads in a batch.
This commit is contained in:
Erik Grinaker
2025-03-06 11:23:48 +01:00
committed by GitHub
parent 16b8a3f598
commit ab7efe9e47
2 changed files with 43 additions and 7 deletions

View File

@@ -143,6 +143,29 @@ pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
.expect("failed to define a metric")
});
pub(crate) static LAYERS_PER_READ_BATCH_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_layers_per_read_batch_global",
"Layers visited to serve a single read batch (read amplification), regardless of number of reads.",
vec![
1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
],
)
.expect("failed to define a metric")
});
pub(crate) static LAYERS_PER_READ_AMORTIZED_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_layers_per_read_amortized_global",
"Layers visited to serve a single read (read amplification). Amortized across a batch: \
all visited layers are divided by number of reads.",
vec![
1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
],
)
.expect("failed to define a metric")
});
pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
// We expect this to be low because of Postgres checkpoints. Let's see if that holds.
register_histogram!(
@@ -4074,6 +4097,8 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
// histograms
[
&LAYERS_PER_READ_GLOBAL,
&LAYERS_PER_READ_BATCH_GLOBAL,
&LAYERS_PER_READ_AMORTIZED_GLOBAL,
&DELTAS_PER_READ_GLOBAL,
&WAIT_LSN_TIME,
&WAL_REDO_TIME,

View File

@@ -99,7 +99,8 @@ use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate,
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::l0_flush::{self, L0FlushGlobalState};
use crate::metrics::{
DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics,
DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_AMORTIZED_GLOBAL, LAYERS_PER_READ_BATCH_GLOBAL,
LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics,
};
use crate::page_service::TenantManagerTypes;
use crate::pgdatadir_mapping::{
@@ -1330,10 +1331,6 @@ impl Timeline {
// (this is a requirement, not a bug). Skip updating the metric in these cases
// to avoid infinite results.
if !results.is_empty() {
// Record the total number of layers visited towards each key in the batch. While some
// layers may not intersect with a given read, and the cost of layer visits are
// amortized across the batch, each visited layer contributes directly to the observed
// latency for every read in the batch, which is what we care about.
if layers_visited >= Self::LAYERS_VISITED_WARN_THRESHOLD {
static LOG_PACER: Lazy<Mutex<RateLimit>> =
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(60))));
@@ -1348,9 +1345,23 @@ impl Timeline {
});
}
// Records the number of layers visited in a few different ways:
//
// * LAYERS_PER_READ: all layers count towards every read in the batch, because each
// layer directly affects its observed latency.
//
// * LAYERS_PER_READ_BATCH: all layers count towards each batch, to get the per-batch
// layer visits and access cost.
//
// * LAYERS_PER_READ_AMORTIZED: the average layer count per read, to get the amortized
// read amplification after batching.
let layers_visited = layers_visited as f64;
let avg_layers_visited = layers_visited / results.len() as f64;
LAYERS_PER_READ_BATCH_GLOBAL.observe(layers_visited);
for _ in &results {
self.metrics.layers_per_read.observe(layers_visited as f64);
LAYERS_PER_READ_GLOBAL.observe(layers_visited as f64);
self.metrics.layers_per_read.observe(layers_visited);
LAYERS_PER_READ_GLOBAL.observe(layers_visited);
LAYERS_PER_READ_AMORTIZED_GLOBAL.observe(avg_layers_visited);
}
}