pageserver: tweak metric counting layer visits on non-vectored read path

* Give it a better name
* Track all layers
* Larger buckets
This commit is contained in:
Vlad Lazar
2024-04-22 13:48:50 +01:00
parent 25d9dc6eaf
commit ea5b36c47a
3 changed files with 12 additions and 8 deletions

View File

@@ -86,11 +86,14 @@ pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
.expect("failed to define a metric")
});
pub(crate) static READ_NUM_FS_LAYERS: Lazy<Histogram> = Lazy::new(|| {
pub(crate) static READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
register_histogram!(
"pageserver_read_num_fs_layers",
"Number of persistent layers accessed for processing a read request, including those in the cache",
vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0],
"pageserver_layers_visited_per_read_global",
"Number of layers visited to reconstruct one key",
vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
)
.expect("failed to define a metric")
});
)
.expect("failed to define a metric")
});
@@ -2772,6 +2775,7 @@ pub fn preinitialize_metrics() {
// histograms
[
&READ_NUM_FS_LAYERS,
&READ_NUM_LAYERS_VISITED,
&WAIT_LSN_TIME,
&WAL_REDO_TIME,
&WAL_REDO_RECORDS_HISTOGRAM,

View File

@@ -2794,7 +2794,7 @@ impl Timeline {
let mut timeline = self;
let mut read_count = scopeguard::guard(0, |cnt| {
crate::metrics::READ_NUM_FS_LAYERS.observe(cnt as f64)
crate::metrics::READ_NUM_LAYERS_VISITED.observe(cnt as f64)
});
// For debugging purposes, collect the path of layers that we traversed
@@ -2909,7 +2909,7 @@ impl Timeline {
Err(e) => return Err(PageReconstructError::from(e)),
};
cont_lsn = lsn_floor;
// metrics: open_layer does not count as fs access, so we are not updating `read_count`
*read_count += 1;
traversal_path.push((result, cont_lsn, open_layer.traversal_id()));
continue 'outer;
}
@@ -2936,7 +2936,7 @@ impl Timeline {
Err(e) => return Err(PageReconstructError::from(e)),
};
cont_lsn = lsn_floor;
// metrics: open_layer does not count as fs access, so we are not updating `read_count`
*read_count += 1;
traversal_path.push((result, cont_lsn, frozen_layer.traversal_id()));
continue 'outer;
}

View File

@@ -129,7 +129,7 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = (
"pageserver_getpage_reconstruct_seconds_sum",
*[f"pageserver_basebackup_query_seconds_{x}" for x in ["bucket", "count", "sum"]],
*histogram("pageserver_smgr_query_seconds_global"),
*histogram("pageserver_read_num_fs_layers"),
*histogram("pageserver_layers_visited_per_read_global"),
*histogram("pageserver_getpage_get_reconstruct_data_seconds"),
*histogram("pageserver_wait_lsn_seconds"),
*histogram("pageserver_remote_operation_seconds"),