Don't collect a histogram of load_layer_map operations per timeline.

The layer map is loaded only once, when the tenant is attached. We don't
need that level of detail anyway: if one tenant is particularly slow
at loading the layer map, we can probably pinpoint which one it is by
looking at the logs.
Author: Heikki Linnakangas
Date:   2023-04-22 20:00:30 +03:00
Commit: c0a71bc334 (parent 2cdb5503b0)

2 changed files with 12 additions and 7 deletions
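
As background for the reasoning above: a per-timeline histogram means one time series per (tenant_id, timeline_id) pair, while a global histogram is a single series. The following is a minimal sketch of the two registration styles, not the pageserver's actual code; it assumes the prometheus and once_cell crates, and the metric names and bucket values are illustrative placeholders rather than STORAGE_OP_BUCKETS.

use once_cell::sync::Lazy;
use prometheus::{register_histogram, register_histogram_vec, Histogram, HistogramVec};

// Per-timeline style (what this commit removes for load_layer_map): one time
// series per (tenant_id, timeline_id) pair, so cardinality grows with the
// number of attached timelines.
static PER_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "example_load_layer_map_seconds",
        "Time spent on loading layer map, per timeline",
        &["tenant_id", "timeline_id"],
        vec![0.005, 0.05, 0.5, 5.0]
    )
    .expect("failed to define a metric")
});

// Global style (what this commit keeps): a single series for all timelines.
static GLOBAL: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "example_load_layer_map_global_seconds",
        "Time spent on loading layer map",
        vec![0.005, 0.05, 0.5, 5.0]
    )
    .expect("failed to define a metric")
});

fn main() {
    // One observation in each style.
    PER_TIMELINE
        .with_label_values(&["tenant-a", "timeline-1"])
        .observe(0.042);
    GLOBAL.observe(0.042);
}

For an operation that runs only once per tenant attach, the labeled variant buys little: each extra series mostly records a single observation, which is the level of detail the commit message deems unnecessary.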

Changed file 1 of 2:

@@ -139,6 +139,15 @@ pub static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
     .unwrap()
 });
+pub static LOAD_LAYER_MAP_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "pageserver_load_layer_map_histogram",
+        "Time spent on loading layer map",
+        STORAGE_OP_BUCKETS.into(),
+    )
+    .expect("failed to define a metric")
+});
 static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
     register_uint_gauge_vec!(
         "pageserver_current_logical_size",
@@ -552,7 +561,7 @@ impl StorageTimeMetricsTimer {
 pub struct StorageTimeMetrics {
     /// Sum of f64 seconds, per operation, tenant_id and timeline_id
     timeline_sum: Counter,
-    /// Number of oeprations, per operation, tenant_id and timeline_id
+    /// Number of operations, per operation, tenant_id and timeline_id
     timeline_count: IntCounter,
     /// Global histogram having only the "operation" label.
     global_histogram: Histogram,
@@ -595,7 +604,6 @@ pub struct TimelineMetrics {
     pub compact_time_histo: StorageTimeMetrics,
     pub create_images_time_histo: StorageTimeMetrics,
     pub logical_size_histo: StorageTimeMetrics,
-    pub load_layer_map_histo: StorageTimeMetrics,
     pub garbage_collect_histo: StorageTimeMetrics,
     pub last_record_gauge: IntGauge,
     pub wait_lsn_time_histo: Histogram,
@@ -627,8 +635,6 @@ impl TimelineMetrics {
         let create_images_time_histo =
             StorageTimeMetrics::new("create images", &tenant_id, &timeline_id);
         let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id);
-        let load_layer_map_histo =
-            StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id);
         let garbage_collect_histo = StorageTimeMetrics::new("gc", &tenant_id, &timeline_id);
         let last_record_gauge = LAST_RECORD_LSN
             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
@@ -664,7 +670,6 @@ impl TimelineMetrics {
             create_images_time_histo,
             logical_size_histo,
             garbage_collect_histo,
-            load_layer_map_histo,
             last_record_gauge,
             wait_lsn_time_histo,
             resident_physical_size_gauge,

Changed file 2 of 2:

@@ -48,7 +48,7 @@ use crate::tenant::{
 use crate::config::PageServerConf;
 use crate::keyspace::{KeyPartitioning, KeySpace};
-use crate::metrics::TimelineMetrics;
+use crate::metrics::{TimelineMetrics, LOAD_LAYER_MAP_HISTOGRAM};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
 use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
@@ -1444,7 +1444,7 @@ impl Timeline {
         let mut updates = layers.batch_update();
         let mut num_layers = 0;
-        let timer = self.metrics.load_layer_map_histo.start_timer();
+        let timer = LOAD_LAYER_MAP_HISTOGRAM.start_timer();
         // Scan timeline directory and create ImageFileName and DeltaFilename
         // structs representing all files on disk
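
Putting the two hunks together, the resulting pattern is: time the operation against the single process-wide histogram, and rely on logging for per-tenant attribution, as the commit message suggests. Below is a hypothetical sketch of that pattern, not the pageserver's load_layer_map implementation; it assumes the prometheus, once_cell, and tracing crates, and the function, metric name, and ids are illustrative.

use once_cell::sync::Lazy;
use prometheus::{register_histogram, Histogram};

// A single process-wide histogram, analogous to LOAD_LAYER_MAP_HISTOGRAM above;
// the name here is an illustrative placeholder.
static LOAD_LAYER_MAP_SECONDS: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "example_load_layer_map_duration_seconds",
        "Time spent on loading layer map"
    )
    .expect("failed to define a metric")
});

// Hypothetical stand-in for the timeline's layer map loading; only the timing
// and logging pattern matters here.
fn load_layer_map(tenant_id: &str, timeline_id: &str) {
    let timer = LOAD_LAYER_MAP_SECONDS.start_timer();
    // ... scan the timeline directory and build the layer map ...
    let elapsed = timer.stop_and_record();
    // Per-tenant detail lives in the log line rather than in metric labels,
    // so a slow tenant can still be pinpointed from the logs.
    tracing::info!(%tenant_id, %timeline_id, elapsed_seconds = elapsed, "loaded layer map");
}

fn main() {
    load_layer_map("tenant-a", "timeline-1");
}

If one tenant is unusually slow at loading its layer map, the log line still identifies it, while the histogram keeps a single low-cardinality view of overall load times.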