From c0a71bc334597cae1748d05d6671dee1f97fd405 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 22 Apr 2023 20:00:30 +0300 Subject: [PATCH] Don't collect histogram of load_layer_map operations per timeline. The layer map is loaded only once, when the tenant is attached. We don't need that level of detail anyway: if one tenant is particularly slow at loading the layer map, we can probably pinpoint which one it is by looking at the logs. --- pageserver/src/metrics.rs | 15 ++++++++++----- pageserver/src/tenant/timeline.rs | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index c075315683..27ae07fa35 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -139,6 +139,15 @@ pub static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy = Lazy::new(|| { .unwrap() }); +pub static LOAD_LAYER_MAP_HISTOGRAM: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_load_layer_map_histogram", + "Time spent on loading layer map", + STORAGE_OP_BUCKETS.into(), + ) + .expect("failed to define a metric") +}); + static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_current_logical_size", @@ -552,7 +561,7 @@ impl StorageTimeMetricsTimer { pub struct StorageTimeMetrics { /// Sum of f64 seconds, per operation, tenant_id and timeline_id timeline_sum: Counter, - /// Number of oeprations, per operation, tenant_id and timeline_id + /// Number of operations, per operation, tenant_id and timeline_id timeline_count: IntCounter, /// Global histogram having only the "operation" label. 
global_histogram: Histogram, @@ -595,7 +604,6 @@ pub struct TimelineMetrics { pub compact_time_histo: StorageTimeMetrics, pub create_images_time_histo: StorageTimeMetrics, pub logical_size_histo: StorageTimeMetrics, - pub load_layer_map_histo: StorageTimeMetrics, pub garbage_collect_histo: StorageTimeMetrics, pub last_record_gauge: IntGauge, pub wait_lsn_time_histo: Histogram, @@ -627,8 +635,6 @@ impl TimelineMetrics { let create_images_time_histo = StorageTimeMetrics::new("create images", &tenant_id, &timeline_id); let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id); - let load_layer_map_histo = - StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id); let garbage_collect_histo = StorageTimeMetrics::new("gc", &tenant_id, &timeline_id); let last_record_gauge = LAST_RECORD_LSN .get_metric_with_label_values(&[&tenant_id, &timeline_id]) @@ -664,7 +670,6 @@ impl TimelineMetrics { create_images_time_histo, logical_size_histo, garbage_collect_histo, - load_layer_map_histo, last_record_gauge, wait_lsn_time_histo, resident_physical_size_gauge, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index b8b1f963e5..69da19be7a 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -48,7 +48,7 @@ use crate::tenant::{ use crate::config::PageServerConf; use crate::keyspace::{KeyPartitioning, KeySpace}; -use crate::metrics::TimelineMetrics; +use crate::metrics::{TimelineMetrics, LOAD_LAYER_MAP_HISTOGRAM}; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key}; use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError}; @@ -1444,7 +1444,7 @@ impl Timeline { let mut updates = layers.batch_update(); let mut num_layers = 0; - let timer = self.metrics.load_layer_map_histo.start_timer(); + let timer = LOAD_LAYER_MAP_HISTOGRAM.start_timer(); // Scan timeline directory and create ImageFileName 
and DeltaFilename // structs representing all files on disk