From 20b38acff0287df29050653327eb12a5959e9298 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lassi=20P=C3=B6l=C3=B6nen?= Date: Mon, 30 Jan 2023 17:10:29 +0200 Subject: [PATCH] Replace per timeline `pageserver_storage_operations_seconds` with a global one (#3409) Related to: https://github.com/neondatabase/neon/issues/2848 `pageserver_storage_operations_seconds` is the most expensive metric we have, as there are a lot of tenants/timelines and the histogram had 42 buckets. These are quite sparse too, so instead of having a histogram per timeline, create a new histogram `pageserver_storage_operations_seconds_global` without tenant and timeline dimensions and replace `pageserver_storage_operations_seconds` with sum and counter. Co-authored-by: Joonas Koivunen --- libs/metrics/src/lib.rs | 1 + pageserver/src/metrics.rs | 143 +++++++++++++++++++++------- pageserver/src/tenant.rs | 17 +--- pageserver/src/tenant/timeline.rs | 13 ++- test_runner/fixtures/metrics.py | 11 ++- test_runner/regress/test_tenants.py | 9 ++ 6 files changed, 143 insertions(+), 51 deletions(-) diff --git a/libs/metrics/src/lib.rs b/libs/metrics/src/lib.rs index 880ab0e83c..07d220195b 100644 --- a/libs/metrics/src/lib.rs +++ b/libs/metrics/src/lib.rs @@ -8,6 +8,7 @@ pub use prometheus::opts; pub use prometheus::register; pub use prometheus::{core, default_registry, proto}; pub use prometheus::{exponential_buckets, linear_buckets}; +pub use prometheus::{register_counter_vec, Counter, CounterVec}; pub use prometheus::{register_gauge, Gauge}; pub use prometheus::{register_gauge_vec, GaugeVec}; pub use prometheus::{register_histogram, Histogram}; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 7836712ca6..6bd0eddbb5 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1,8 +1,9 @@ use metrics::core::{AtomicU64, GenericCounter}; use metrics::{ - register_histogram, register_histogram_vec, register_int_counter, register_int_counter_vec, - register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec, Histogram, HistogramVec, - IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, + register_counter_vec, register_histogram, register_histogram_vec, register_int_counter, + register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec, + Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, + UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; use pageserver_api::models::state; @@ -36,11 +37,29 @@ const STORAGE_TIME_OPERATIONS: &[&str] = &[ "gc", ]; -pub static STORAGE_TIME: Lazy = Lazy::new(|| { - register_histogram_vec!( - "pageserver_storage_operations_seconds", - "Time spent on storage operations", +pub static STORAGE_TIME_SUM_PER_TIMELINE: Lazy = Lazy::new(|| { + register_counter_vec!( + "pageserver_storage_operations_seconds_sum", + "Total time spent on storage operations with operation, tenant and timeline dimensions", &["operation", "tenant_id", "timeline_id"], + ) + .expect("failed to define a metric") +}); + +pub static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_storage_operations_seconds_count", + "Count of storage operations with operation, tenant and timeline dimensions", + &["operation", "tenant_id", "timeline_id"], + ) + .expect("failed to define a metric") +}); + +pub static STORAGE_TIME_GLOBAL: Lazy = Lazy::new(|| { + register_histogram_vec!( + "pageserver_storage_operations_seconds_global", + "Time spent on storage operations", + &["operation"], get_buckets_for_critical_operations(), ) .expect("failed to define a metric") @@ -394,18 +413,81 @@ pub static WAL_REDO_RECORD_COUNTER: Lazy = Lazy::new(|| { .unwrap() }); +/// Similar to [`prometheus::HistogramTimer`] but does not record on drop. +pub struct StorageTimeMetricsTimer { + metrics: StorageTimeMetrics, + start: Instant, +} + +impl StorageTimeMetricsTimer { + fn new(metrics: StorageTimeMetrics) -> Self { + Self { + metrics, + start: Instant::now(), + } + } + + /// Record the time from creation to now. + pub fn stop_and_record(self) { + let duration = self.start.elapsed().as_secs_f64(); + self.metrics.timeline_sum.inc_by(duration); + self.metrics.timeline_count.inc(); + self.metrics.global_histogram.observe(duration); + } +} + +/// Timing facilities for an globally histogrammed metric, which is supported by per tenant and +/// timeline total sum and count. +#[derive(Clone, Debug)] +pub struct StorageTimeMetrics { + /// Sum of f64 seconds, per operation, tenant_id and timeline_id + timeline_sum: Counter, + /// Number of oeprations, per operation, tenant_id and timeline_id + timeline_count: IntCounter, + /// Global histogram having only the "operation" label. + global_histogram: Histogram, +} + +impl StorageTimeMetrics { + pub fn new(operation: &str, tenant_id: &str, timeline_id: &str) -> Self { + let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE + .get_metric_with_label_values(&[operation, tenant_id, timeline_id]) + .unwrap(); + let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE + .get_metric_with_label_values(&[operation, tenant_id, timeline_id]) + .unwrap(); + let global_histogram = STORAGE_TIME_GLOBAL + .get_metric_with_label_values(&[operation]) + .unwrap(); + + StorageTimeMetrics { + timeline_sum, + timeline_count, + global_histogram, + } + } + + /// Starts timing a new operation. + /// + /// Note: unlike [`prometheus::HistogramTimer`] the returned timer does not record on drop. + pub fn start_timer(&self) -> StorageTimeMetricsTimer { + StorageTimeMetricsTimer::new(self.clone()) + } +} + #[derive(Debug)] pub struct TimelineMetrics { tenant_id: String, timeline_id: String, pub reconstruct_time_histo: Histogram, pub materialized_page_cache_hit_counter: GenericCounter, - pub flush_time_histo: Histogram, - pub compact_time_histo: Histogram, - pub create_images_time_histo: Histogram, - pub init_logical_size_histo: Histogram, - pub logical_size_histo: Histogram, - pub load_layer_map_histo: Histogram, + pub flush_time_histo: StorageTimeMetrics, + pub compact_time_histo: StorageTimeMetrics, + pub create_images_time_histo: StorageTimeMetrics, + pub init_logical_size_histo: StorageTimeMetrics, + pub logical_size_histo: StorageTimeMetrics, + pub load_layer_map_histo: StorageTimeMetrics, + pub garbage_collect_histo: StorageTimeMetrics, pub last_record_gauge: IntGauge, pub wait_lsn_time_histo: Histogram, pub resident_physical_size_gauge: UIntGauge, @@ -425,24 +507,16 @@ impl TimelineMetrics { let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); - let flush_time_histo = STORAGE_TIME - .get_metric_with_label_values(&["layer flush", &tenant_id, &timeline_id]) - .unwrap(); - let compact_time_histo = STORAGE_TIME - .get_metric_with_label_values(&["compact", &tenant_id, &timeline_id]) - .unwrap(); - let create_images_time_histo = STORAGE_TIME - .get_metric_with_label_values(&["create images", &tenant_id, &timeline_id]) - .unwrap(); - let init_logical_size_histo = STORAGE_TIME - .get_metric_with_label_values(&["init logical size", &tenant_id, &timeline_id]) - .unwrap(); - let logical_size_histo = STORAGE_TIME - .get_metric_with_label_values(&["logical size", &tenant_id, &timeline_id]) - .unwrap(); - let load_layer_map_histo = STORAGE_TIME - .get_metric_with_label_values(&["load layer map", &tenant_id, &timeline_id]) - .unwrap(); + let flush_time_histo = StorageTimeMetrics::new("layer flush", &tenant_id, &timeline_id); + let compact_time_histo = StorageTimeMetrics::new("compact", &tenant_id, &timeline_id); + let create_images_time_histo = + StorageTimeMetrics::new("create images", &tenant_id, &timeline_id); + let init_logical_size_histo = + StorageTimeMetrics::new("init logical size", &tenant_id, &timeline_id); + let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id); + let load_layer_map_histo = + StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id); + let garbage_collect_histo = StorageTimeMetrics::new("gc", &tenant_id, &timeline_id); let last_record_gauge = LAST_RECORD_LSN .get_metric_with_label_values(&[&tenant_id, &timeline_id]) .unwrap(); @@ -472,6 +546,7 @@ impl TimelineMetrics { create_images_time_histo, init_logical_size_histo, logical_size_histo, + garbage_collect_histo, load_layer_map_histo, last_record_gauge, wait_lsn_time_histo, @@ -497,7 +572,10 @@ impl Drop for TimelineMetrics { let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]); for op in STORAGE_TIME_OPERATIONS { - let _ = STORAGE_TIME.remove_label_values(&[op, tenant_id, timeline_id]); + let _ = + STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]); + let _ = + STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]); } for op in STORAGE_IO_TIME_OPERATIONS { let _ = STORAGE_IO_TIME.remove_label_values(&[op, tenant_id, timeline_id]); @@ -514,7 +592,6 @@ impl Drop for TimelineMetrics { } pub fn remove_tenant_metrics(tenant_id: &TenantId) { - let _ = STORAGE_TIME.remove_label_values(&["gc", &tenant_id.to_string(), "-"]); let tid = tenant_id.to_string(); for state in TENANT_STATE_OPTIONS { let _ = TENANT_STATE_METRIC.remove_label_values(&[&tid, state]); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 9fb540cfae..2f45fe0dfc 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -51,8 +51,7 @@ use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::import_datadir; use crate::is_uninit_mark; -use crate::metrics::TENANT_STATE_METRIC; -use crate::metrics::{remove_tenant_metrics, STORAGE_TIME}; +use crate::metrics::{remove_tenant_metrics, TENANT_STATE_METRIC}; use crate::repository::GcResult; use crate::task_mgr; use crate::task_mgr::TaskKind; @@ -1243,17 +1242,11 @@ impl Tenant { "Cannot run GC iteration on inactive tenant" ); - let timeline_str = target_timeline_id - .map(|x| x.to_string()) - .unwrap_or_else(|| "-".to_string()); + let gc_result = self + .gc_iteration_internal(target_timeline_id, horizon, pitr, ctx) + .await; - { - let _timer = STORAGE_TIME - .with_label_values(&["gc", &self.tenant_id.to_string(), &timeline_str]) - .start_timer(); - self.gc_iteration_internal(target_timeline_id, horizon, pitr, ctx) - .await - } + gc_result } /// Perform one compaction iteration. diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 7b796a0749..0ca8a0e491 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -133,7 +133,6 @@ pub struct Timeline { ancestor_timeline: Option>, ancestor_lsn: Lsn, - // Metrics metrics: TimelineMetrics, /// Ensures layers aren't frozen by checkpointer between @@ -2894,6 +2893,8 @@ impl Timeline { /// obsolete. /// pub(super) async fn gc(&self) -> anyhow::Result { + let timer = self.metrics.garbage_collect_histo.start_timer(); + fail_point!("before-timeline-gc"); let _layer_removal_cs = self.layer_removal_cs.lock().await; @@ -2914,11 +2915,17 @@ impl Timeline { let new_gc_cutoff = Lsn::min(horizon_cutoff, pitr_cutoff); - self.gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff) + let res = self + .gc_timeline(horizon_cutoff, pitr_cutoff, retain_lsns, new_gc_cutoff) .instrument( info_span!("gc_timeline", timeline = %self.timeline_id, cutoff = %new_gc_cutoff), ) - .await + .await?; + + // only record successes + timer.stop_and_record(); + + Ok(res) } async fn gc_timeline( diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index b5e2876074..bdaaa95216 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -46,6 +46,12 @@ PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS: Tuple[str, ...] = ( "pageserver_remote_physical_size", ) +PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = ( + "pageserver_storage_operations_seconds_global_count", + "pageserver_storage_operations_seconds_global_sum", + "pageserver_storage_operations_seconds_global_bucket", +) + PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( "pageserver_current_logical_size", "pageserver_resident_physical_size", @@ -61,9 +67,8 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( "pageserver_smgr_query_seconds_bucket", "pageserver_smgr_query_seconds_count", "pageserver_smgr_query_seconds_sum", - "pageserver_storage_operations_seconds_bucket", - "pageserver_storage_operations_seconds_count", - "pageserver_storage_operations_seconds_sum", + "pageserver_storage_operations_seconds_count_total", + "pageserver_storage_operations_seconds_sum_total", "pageserver_wait_lsn_seconds_bucket", "pageserver_wait_lsn_seconds_count", "pageserver_wait_lsn_seconds_sum", diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index ce77c3940b..e56bb1b469 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -9,6 +9,7 @@ from typing import List import pytest from fixtures.log_helper import log from fixtures.metrics import ( + PAGESERVER_GLOBAL_METRICS, PAGESERVER_PER_TENANT_METRICS, PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS, parse_metrics, @@ -161,6 +162,14 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder): f"process_start_time_seconds (UTC): {datetime.fromtimestamp(metrics.query_one('process_start_time_seconds').value)}" ) + # Test (a subset of) pageserver global metrics + for metric in PAGESERVER_GLOBAL_METRICS: + ps_samples = ps_metrics.query_all(metric, {}) + assert len(ps_samples) > 0 + for sample in ps_samples: + labels = ",".join([f'{key}="{value}"' for key, value in sample.labels.items()]) + log.info(f"{sample.name}{{{labels}}} {sample.value}") + @pytest.mark.parametrize( "remote_storage_kind",