From ef81d0b81dac87dc0d8e76616c855c798a6c4e4b Mon Sep 17 00:00:00 2001
From: BodoBolero
Date: Thu, 17 Apr 2025 14:16:07 +0200
Subject: [PATCH] remove some more metrics

Continue trimming pageserver metrics: drop the page cache size gauges,
the on-demand download wait-time histograms, the initial logical size
calculation counters, the low-residence eviction counters, the
unexpected on-demand download counter, the last_record_lsn gauge, and
the storage IO operation timing histograms, along with the code that
maintained them.

---
 pageserver/src/metrics.rs                    | 401 +-----------------
 pageserver/src/page_cache.rs                 |  31 --
 pageserver/src/tenant/storage_layer/layer.rs |  13 +-
 pageserver/src/tenant/timeline.rs            |  63 +--
 .../src/tenant/timeline/logical_size.rs      |  11 +-
 pageserver/src/virtual_file.rs               |  20 +-
 6 files changed, 17 insertions(+), 522 deletions(-)

diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index c7d05e527e..c56cb115f6 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -21,7 +21,7 @@ use pageserver_api::models::InMemoryLayerInfo;
 use pageserver_api::shard::TenantShardId;
 use postgres_backend::{QueryError, is_expected_io_error};
 use pq_proto::framed::ConnectionError;
-use strum::{EnumCount, IntoEnumIterator as _, VariantNames};
+use strum::{IntoEnumIterator as _, VariantNames};
 use strum_macros::{IntoStaticStr, VariantNames};
 use utils::id::TimelineId;
 
@@ -191,39 +191,6 @@ pub(crate) struct ScanLatency {
     map: EnumMap<GetKind, Option<Histogram>>,
 }
 
-
-pub(crate) struct PageCacheSizeMetrics {
-    pub max_bytes: UIntGauge,
-
-    pub current_bytes_immutable: UIntGauge,
-}
-
-static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
-    register_uint_gauge_vec!(
-        "pageserver_page_cache_size_current_bytes",
-        "Current size of the page cache in bytes, by key kind",
-        &["key_kind"]
-    )
-    .expect("failed to define a metric")
-});
-
-pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
-    Lazy::new(|| PageCacheSizeMetrics {
-        max_bytes: {
-            register_uint_gauge!(
-                "pageserver_page_cache_size_max_bytes",
-                "Maximum size of the page cache in bytes"
-            )
-            .expect("failed to define a metric")
-        },
-        current_bytes_immutable: {
-            PAGE_CACHE_SIZE_CURRENT_BYTES
-                .get_metric_with_label_values(&["immutable"])
-                .unwrap()
-        },
-    });
-
-
 #[derive(IntoStaticStr)]
 #[strum(serialize_all = "kebab_case")]
 pub(crate) enum PageCacheErrorKind {
@@ -272,44 +239,6 @@ pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::n
 pub(crate) mod wait_ondemand_download_time {
     use super::*;
 
-    const WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS: &[f64] = &[
-        0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, // 10 ms - 100ms
-        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, // 100ms to 1s
-        1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, // 1s to 10s
-        10.0, 20.0, 30.0, 40.0, 50.0, 60.0, // 10s to 1m
-    ];
-
-    /// The task kinds for which we want to track wait times for on-demand downloads.
-    /// Other task kinds' wait times are accumulated in label value `unknown`.
-    pub(crate) const WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS: [TaskKind; 2] = [
-        TaskKind::PageRequestHandler,
-        TaskKind::WalReceiverConnectionHandler,
-    ];
-
-    pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL: Lazy<Vec<Histogram>> = Lazy::new(|| {
-        let histo = register_histogram_vec!(
-            "pageserver_wait_ondemand_download_seconds_global",
-            "Observations are individual tasks' wait times for on-demand downloads. \
-             If N tasks coalesce on an on-demand download, and it takes 10s, than we observe N * 10s.",
-            &["task_kind"],
-            WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS.into(),
-        )
-        .expect("failed to define a metric");
-        WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS
-            .iter()
-            .map(|task_kind| histo.with_label_values(&[task_kind.into()]))
-            .collect::<Vec<_>>()
-    });
-
-    pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_SUM: Lazy<CounterVec> = Lazy::new(|| {
-        register_counter_vec!(
-            // use a name that _could_ be evolved into a per-timeline histogram later
-            "pageserver_wait_ondemand_download_seconds_sum",
-            "Like `pageserver_wait_ondemand_download_seconds_global` but per timeline",
-            &["tenant_id", "shard_id", "timeline_id", "task_kind"],
-        )
-        .unwrap()
-    });
 
     pub struct WaitOndemandDownloadTimeSum {
     }
 
@@ -324,19 +253,10 @@ pub(crate) mod wait_ondemand_download_time {
         }
     }
 
-    pub(crate) fn shutdown_timeline(tenant_id: &str, shard_id: &str, timeline_id: &str) {
-        for task_kind in WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS {
-            let _ = WAIT_ONDEMAND_DOWNLOAD_TIME_SUM.remove_label_values(&[
-                tenant_id,
-                shard_id,
-                timeline_id,
-                task_kind.into(),
-            ]);
-        }
+    pub(crate) fn shutdown_timeline(_tenant_id: &str, _shard_id: &str, _timeline_id: &str) {
     }
 
     pub(crate) fn preinitialize_global_metrics() {
-        Lazy::force(&WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL);
     }
 }
 
@@ -588,119 +508,15 @@ pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
 });
 
 pub(crate) mod initial_logical_size {
-    use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec};
+    use metrics::{IntCounter, register_int_counter};
     use once_cell::sync::Lazy;
 
-    pub(crate) struct StartCalculation(IntCounterVec);
-    pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
-        StartCalculation(
-            register_int_counter_vec!(
-                "pageserver_initial_logical_size_start_calculation",
-                "Incremented each time we start an initial logical size calculation attempt. \
-                 The `circumstances` label provides some additional details.",
-                &["attempt", "circumstances"]
-            )
-            .unwrap(),
-        )
-    });
-
-    struct DropCalculation {
-        first: IntCounter,
-        retry: IntCounter,
-    }
-
-    static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
-        let vec = register_int_counter_vec!(
-            "pageserver_initial_logical_size_drop_calculation",
-            "Incremented each time we abort a started size calculation attmpt.",
-            &["attempt"]
-        )
-        .unwrap();
-        DropCalculation {
-            first: vec.with_label_values(&["first"]),
-            retry: vec.with_label_values(&["retry"]),
-        }
-    });
-
-    pub(crate) struct Calculated {
-        pub(crate) births: IntCounter,
-        pub(crate) deaths: IntCounter,
-    }
-
-    pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
-        births: register_int_counter!(
-            "pageserver_initial_logical_size_finish_calculation",
-            "Incremented every time we finish calculation of initial logical size.\
-             If everything is working well, this should happen at most once per Timeline object."
-        )
-        .unwrap(),
-        deaths: register_int_counter!(
-            "pageserver_initial_logical_size_drop_finished_calculation",
-            "Incremented when we drop a finished initial logical size calculation result.\
-             Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
- ) - .unwrap(), - }); - - pub(crate) struct OngoingCalculationGuard { - inc_drop_calculation: Option, - } - #[derive(strum_macros::IntoStaticStr)] pub(crate) enum StartCircumstances { - EmptyInitial, SkippedConcurrencyLimiter, AfterBackgroundTasksRateLimit, } - impl StartCalculation { - pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard { - let circumstances_label: &'static str = circumstances.into(); - self.0 - .with_label_values(&["first", circumstances_label]) - .inc(); - OngoingCalculationGuard { - inc_drop_calculation: Some(DROP_CALCULATION.first.clone()), - } - } - pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard { - let circumstances_label: &'static str = circumstances.into(); - self.0 - .with_label_values(&["retry", circumstances_label]) - .inc(); - OngoingCalculationGuard { - inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()), - } - } - } - - impl Drop for OngoingCalculationGuard { - fn drop(&mut self) { - if let Some(counter) = self.inc_drop_calculation.take() { - counter.inc(); - } - } - } - - impl OngoingCalculationGuard { - pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard { - drop(self.inc_drop_calculation.take()); - CALCULATED.births.inc(); - FinishedCalculationGuard { - inc_on_drop: CALCULATED.deaths.clone(), - } - } - } - - pub(crate) struct FinishedCalculationGuard { - inc_on_drop: IntCounter, - } - - impl Drop for FinishedCalculationGuard { - fn drop(&mut self) { - self.inc_on_drop.inc(); - } - } // context: https://github.com/neondatabase/neon/issues/5963 pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy = @@ -773,25 +589,6 @@ static EVICTIONS: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); -static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_evictions_with_low_residence_duration", - "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \ - Residence duration is determined using the `residence_duration_data_source`.", - &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"] - ) - .expect("failed to define a metric") -}); - -pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_unexpected_ondemand_downloads_count", - "Number of unexpected on-demand downloads. \ - We log more context for each increment, so, forgo any labels in this metric.", - ) - .expect("failed to define a metric") -}); - /// How long did we take to start up? Broken down by labels to describe /// different phases of startup. pub static STARTUP_DURATION: Lazy = Lazy::new(|| { @@ -869,134 +666,6 @@ pub(crate) static TENANT: Lazy = Lazy::new(|| { } }); -/// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric. 
-#[derive(Debug)] -pub(crate) struct EvictionsWithLowResidenceDuration { - data_source: &'static str, - threshold: Duration, - counter: Option, -} - -pub(crate) struct EvictionsWithLowResidenceDurationBuilder { - data_source: &'static str, - threshold: Duration, -} - -impl EvictionsWithLowResidenceDurationBuilder { - pub fn new(data_source: &'static str, threshold: Duration) -> Self { - Self { - data_source, - threshold, - } - } - - fn build( - &self, - tenant_id: &str, - shard_id: &str, - timeline_id: &str, - ) -> EvictionsWithLowResidenceDuration { - let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION - .get_metric_with_label_values(&[ - tenant_id, - shard_id, - timeline_id, - self.data_source, - &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold), - ]) - .unwrap(); - EvictionsWithLowResidenceDuration { - data_source: self.data_source, - threshold: self.threshold, - counter: Some(counter), - } - } -} - -impl EvictionsWithLowResidenceDuration { - fn threshold_label_value(threshold: Duration) -> String { - format!("{}", threshold.as_secs()) - } - - pub fn observe(&self, observed_value: Duration) { - if observed_value < self.threshold { - self.counter - .as_ref() - .expect("nobody calls this function after `remove_from_vec`") - .inc(); - } - } - - pub fn change_threshold( - &mut self, - tenant_id: &str, - shard_id: &str, - timeline_id: &str, - new_threshold: Duration, - ) { - if new_threshold == self.threshold { - return; - } - let mut with_new = EvictionsWithLowResidenceDurationBuilder::new( - self.data_source, - new_threshold, - ) - .build(tenant_id, shard_id, timeline_id); - std::mem::swap(self, &mut with_new); - with_new.remove(tenant_id, shard_id, timeline_id); - } - - // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`. - fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) { - let Some(_counter) = self.counter.take() else { - return; - }; - - let threshold = Self::threshold_label_value(self.threshold); - - let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[ - tenant_id, - shard_id, - timeline_id, - self.data_source, - &threshold, - ]); - - match removed { - Err(e) => { - // this has been hit in staging as - // , but we don't know how. - // because we can be in the drop path already, don't risk: - // - "double-panic => illegal instruction" or - // - future "drop panick => abort" - // - // so just nag: (the error has the labels) - tracing::warn!( - "failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}" - ); - } - Ok(()) => { - // to help identify cases where we double-remove the same values, let's log all - // deletions? - tracing::info!( - "removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", - self.data_source - ); - } - } - } -} - -// Metrics collected on disk IO operations -// -// Roughly logarithmic scale. -const STORAGE_IO_TIME_BUCKETS: &[f64] = &[ - 0.000030, // 30 usec - 0.001000, // 1000 usec - 0.030, // 30 ms - 1.000, // 1000 ms - 30.000, // 30000 ms -]; /// VirtualFile fs operation variants. 
/// @@ -1024,52 +693,6 @@ pub(crate) enum StorageIoOperation { Metadata, } -impl StorageIoOperation { - pub fn as_str(&self) -> &'static str { - match self { - StorageIoOperation::Open => "open", - StorageIoOperation::OpenAfterReplace => "open-after-replace", - StorageIoOperation::Close => "close", - StorageIoOperation::CloseByReplace => "close-by-replace", - StorageIoOperation::Read => "read", - StorageIoOperation::Write => "write", - StorageIoOperation::Seek => "seek", - StorageIoOperation::Fsync => "fsync", - StorageIoOperation::Metadata => "metadata", - } - } -} - -/// Tracks time taken by fs operations near VirtualFile. -#[derive(Debug)] -pub(crate) struct StorageIoTime { - metrics: [Histogram; StorageIoOperation::COUNT], -} - -impl StorageIoTime { - fn new() -> Self { - let storage_io_histogram_vec = register_histogram_vec!( - "pageserver_io_operations_seconds", - "Time spent in IO operations", - &["operation"], - STORAGE_IO_TIME_BUCKETS.into() - ) - .expect("failed to define a metric"); - let metrics = std::array::from_fn(|i| { - let op = StorageIoOperation::from_repr(i).unwrap(); - storage_io_histogram_vec - .get_metric_with_label_values(&[op.as_str()]) - .unwrap() - }); - Self { metrics } - } - - pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram { - &self.metrics[op as usize] - } -} - -pub(crate) static STORAGE_IO_TIME_METRIC: Lazy = Lazy::new(StorageIoTime::new); #[derive(Clone, Copy)] #[repr(usize)] @@ -2190,7 +1813,6 @@ pub(crate) struct TimelineMetrics { pub load_layer_map_histo: StorageTimeMetrics, pub garbage_collect_histo: StorageTimeMetrics, pub find_gc_cutoffs_histo: StorageTimeMetrics, - pub last_record_lsn_gauge: IntGauge, pub disk_consistent_lsn_gauge: IntGauge, pub pitr_history_size: UIntGauge, pub archival_size: UIntGauge, @@ -2203,7 +1825,6 @@ pub(crate) struct TimelineMetrics { pub aux_file_size_gauge: IntGauge, pub directory_entries_count_gauge: Lazy UIntGauge>>, pub evictions: IntCounter, - pub evictions_with_low_residence_duration: std::sync::RwLock, /// Number of valid LSN leases. 
pub valid_lsn_lease_count_gauge: UIntGauge, pub wal_records_received: IntCounter, @@ -2218,7 +1839,6 @@ impl TimelineMetrics { pub fn new( tenant_shard_id: &TenantShardId, timeline_id_raw: &TimelineId, - evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder, ) -> Self { let tenant_id = tenant_shard_id.tenant_id.to_string(); let shard_id = format!("{}", tenant_shard_id.shard_slug()); @@ -2277,9 +1897,6 @@ impl TimelineMetrics { &shard_id, &timeline_id, ); - let last_record_lsn_gauge = LAST_RECORD_LSN - .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) - .unwrap(); let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) @@ -2332,8 +1949,6 @@ impl TimelineMetrics { let evictions = EVICTIONS .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); - let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder - .build(&tenant_id, &shard_id, &timeline_id); let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) @@ -2373,7 +1988,6 @@ impl TimelineMetrics { garbage_collect_histo, find_gc_cutoffs_histo, load_layer_map_histo, - last_record_lsn_gauge, disk_consistent_lsn_gauge, pitr_history_size, archival_size, @@ -2385,9 +1999,6 @@ impl TimelineMetrics { aux_file_size_gauge, directory_entries_count_gauge, evictions, - evictions_with_low_residence_duration: std::sync::RwLock::new( - evictions_with_low_residence_duration, - ), storage_io_size, valid_lsn_lease_count_gauge, wal_records_received, @@ -2482,11 +2093,6 @@ impl TimelineMetrics { let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]); - self.evictions_with_low_residence_duration - .write() - .unwrap() - .remove(tenant_id, shard_id, timeline_id); - // The following metrics are born outside of the TimelineMetrics lifecycle but still // removed at the end of it. The idea is to have the metrics outlive the // entity during which they're observed, e.g., the smgr metrics shall @@ -3244,7 +2850,6 @@ pub fn preinitialize_metrics( // counters [ - &UNEXPECTED_ONDEMAND_DOWNLOADS, &WALRECEIVER_STARTED_CONNECTIONS, &WALRECEIVER_BROKER_UPDATES, &WALRECEIVER_CANDIDATES_ADDED, diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 6f32484bc9..90db5291b9 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -77,7 +77,6 @@ use anyhow::Context; use once_cell::sync::OnceCell; use crate::context::RequestContext; -use crate::metrics::PageCacheSizeMetrics; use crate::virtual_file::{IoBufferMut, IoPageSlice}; static PAGE_CACHE: OnceCell = OnceCell::new(); @@ -206,7 +205,6 @@ pub struct PageCache { /// This is interpreted modulo the page cache size. next_evict_slot: AtomicUsize, - size_metrics: &'static PageCacheSizeMetrics, } struct PinnedSlotsPermit { @@ -500,7 +498,6 @@ impl PageCache { let mut map = self.immutable_page_map.write().unwrap(); map.remove(&(*file_id, *blkno)) .expect("could not find old key in mapping"); - self.size_metrics.current_bytes_immutable.sub_page_sz(1); } } } @@ -518,7 +515,6 @@ impl PageCache { Entry::Occupied(entry) => Some(*entry.get()), Entry::Vacant(entry) => { entry.insert(slot_idx); - self.size_metrics.current_bytes_immutable.add_page_sz(1); None } } @@ -608,10 +604,6 @@ impl PageCache { // this is avoided. 
let page_buffer = IoBufferMut::with_capacity_zeroed(num_pages * PAGE_SZ).leak(); - let size_metrics = &crate::metrics::PAGE_CACHE_SIZE; - size_metrics.max_bytes.set_page_sz(num_pages); - size_metrics.current_bytes_immutable.set_page_sz(0); - let slots = page_buffer .chunks_exact_mut(PAGE_SZ) .map(|chunk| { @@ -633,31 +625,8 @@ impl PageCache { immutable_page_map: Default::default(), slots, next_evict_slot: AtomicUsize::new(0), - size_metrics, pinned_slots: Arc::new(tokio::sync::Semaphore::new(num_pages)), } } } -trait PageSzBytesMetric { - fn set_page_sz(&self, count: usize); - fn add_page_sz(&self, count: usize); - fn sub_page_sz(&self, count: usize); -} - -#[inline(always)] -fn count_times_page_sz(count: usize) -> u64 { - u64::try_from(count).unwrap() * u64::try_from(PAGE_SZ).unwrap() -} - -impl PageSzBytesMetric for metrics::UIntGauge { - fn set_page_sz(&self, count: usize) { - self.set(count_times_page_sz(count)); - } - fn add_page_sz(&self, count: usize) { - self.add(count_times_page_sz(count)); - } - fn sub_page_sz(&self, count: usize) { - self.sub(count_times_page_sz(count)); - } -} diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index b7f6e5dc77..9e24327695 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -1121,7 +1121,6 @@ impl LayerInner { "unexpectedly on-demand downloading for task kind {:?}", ctx.task_kind() ); - crate::metrics::UNEXPECTED_ONDEMAND_DOWNLOADS.inc(); let really_error = matches!(b, Error) && !self.conf.ondemand_download_behavior_treat_error_as_warn; @@ -1570,17 +1569,7 @@ impl LayerInner { Ok(elapsed) => { let accessed_and_visible = self.access_stats.accessed() && self.access_stats.visibility() == LayerVisibilityHint::Visible; - if accessed_and_visible { - // Only layers used for reads contribute to our "low residence" metric that is used - // to detect thrashing. Layers promoted for other reasons (e.g. compaction) are allowed - // to be rapidly evicted without contributing to this metric. - timeline - .metrics - .evictions_with_low_residence_duration - .read() - .unwrap() - .observe(elapsed); - } + tracing::info!( residence_millis = elapsed.as_millis(), diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 5932797a09..38c213f927 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2718,15 +2718,6 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.eviction_policy) } - fn get_evictions_low_residence_duration_metric_threshold( - tenant_conf: &pageserver_api::models::TenantConfig, - default_tenant_conf: &pageserver_api::config::TenantConfigToml, - ) -> Duration { - tenant_conf - .evictions_low_residence_duration_metric_threshold - .unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold) - } - fn get_image_layer_creation_check_threshold(&self) -> u8 { let tenant_conf = self.tenant_conf.load(); tenant_conf @@ -2802,28 +2793,8 @@ impl Timeline { // The threshold is embedded in the metric. So, we need to update it. 
{ - let new_threshold = Self::get_evictions_low_residence_duration_metric_threshold( - &new_conf.tenant_conf, - &self.conf.default_tenant_conf, - ); - - let tenant_id_str = self.tenant_shard_id.tenant_id.to_string(); - let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug()); - - let timeline_id_str = self.timeline_id.to_string(); - self.remote_client.update_config(&new_conf.location); - self.metrics - .evictions_with_low_residence_duration - .write() - .unwrap() - .change_threshold( - &tenant_id_str, - &shard_id_str, - &timeline_id_str, - new_threshold, - ); } } @@ -2857,13 +2828,6 @@ impl Timeline { let (layer_flush_start_tx, _) = tokio::sync::watch::channel((0, disk_consistent_lsn)); let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(()))); - let evictions_low_residence_duration_metric_threshold = { - let loaded_tenant_conf = tenant_conf.load(); - Self::get_evictions_low_residence_duration_metric_threshold( - &loaded_tenant_conf.tenant_conf, - &conf.default_tenant_conf, - ) - }; if let Some(ancestor) = &ancestor { let mut ancestor_gc_info = ancestor.gc_info.write().unwrap(); @@ -2876,10 +2840,6 @@ impl Timeline { let metrics = Arc::new(TimelineMetrics::new( &tenant_shard_id, &timeline_id, - crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( - "mtime", - evictions_low_residence_duration_metric_threshold, - ), )); let aux_file_metrics = metrics.aux_file_size_gauge.clone(); @@ -3016,10 +2976,6 @@ impl Timeline { result.repartition_threshold = result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; - result - .metrics - .last_record_lsn_gauge - .set(disk_consistent_lsn.0 as i64); result }) } @@ -3489,7 +3445,7 @@ impl Timeline { self.current_logical_size.initialized.add_permits(1); } - let try_once = |attempt: usize| { + let try_once = |_attempt: usize| { let background_ctx = &background_ctx; let self_ref = &self; let skip_concurrency_limiter = &skip_concurrency_limiter; @@ -3500,7 +3456,7 @@ impl Timeline { ); use crate::metrics::initial_logical_size::StartCircumstances; - let (_maybe_permit, circumstances) = tokio::select! { + let (_maybe_permit, _circumstances) = tokio::select! 
{ permit = wait_for_permit => { (Some(permit), StartCircumstances::AfterBackgroundTasksRateLimit) } @@ -3517,12 +3473,6 @@ impl Timeline { } }; - let metrics_guard = if attempt == 1 { - crate::metrics::initial_logical_size::START_CALCULATION.first(circumstances) - } else { - crate::metrics::initial_logical_size::START_CALCULATION.retry(circumstances) - }; - let io_concurrency = IoConcurrency::spawn_from_conf( self_ref.conf, self_ref @@ -3549,7 +3499,7 @@ impl Timeline { // TODO: add aux file size to logical size - Ok((calculated_size, metrics_guard)) + Ok(calculated_size) } }; @@ -3586,7 +3536,7 @@ impl Timeline { } }; - let (calculated_size, metrics_guard) = match retrying.await { + let calculated_size = match retrying.await { ControlFlow::Continue(calculated_size) => calculated_size, ControlFlow::Break(()) => return, }; @@ -3605,8 +3555,7 @@ impl Timeline { self.current_logical_size .initial_logical_size - .set((calculated_size, metrics_guard.calculation_result_saved())) - .ok() + .set((calculated_size,)) .expect("only this task sets it"); } @@ -4503,8 +4452,6 @@ impl Timeline { pub(crate) fn finish_write(&self, new_lsn: Lsn) { assert!(new_lsn.is_aligned()); - - self.metrics.last_record_lsn_gauge.set(new_lsn.0 as i64); self.last_record_lsn.advance(new_lsn); } diff --git a/pageserver/src/tenant/timeline/logical_size.rs b/pageserver/src/tenant/timeline/logical_size.rs index 397037ca9f..ea5515634c 100644 --- a/pageserver/src/tenant/timeline/logical_size.rs +++ b/pageserver/src/tenant/timeline/logical_size.rs @@ -23,7 +23,6 @@ pub(super) struct LogicalSize { /// the initial size at a different LSN. pub initial_logical_size: OnceCell<( u64, - crate::metrics::initial_logical_size::FinishedCalculationGuard, )>, /// Cancellation for the best-effort logical size calculation. @@ -130,11 +129,7 @@ impl CurrentLogicalSize { impl LogicalSize { pub(super) fn empty_initial() -> Self { Self { - initial_logical_size: OnceCell::with_value((0, { - crate::metrics::initial_logical_size::START_CALCULATION - .first(crate::metrics::initial_logical_size::StartCircumstances::EmptyInitial) - .calculation_result_saved() - })), + initial_logical_size: OnceCell::with_value((0,)), cancel_wait_for_background_loop_concurrency_limit_semaphore: OnceCell::new(), initial_part_end: None, size_added_after_initial: AtomicI64::new(0), @@ -159,7 +154,7 @@ impl LogicalSize { // ^^^ keep this type explicit so that the casts in this function break if // we change the type. match self.initial_logical_size.get() { - Some((initial_size, _)) => { + Some((initial_size, )) => { CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment) .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}")) .unwrap())) @@ -181,7 +176,7 @@ impl LogicalSize { /// available for re-use. This doesn't contain the incremental part. 
     pub(super) fn initialized_size(&self, lsn: Lsn) -> Option<u64> {
         match self.initial_part_end {
-            Some(v) if v == lsn => self.initial_logical_size.get().map(|(s, _)| *s),
+            Some(v) if v == lsn => self.initial_logical_size.get().map(|(s,)| *s),
             _ => None,
         }
     }
diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs
index cd3d897423..1f8d5cdcf7 100644
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -27,12 +27,10 @@ use owned_buffers_io::io_buf_ext::FullSlice;
 use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
 pub use pageserver_api::models::virtual_file as api;
 use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
-use tokio::time::Instant;
 use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};
 
 use crate::assert_u64_eq_usize::UsizeIsU64;
 use crate::context::RequestContext;
-use crate::metrics::{STORAGE_IO_TIME_METRIC, StorageIoOperation};
 use crate::page_cache::{PAGE_SZ, PageWriteGuard};
 pub(crate) mod io_engine;
 pub use io_engine::{
@@ -431,9 +429,7 @@ impl OpenFiles {
         if let Some(old_file) = slot_guard.file.take() {
             // the normal path of dropping VirtualFile uses "close", use "close-by-replace" here to
             // distinguish the two.
-            STORAGE_IO_TIME_METRIC
-                .get(StorageIoOperation::CloseByReplace)
-                .observe_closure_duration(|| drop(old_file));
+            drop(old_file);
         }
 
         // Prepare the slot for reuse and return it
@@ -532,13 +528,9 @@ impl<T> MaybeFatalIo<T> for std::io::Result<T> {
 /// where "support" means that we measure wall clock time.
 macro_rules! observe_duration {
     ($op:expr, $($body:tt)*) => {{
-        let instant = Instant::now();
-        let result = $($body)*;
-        let elapsed = instant.elapsed().as_secs_f64();
-        STORAGE_IO_TIME_METRIC
-            .get($op)
-            .observe(elapsed);
-        result
+
+        $($body)*
+
     }}
 }
 
@@ -1263,9 +1255,7 @@ impl Drop for VirtualFileInner {
         // there is also operation "close-by-replace" for closes done on eviction for
         // comparison.
         if let Some(fd) = slot_guard.file.take() {
-            STORAGE_IO_TIME_METRIC
-                .get(StorageIoOperation::Close)
-                .observe_closure_duration(|| drop(fd));
+            drop(fd);
        }
    }
}
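
Note on the removed metric pattern (a sketch for reference, not part of the diff):
every per-timeline metric deleted above comes in two halves, a lazily registered
labeled vector and a matching remove_label_values() call on the timeline shutdown
path (see shutdown_timeline() and the removals in TimelineMetrics above); deleting
one half without the other would leave label series that outlive their timeline.
A minimal sketch of that lifecycle, assuming the same `metrics` crate macros and
methods this patch touches; the metric name and both functions are hypothetical:

// Sketch of the register/remove lifecycle each removed per-timeline metric used.
// `pageserver_example_total`, `record_example`, and `forget_timeline` are
// hypothetical names; the macros and methods are the ones used in metrics.rs.
use metrics::{IntCounterVec, register_int_counter_vec};
use once_cell::sync::Lazy;

// Registered once, process-wide; labeled children are created on demand.
static EXAMPLE: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_example_total", // hypothetical metric name
        "Example of the per-timeline counter pattern removed by this patch.",
        &["tenant_id", "shard_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});

// Hot path: creates the labeled child on first use, then just increments it.
fn record_example(tenant_id: &str, shard_id: &str, timeline_id: &str) {
    EXAMPLE
        .with_label_values(&[tenant_id, shard_id, timeline_id])
        .inc();
}

// Shutdown path: drop the child so the series does not outlive the timeline.
fn forget_timeline(tenant_id: &str, shard_id: &str, timeline_id: &str) {
    let _ = EXAMPLE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
}

The patch consistently deletes both halves together, e.g.
WAIT_ONDEMAND_DOWNLOAD_TIME_SUM with its shutdown_timeline() body, and
EVICTIONS_WITH_LOW_RESIDENCE_DURATION with EvictionsWithLowResidenceDuration::remove().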