remove some more metrics, still compiles

BodoBolero
2025-04-22 18:32:04 +02:00
parent 6d77432ed2
commit a12369be43
5 changed files with 25 additions and 198 deletions

View File

@@ -91,12 +91,12 @@
use std::{sync::Arc, time::Duration};
use once_cell::sync::Lazy;
use tracing::warn;
use utils::{id::TimelineId, shard::TenantShardId};
use crate::{
metrics::{StorageIoSizeMetrics, TimelineMetrics},
metrics::TimelineMetrics,
task_mgr::TaskKind,
tenant::Timeline,
};
@@ -122,38 +122,34 @@ pub struct RequestContext {
#[derive(Clone)]
pub(crate) enum Scope {
Global {
io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
},
SecondaryTenant {
io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
},
SecondaryTimeline {
io_size_metrics: crate::metrics::StorageIoSizeMetrics,
},
Timeline {
// We wrap the `Arc<TimelineMetrics>`s inside another Arc to avoid child
// context creation contending for the ref counters of the Arc<TimelineMetrics>,
// which are shared among all tasks that operate on the timeline, especially
// concurrent page_service connections.
#[allow(clippy::redundant_allocation)]
arc_arc: Arc<Arc<TimelineMetrics>>,
},
#[cfg(test)]
UnitTest {
io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
},
DebugTools {
io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
},
}
static GLOBAL_IO_SIZE_METRICS: Lazy<crate::metrics::StorageIoSizeMetrics> =
Lazy::new(|| crate::metrics::StorageIoSizeMetrics::new("*", "*", "*"));
impl Scope {
pub(crate) fn new_global() -> Self {
Scope::Global {
io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
}
}
/// NB: this allocates, so, use only at relatively long-lived roots, e.g., at start
@@ -173,18 +169,13 @@ impl Scope {
}
}
pub(crate) fn new_secondary_timeline(
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
_tenant_shard_id: &TenantShardId,
_timeline_id: &TimelineId,
) -> Self {
// TODO(https://github.com/neondatabase/neon/issues/11156): secondary timelines have no infrastructure for metrics lifecycle.
let tenant_id = tenant_shard_id.tenant_id.to_string();
let shard_id = tenant_shard_id.shard_slug().to_string();
let timeline_id = timeline_id.to_string();
let io_size_metrics =
crate::metrics::StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
Scope::SecondaryTimeline { io_size_metrics }
Scope::SecondaryTimeline { }
}
pub(crate) fn new_secondary_tenant(_tenant_shard_id: &TenantShardId) -> Self {
// Before propagating metrics via RequestContext, the labels were inferred from file path.
@@ -197,19 +188,19 @@ impl Scope {
// like we do for attached timelines. (We don't have attached-tenant-scoped usage of VirtualFile
// at this point, so, we were able to completely side-step tenant-scoped stuff there).
Scope::SecondaryTenant {
io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
}
}
#[cfg(test)]
pub(crate) fn new_unit_test() -> Self {
Scope::UnitTest {
io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
}
}
pub(crate) fn new_debug_tools() -> Self {
Scope::DebugTools {
io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
}
}
}
@@ -528,41 +519,6 @@ impl RequestContext {
self.read_path_debug
}
pub(crate) fn io_size_metrics(&self) -> &StorageIoSizeMetrics {
match &self.scope {
Scope::Global { io_size_metrics } => {
let is_unit_test = cfg!(test);
let is_regress_test_build = cfg!(feature = "testing");
if is_unit_test || is_regress_test_build {
panic!("all VirtualFile instances are timeline-scoped");
} else {
use once_cell::sync::Lazy;
use std::sync::Mutex;
use std::time::Duration;
use utils::rate_limit::RateLimit;
static LIMIT: Lazy<Mutex<RateLimit>> =
Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(1))));
let mut guard = LIMIT.lock().unwrap();
guard.call2(|rate_limit_stats| {
warn!(
%rate_limit_stats,
backtrace=%std::backtrace::Backtrace::force_capture(),
"all VirtualFile instances are timeline-scoped",
);
});
io_size_metrics
}
}
Scope::Timeline { arc_arc } => &arc_arc.storage_io_size,
Scope::SecondaryTimeline { io_size_metrics } => io_size_metrics,
Scope::SecondaryTenant { io_size_metrics } => io_size_metrics,
#[cfg(test)]
Scope::UnitTest { io_size_metrics } => io_size_metrics,
Scope::DebugTools { io_size_metrics } => io_size_metrics,
}
}
pub(crate) fn ondemand_download_wait_observe(&self, duration: Duration) {
if duration == Duration::ZERO {
return;
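
Note: the comment on Scope::Timeline explains the double wrapping. The inner Arc<TimelineMetrics> is shared by every task operating on the timeline, so cloning it on each child-context creation would make concurrent page_service connections contend on a single atomic refcount; cloning an outer Arc that is private to the context tree touches a separate counter. A minimal sketch of the pattern, with a stand-in TimelineMetrics rather than the real struct:

use std::sync::Arc;

struct TimelineMetrics; // stand-in for the real metrics struct

struct Scope {
    // Only the outer Arc's refcount is touched when child contexts
    // are created; the widely shared inner Arc is left alone.
    arc_arc: Arc<Arc<TimelineMetrics>>,
}

impl Scope {
    // The root scope clones the shared inner Arc exactly once.
    fn new_timeline(shared: &Arc<TimelineMetrics>) -> Self {
        Scope {
            arc_arc: Arc::new(Arc::clone(shared)),
        }
    }

    // Child contexts clone only the outer Arc, avoiding contention
    // on the refcount shared among all tasks on the timeline.
    fn child(&self) -> Self {
        Scope {
            arc_arc: Arc::clone(&self.arc_arc),
        }
    }
}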

View File

@@ -7,10 +7,10 @@ use enum_map::{Enum as _, EnumMap};
use futures::Future;
use metrics::{
CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter,
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
IntCounterPairVec, IntCounterVec, IntGaugeVec, UIntGauge, UIntGaugeVec,
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
};
use once_cell::sync::Lazy;
use pageserver_api::config::{
@@ -626,59 +626,6 @@ pub(crate) enum StorageIoOperation {
}
#[derive(Clone, Copy)]
#[repr(usize)]
pub(crate) enum StorageIoSizeOperation {
Read,
Write,
}
impl StorageIoSizeOperation {
pub(crate) const VARIANTS: &'static [&'static str] = &["read", "write"];
fn as_str(&self) -> &'static str {
Self::VARIANTS[*self as usize]
}
}
// Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
pub(crate) static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_io_operations_bytes_total",
"Total amount of bytes read/written in IO operations",
&["operation", "tenant_id", "shard_id", "timeline_id"]
)
.expect("failed to define a metric")
});
#[derive(Clone, Debug)]
pub(crate) struct StorageIoSizeMetrics {
pub read: UIntGauge,
pub write: UIntGauge,
}
impl StorageIoSizeMetrics {
pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
let read = STORAGE_IO_SIZE
.get_metric_with_label_values(&[
StorageIoSizeOperation::Read.as_str(),
tenant_id,
shard_id,
timeline_id,
])
.unwrap();
let write = STORAGE_IO_SIZE
.get_metric_with_label_values(&[
StorageIoSizeOperation::Write.as_str(),
tenant_id,
shard_id,
timeline_id,
])
.unwrap();
Self { read, write }
}
}
#[cfg(not(test))]
pub(crate) mod virtual_file_descriptor_cache {
use super::*;
@@ -941,19 +888,9 @@ static PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE: Lazy<IntCounterVec>
.expect("failed to define a metric")
});
pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
register_int_gauge_vec!(
"pageserver_page_service_config_max_batch_size",
"Configured maximum batch size for the server-side batching functionality of page_service. \
Labels expose more of the configuration parameters.",
&["mode", "execution", "batching"]
)
.expect("failed to define a metric")
});
fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
let (label_values, value) = match conf {
let (_label_values, _value) = match conf {
PageServicePipeliningConfig::Serial => (["serial", "-", "-"], 1),
PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
max_batch_size,
@@ -975,9 +912,6 @@ fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
([mode, execution, batching], max_batch_size.get())
}
};
PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
.with_label_values(&label_values)
.set(value.try_into().unwrap());
}
static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
@@ -1329,14 +1263,6 @@ pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(
.expect("failed to define a metric")
});
pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
"pageserver_walreceiver_active_managers",
"Number of active walreceiver managers"
)
.expect("failed to define a metric")
});
pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
@@ -1346,21 +1272,6 @@ pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
.expect("failed to define a metric")
});
pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_walreceiver_candidates_events_total",
"Number of walreceiver candidate events",
&["event"]
)
.expect("failed to define a metric")
});
pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
// Metrics collected on WAL redo operations
//
// We collect the time spent in actual WAL redo ('redo'), and time waiting
@@ -1670,7 +1581,6 @@ pub(crate) struct TimelineMetrics {
pub garbage_collect_histo: StorageTimeMetrics,
pub find_gc_cutoffs_histo: StorageTimeMetrics,
/// copy of LayeredTimeline.current_logical_size
pub storage_io_size: StorageIoSizeMetrics,
shutdown: std::sync::atomic::AtomicBool,
}
@@ -1726,9 +1636,6 @@ impl TimelineMetrics {
&timeline_id,
);
let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
TimelineMetrics {
tenant_id,
shard_id,
@@ -1740,7 +1647,6 @@ impl TimelineMetrics {
imitate_logical_size_histo,
garbage_collect_histo,
find_gc_cutoffs_histo,
storage_io_size,
shutdown: std::sync::atomic::AtomicBool::default(),
}
}
@@ -1835,10 +1741,6 @@ impl TimelineMetrics {
]);
}
for op in StorageIoSizeOperation::VARIANTS {
let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
}
let _ =
WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
@@ -2259,8 +2161,6 @@ pub fn preinitialize_metrics(
[
&WALRECEIVER_STARTED_CONNECTIONS,
&WALRECEIVER_BROKER_UPDATES,
&WALRECEIVER_CANDIDATES_ADDED,
&WALRECEIVER_CANDIDATES_REMOVED,
&tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
&tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
&REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
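
Note: the deleted StorageIoSizeMetrics shows the usual lifecycle for per-timeline series in a labelled vec: resolve the child gauges once at timeline creation so the hot I/O path increments plain handles with no label lookup, then remove the label values at shutdown so the series don't accumulate in the exporter (the same cleanup that clear_timeline_metrics in the next file loses). A sketch of that lifecycle, using the upstream prometheus crate's IntGaugeVec in place of the workspace metrics crate's UIntGaugeVec:

use once_cell::sync::Lazy;
use prometheus::{IntGauge, IntGaugeVec, register_int_gauge_vec};

static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_io_operations_bytes_total",
        "Total amount of bytes read/written in IO operations",
        &["operation", "tenant_id", "shard_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});

struct StorageIoSizeMetrics {
    read: IntGauge,
    write: IntGauge,
}

impl StorageIoSizeMetrics {
    // Resolve the labelled children once; afterwards incrementing
    // them is a plain atomic add, no label lookup involved.
    fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
        let read =
            STORAGE_IO_SIZE.with_label_values(&["read", tenant_id, shard_id, timeline_id]);
        let write =
            STORAGE_IO_SIZE.with_label_values(&["write", tenant_id, shard_id, timeline_id]);
        Self { read, write }
    }
}

// On timeline shutdown the series must be dropped explicitly,
// otherwise the label set lingers in the exporter forever.
fn clear(tenant_id: &str, shard_id: &str, timeline_id: &str) {
    for op in ["read", "write"] {
        let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
    }
}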

View File

@@ -4,7 +4,6 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::{Duration, Instant, SystemTime};
use crate::metrics::{STORAGE_IO_SIZE, StorageIoSizeOperation};
use camino::Utf8PathBuf;
use chrono::format::{DelayedFormat, StrftimeItems};
use futures::Future;
@@ -317,8 +316,8 @@ impl SecondaryDetail {
}
fn clear_timeline_metrics(
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
_tenant_shard_id: &TenantShardId,
_timeline_id: &TimelineId,
detail: SecondaryDetailTimeline,
resident_metric: &UIntGauge,
) {
@@ -330,17 +329,7 @@ impl SecondaryDetail {
.sum(),
);
let shard_id = format!("{}", tenant_shard_id.shard_slug());
let tenant_id = tenant_shard_id.tenant_id.to_string();
let timeline_id = timeline_id.to_string();
for op in StorageIoSizeOperation::VARIANTS {
let _ = STORAGE_IO_SIZE.remove_label_values(&[
op,
tenant_id.as_str(),
shard_id.as_str(),
timeline_id.as_str(),
]);
}
}
/// Additionally returns the total number of layers, used for more stable relative access time

View File

@@ -39,10 +39,6 @@ use utils::postgres_client::{
use super::walreceiver_connection::{WalConnectionStatus, WalReceiverError};
use super::{TaskEvent, TaskHandle, TaskStateUpdate, WalReceiverConf};
use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics::{
WALRECEIVER_ACTIVE_MANAGERS, WALRECEIVER_BROKER_UPDATES, WALRECEIVER_CANDIDATES_ADDED,
WALRECEIVER_CANDIDATES_REMOVED,
};
use crate::task_mgr::TaskKind;
use crate::tenant::{Timeline, debug_assert_current_span_has_tenant_and_timeline_id};
@@ -76,11 +72,6 @@ pub(super) async fn connection_manager_loop_step(
}
}
WALRECEIVER_ACTIVE_MANAGERS.inc();
scopeguard::defer! {
WALRECEIVER_ACTIVE_MANAGERS.dec();
}
let id = TenantTimelineId {
tenant_id: connection_manager_state.timeline.tenant_shard_id.tenant_id,
timeline_id: connection_manager_state.timeline.timeline_id,
@@ -728,8 +719,6 @@ impl ConnectionManagerState {
}
};
WALRECEIVER_BROKER_UPDATES.inc();
trace!(
"safekeeper info update: standby_horizon(cutoff)={}",
timeline_update.standby_horizon
@@ -756,7 +745,6 @@ impl ConnectionManagerState {
%new_safekeeper_id,
"New SK node was added",
);
WALRECEIVER_CANDIDATES_ADDED.inc();
}
}
@@ -1044,7 +1032,6 @@ impl ConnectionManagerState {
"Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections"
);
self.wal_connection_retries.remove(&node_id);
WALRECEIVER_CANDIDATES_REMOVED.inc();
}
}
}
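
Note: the deleted WALRECEIVER_ACTIVE_MANAGERS accounting relied on scopeguard::defer! so the gauge is decremented on every way out of connection_manager_loop_step, including `?` and panics, not just the happy path. A sketch of the pattern, assuming the scopeguard and prometheus crates and a hypothetical metric name:

use once_cell::sync::Lazy;
use prometheus::{IntGauge, register_int_gauge};

static ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
    register_int_gauge!(
        "walreceiver_active_managers",
        "Number of active walreceiver managers"
    )
    .expect("failed to define a metric")
});

async fn connection_manager_loop_step() {
    ACTIVE_MANAGERS.inc();
    // The deferred closure runs when the guard is dropped, so the
    // gauge is decremented on every exit path from this function.
    scopeguard::defer! {
        ACTIVE_MANAGERS.dec();
    }
    // ... manage safekeeper connections ...
}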

View File

@@ -29,7 +29,6 @@ pub use pageserver_api::models::virtual_file as api;
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};
use crate::assert_u64_eq_usize::UsizeIsU64;
use crate::context::RequestContext;
use crate::page_cache::{PAGE_SZ, PageWriteGuard};
pub(crate) mod io_engine;
@@ -905,7 +904,7 @@ impl VirtualFileInner {
&self,
buf: tokio_epoll_uring::Slice<Buf>,
offset: u64,
ctx: &RequestContext,
_ctx: &RequestContext,
) -> (tokio_epoll_uring::Slice<Buf>, Result<usize, Error>)
where
Buf: tokio_epoll_uring::IoBufMut + Send,
@@ -922,9 +921,7 @@ impl VirtualFileInner {
observe_duration!(StorageIoOperation::Read, {
let ((_file_guard, buf), res) = io_engine::get().read_at(file_guard, offset, buf).await;
let res = res.maybe_fatal_err("io_engine read_at inside VirtualFileInner::read_at");
if let Ok(size) = res {
ctx.io_size_metrics().read.add(size.into_u64());
}
(buf, res)
})
}
@@ -945,7 +942,7 @@ impl VirtualFileInner {
&self,
buf: FullSlice<B>,
offset: u64,
ctx: &RequestContext,
_ctx: &RequestContext,
) -> (FullSlice<B>, Result<usize, Error>) {
let file_guard = match self.lock_file().await {
Ok(file_guard) => file_guard,
@@ -954,9 +951,7 @@ impl VirtualFileInner {
observe_duration!(StorageIoOperation::Write, {
let ((_file_guard, buf), result) =
io_engine::get().write_at(file_guard, offset, buf).await;
if let Ok(size) = result {
ctx.io_size_metrics().write.add(size.into_u64());
}
(buf, result)
})
}