From e43cde7aba2681d8d2e703fdfdb963caccf5acd0 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 1 Dec 2023 22:45:04 +0100 Subject: [PATCH] initial logical size: remove CALLS metric from hot path (#6018) Only introduced a few hours ago (#5995), I took a look at the numbers from staging and realized that `get_current_logical_size()` is on the walingest hot path: we call it for every `ReplicationMessage::XLogData` that we receive. Since the metric is global, it would be quite a busy cache line. This PR replaces it with a new metric purpose-built for what's most interesting right now. --- pageserver/src/metrics.rs | 28 +++++++------------ pageserver/src/tenant/timeline.rs | 18 ++++++++++++ .../src/tenant/timeline/logical_size.rs | 12 ++++---- 3 files changed, 35 insertions(+), 23 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 6aee40b579..d2684691e0 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -511,24 +511,16 @@ pub(crate) mod initial_logical_size { } } - pub(crate) struct Calls { - pub(crate) approximate: IntCounter, - pub(crate) exact: IntCounter, - } - - pub(crate) static CALLS: Lazy = Lazy::new(|| { - let vec = register_int_counter_vec!( - "pageserver_initial_logical_size_calls", - "Incremented each time some code asks for incremental logical size.\ - The label records the accuracy of the result.", - &["accuracy"] - ) - .unwrap(); - Calls { - approximate: vec.with_label_values(&["approximate"]), - exact: vec.with_label_values(&["exact"]), - } - }); + // context: https://github.com/neondatabase/neon/issues/5963 + pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy = + Lazy::new(|| { + register_int_counter!( + "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size", + "Counter for the following event: walreceiver calls\ + Timeline::get_current_logical_size() and it returns `Approximate` for the first time." + ) + .unwrap() + }); } pub(crate) static TENANT_STATE_METRIC: Lazy = Lazy::new(|| { diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 6313e7be84..bf4e19e5fb 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -868,6 +868,24 @@ impl Timeline { self.try_spawn_size_init_task(initial_part_end, ctx); } + if let CurrentLogicalSize::Approximate(_) = ¤t_size { + if ctx.task_kind() == TaskKind::WalReceiverConnectionHandler { + let first = self + .current_logical_size + .did_return_approximate_to_walreceiver + .compare_exchange( + false, + true, + AtomicOrdering::Relaxed, + AtomicOrdering::Relaxed, + ) + .is_ok(); + if first { + crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE.inc(); + } + } + } + current_size } diff --git a/pageserver/src/tenant/timeline/logical_size.rs b/pageserver/src/tenant/timeline/logical_size.rs index 1f103051ef..a33fb28ebd 100644 --- a/pageserver/src/tenant/timeline/logical_size.rs +++ b/pageserver/src/tenant/timeline/logical_size.rs @@ -4,7 +4,7 @@ use once_cell::sync::OnceCell; use tokio::sync::Semaphore; use utils::lsn::Lsn; -use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering}; +use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering}; use std::sync::Arc; /// Internal structure to hold all data needed for logical size calculation. @@ -55,6 +55,9 @@ pub(super) struct LogicalSize { /// see `current_logical_size_gauge`. Use the `update_current_logical_size` /// to modify this, it will also keep the prometheus metric in sync. pub size_added_after_initial: AtomicI64, + + /// For [`crate::metrics::initial_logical_size::TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE`]. + pub(super) did_return_approximate_to_walreceiver: AtomicBool, } /// Normalized current size, that the data in pageserver occupies. @@ -119,6 +122,7 @@ impl LogicalSize { initial_size_computation: Arc::new(Semaphore::new(0)), initial_part_end: None, size_added_after_initial: AtomicI64::new(0), + did_return_approximate_to_walreceiver: AtomicBool::new(false), } } @@ -128,6 +132,7 @@ impl LogicalSize { initial_size_computation: Arc::new(Semaphore::new(1)), initial_part_end: Some(compute_to), size_added_after_initial: AtomicI64::new(0), + did_return_approximate_to_walreceiver: AtomicBool::new(false), } } @@ -137,15 +142,12 @@ impl LogicalSize { // we change the type. match self.initial_logical_size.get() { Some((initial_size, _)) => { - crate::metrics::initial_logical_size::CALLS.exact.inc(); CurrentLogicalSize::Exact(Exact(initial_size.checked_add_signed(size_increment) .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}")) .unwrap())) } None => { - crate::metrics::initial_logical_size::CALLS - .approximate - .inc(); + let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0); CurrentLogicalSize::Approximate(Approximate(non_negative_size_increment)) }