remove some more metrics, still compiles
@@ -91,12 +91,12 @@
 use std::{sync::Arc, time::Duration};
 
 use once_cell::sync::Lazy;
 
 use tracing::warn;
 use utils::{id::TimelineId, shard::TenantShardId};
 
 use crate::{
-    metrics::{StorageIoSizeMetrics, TimelineMetrics},
+    metrics::TimelineMetrics,
     task_mgr::TaskKind,
     tenant::Timeline,
 };
@@ -122,38 +122,34 @@ pub struct RequestContext {
 #[derive(Clone)]
 pub(crate) enum Scope {
     Global {
-        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
+
     },
     SecondaryTenant {
-        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
+
     },
     SecondaryTimeline {
-        io_size_metrics: crate::metrics::StorageIoSizeMetrics,
+
     },
     Timeline {
         // We wrap the `Arc<TimelineMetrics>`s inside another Arc to avoid child
         // context creation contending for the ref counters of the Arc<TimelineMetrics>,
         // which are shared among all tasks that operate on the timeline, especially
        // concurrent page_service connections.
         #[allow(clippy::redundant_allocation)]
         arc_arc: Arc<Arc<TimelineMetrics>>,
     },
     #[cfg(test)]
     UnitTest {
-        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
+
     },
     DebugTools {
-        io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics,
+
     },
 }
 
-static GLOBAL_IO_SIZE_METRICS: Lazy<crate::metrics::StorageIoSizeMetrics> =
-    Lazy::new(|| crate::metrics::StorageIoSizeMetrics::new("*", "*", "*"));
-
 impl Scope {
     pub(crate) fn new_global() -> Self {
         Scope::Global {
-            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
         }
     }
     /// NB: this allocates, so, use only at relatively long-lived roots, e.g., at start
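
Note on the `arc_arc` field kept above: the double `Arc` is purely a refcount-contention optimization, as the comment says. A minimal std-only sketch of the effect, with `TimelineMetrics` reduced to a hypothetical stand-in:

    use std::sync::Arc;
    use std::sync::atomic::{AtomicU64, Ordering};

    // Hypothetical stand-in for the real TimelineMetrics.
    struct TimelineMetrics {
        storage_io_read_bytes: AtomicU64,
    }

    fn main() {
        // One Arc<TimelineMetrics> is shared by every task on the timeline.
        let shared = Arc::new(TimelineMetrics {
            storage_io_read_bytes: AtomicU64::new(0),
        });

        // The scope holds Arc<Arc<TimelineMetrics>>; building it bumps the
        // shared (inner) refcount exactly once.
        let scope: Arc<Arc<TimelineMetrics>> = Arc::new(Arc::clone(&shared));

        // Deriving a child context clones only the outer Arc, so it never
        // writes to the refcount cache line all timeline tasks share.
        let child_scope = Arc::clone(&scope);
        assert_eq!(Arc::strong_count(&shared), 2); // unchanged by the clone

        child_scope.storage_io_read_bytes.fetch_add(4096, Ordering::Relaxed);
    }
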
@@ -173,18 +169,13 @@ impl Scope {
         }
     }
     pub(crate) fn new_secondary_timeline(
-        tenant_shard_id: &TenantShardId,
-        timeline_id: &TimelineId,
+        _tenant_shard_id: &TenantShardId,
+        _timeline_id: &TimelineId,
     ) -> Self {
-        let tenant_id = tenant_shard_id.tenant_id.to_string();
-        let shard_id = tenant_shard_id.shard_slug().to_string();
-        let timeline_id = timeline_id.to_string();
-
-        let io_size_metrics =
-            crate::metrics::StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
-        Scope::SecondaryTimeline { io_size_metrics }
+        // TODO(https://github.com/neondatabase/neon/issues/11156): secondary timelines have no infrastructure for metrics lifecycle.
+        Scope::SecondaryTimeline { }
     }
     pub(crate) fn new_secondary_tenant(_tenant_shard_id: &TenantShardId) -> Self {
         // Before propagating metrics via RequestContext, the labels were inferred from file path.
@@ -197,19 +188,19 @@ impl Scope {
         // like we do for attached timelines. (We don't have attached-tenant-scoped usage of VirtualFile
         // at this point, so, we were able to completely side-step tenant-scoped stuff there).
         Scope::SecondaryTenant {
-            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
+
         }
     }
     #[cfg(test)]
     pub(crate) fn new_unit_test() -> Self {
         Scope::UnitTest {
-            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
+
         }
     }
 
     pub(crate) fn new_debug_tools() -> Self {
         Scope::DebugTools {
-            io_size_metrics: &GLOBAL_IO_SIZE_METRICS,
+
         }
     }
 }
@@ -528,41 +519,6 @@ impl RequestContext {
         self.read_path_debug
     }
 
-    pub(crate) fn io_size_metrics(&self) -> &StorageIoSizeMetrics {
-        match &self.scope {
-            Scope::Global { io_size_metrics } => {
-                let is_unit_test = cfg!(test);
-                let is_regress_test_build = cfg!(feature = "testing");
-                if is_unit_test || is_regress_test_build {
-                    panic!("all VirtualFile instances are timeline-scoped");
-                } else {
-                    use once_cell::sync::Lazy;
-                    use std::sync::Mutex;
-                    use std::time::Duration;
-                    use utils::rate_limit::RateLimit;
-                    static LIMIT: Lazy<Mutex<RateLimit>> =
-                        Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(1))));
-                    let mut guard = LIMIT.lock().unwrap();
-                    guard.call2(|rate_limit_stats| {
-                        warn!(
-                            %rate_limit_stats,
-                            backtrace=%std::backtrace::Backtrace::force_capture(),
-                            "all VirtualFile instances are timeline-scoped",
-                        );
-                    });
-
-                    io_size_metrics
-                }
-            }
-            Scope::Timeline { arc_arc } => &arc_arc.storage_io_size,
-            Scope::SecondaryTimeline { io_size_metrics } => io_size_metrics,
-            Scope::SecondaryTenant { io_size_metrics } => io_size_metrics,
-            #[cfg(test)]
-            Scope::UnitTest { io_size_metrics } => io_size_metrics,
-            Scope::DebugTools { io_size_metrics } => io_size_metrics,
-        }
-    }
-
     pub(crate) fn ondemand_download_wait_observe(&self, duration: Duration) {
         if duration == Duration::ZERO {
             return;
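
The deleted fallback above warns through `utils::rate_limit::RateLimit`, so the message fires at most once per second instead of once per I/O. A std-only sketch of that idea (illustrative names, not the actual `utils` API):

    use std::sync::Mutex;
    use std::time::{Duration, Instant};

    // Run the callback at most once per interval, telling it how many
    // calls were suppressed since it last fired.
    struct RateLimit {
        interval: Duration,
        last_fired: Option<Instant>,
        suppressed: u64,
    }

    impl RateLimit {
        fn new(interval: Duration) -> Self {
            Self { interval, last_fired: None, suppressed: 0 }
        }

        fn call(&mut self, f: impl FnOnce(u64)) {
            let now = Instant::now();
            match self.last_fired {
                Some(t) if now.duration_since(t) < self.interval => self.suppressed += 1,
                _ => {
                    f(self.suppressed);
                    self.suppressed = 0;
                    self.last_fired = Some(now);
                }
            }
        }
    }

    static LIMIT: Mutex<Option<RateLimit>> = Mutex::new(None);

    fn warn_rate_limited(msg: &str) {
        let mut guard = LIMIT.lock().unwrap();
        let limit = guard.get_or_insert_with(|| RateLimit::new(Duration::from_secs(1)));
        limit.call(|suppressed| eprintln!("{msg} (suppressed={suppressed})"));
    }

    fn main() {
        for _ in 0..3 {
            // Only the first call logs; the other two are counted.
            warn_rate_limited("all VirtualFile instances are timeline-scoped");
        }
    }
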
@@ -7,10 +7,10 @@ use enum_map::{Enum as _, EnumMap};
 use futures::Future;
 use metrics::{
     CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter,
-    IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
+    IntCounterPairVec, IntCounterVec, IntGaugeVec, UIntGauge, UIntGaugeVec,
     register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
     register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
-    register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
+    register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
 };
 use once_cell::sync::Lazy;
 use pageserver_api::config::{
@@ -626,59 +626,6 @@ pub(crate) enum StorageIoOperation {
 }
 
-
-#[derive(Clone, Copy)]
-#[repr(usize)]
-pub(crate) enum StorageIoSizeOperation {
-    Read,
-    Write,
-}
-
-impl StorageIoSizeOperation {
-    pub(crate) const VARIANTS: &'static [&'static str] = &["read", "write"];
-
-    fn as_str(&self) -> &'static str {
-        Self::VARIANTS[*self as usize]
-    }
-}
-
-// Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
-pub(crate) static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
-    register_uint_gauge_vec!(
-        "pageserver_io_operations_bytes_total",
-        "Total amount of bytes read/written in IO operations",
-        &["operation", "tenant_id", "shard_id", "timeline_id"]
-    )
-    .expect("failed to define a metric")
-});
-
-#[derive(Clone, Debug)]
-pub(crate) struct StorageIoSizeMetrics {
-    pub read: UIntGauge,
-    pub write: UIntGauge,
-}
-
-impl StorageIoSizeMetrics {
-    pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
-        let read = STORAGE_IO_SIZE
-            .get_metric_with_label_values(&[
-                StorageIoSizeOperation::Read.as_str(),
-                tenant_id,
-                shard_id,
-                timeline_id,
-            ])
-            .unwrap();
-        let write = STORAGE_IO_SIZE
-            .get_metric_with_label_values(&[
-                StorageIoSizeOperation::Write.as_str(),
-                tenant_id,
-                shard_id,
-                timeline_id,
-            ])
-            .unwrap();
-        Self { read, write }
-    }
-}
-
 #[cfg(not(test))]
 pub(crate) mod virtual_file_descriptor_cache {
     use super::*;
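
For reference, the deleted `StorageIoSizeMetrics` follows a common Prometheus pattern: resolve the labeled children once in the constructor, then update them on the hot path without hashing label values per call. A sketch against the plain `prometheus` crate (which the `metrics` crate used here wraps; `IntGauge` stands in for the original `UIntGauge`, and all names are illustrative):

    use prometheus::{IntGauge, IntGaugeVec, Opts};

    struct IoSizeMetrics {
        read: IntGauge,
        write: IntGauge,
    }

    impl IoSizeMetrics {
        fn new(vec: &IntGaugeVec, tenant: &str, shard: &str, timeline: &str) -> Self {
            // Resolving a child hashes the label values; do it once here.
            let read = vec
                .get_metric_with_label_values(&["read", tenant, shard, timeline])
                .unwrap();
            let write = vec
                .get_metric_with_label_values(&["write", tenant, shard, timeline])
                .unwrap();
            Self { read, write }
        }
    }

    fn main() {
        let vec = IntGaugeVec::new(
            Opts::new("io_bytes_total", "bytes read/written"),
            &["operation", "tenant_id", "shard_id", "timeline_id"],
        )
        .unwrap();
        let m = IoSizeMetrics::new(&vec, "t1", "0001", "tl1");
        m.read.add(4096); // hot path: plain atomic add, no label lookup
        assert_eq!(vec.with_label_values(&["read", "t1", "0001", "tl1"]).get(), 4096);
    }
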
@@ -941,19 +888,9 @@ static PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE: Lazy<IntCounterVec>
     .expect("failed to define a metric")
 });
 
-pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
-    register_int_gauge_vec!(
-        "pageserver_page_service_config_max_batch_size",
-        "Configured maximum batch size for the server-side batching functionality of page_service. \
-         Labels expose more of the configuration parameters.",
-        &["mode", "execution", "batching"]
-    )
-    .expect("failed to define a metric")
-});
-
 fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
-    PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
-    let (label_values, value) = match conf {
+    let (_label_values, _value) = match conf {
         PageServicePipeliningConfig::Serial => (["serial", "-", "-"], 1),
         PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
             max_batch_size,
@@ -975,9 +912,6 @@ fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
             ([mode, execution, batching], max_batch_size.get())
         }
     };
-    PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
-        .with_label_values(&label_values)
-        .set(value.try_into().unwrap());
 }
 
 static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
@@ -1329,14 +1263,6 @@ pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(
     .expect("failed to define a metric")
 });
 
-pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
-    register_int_gauge!(
-        "pageserver_walreceiver_active_managers",
-        "Number of active walreceiver managers"
-    )
-    .expect("failed to define a metric")
-});
-
 pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
     register_int_counter!(
@@ -1346,21 +1272,6 @@ pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
     .expect("failed to define a metric")
 });
 
-pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "pageserver_walreceiver_candidates_events_total",
-        "Number of walreceiver candidate events",
-        &["event"]
-    )
-    .expect("failed to define a metric")
-});
-
-pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
-    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
-
-pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
-    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
-
 // Metrics collected on WAL redo operations
 //
 // We collect the time spent in actual WAL redo ('redo'), and time waiting
@@ -1670,7 +1581,6 @@ pub(crate) struct TimelineMetrics {
     pub garbage_collect_histo: StorageTimeMetrics,
     pub find_gc_cutoffs_histo: StorageTimeMetrics,
     /// copy of LayeredTimeline.current_logical_size
-    pub storage_io_size: StorageIoSizeMetrics,
     shutdown: std::sync::atomic::AtomicBool,
 }
@@ -1726,9 +1636,6 @@ impl TimelineMetrics {
             &timeline_id,
         );
 
-
-        let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
-
         TimelineMetrics {
             tenant_id,
             shard_id,
@@ -1740,7 +1647,6 @@ impl TimelineMetrics {
             imitate_logical_size_histo,
             garbage_collect_histo,
             find_gc_cutoffs_histo,
-            storage_io_size,
             shutdown: std::sync::atomic::AtomicBool::default(),
         }
     }
@@ -1835,10 +1741,6 @@ impl TimelineMetrics {
             ]);
         }
 
-        for op in StorageIoSizeOperation::VARIANTS {
-            let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
-        }
-
         let _ =
             WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
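
The loop removed here is the other half of that label lifecycle: a metric vec never forgets a child on its own, so timeline shutdown must delete every series it created or the exporter keeps reporting stale label sets. A sketch with the same assumed `prometheus`-style API as above:

    use prometheus::IntGaugeVec;

    // Illustrative: drop both per-operation series for one timeline.
    // `remove_label_values` errors if the series was never created,
    // which is safe to ignore here (hence `let _ =`).
    fn clear_io_series(vec: &IntGaugeVec, tenant: &str, shard: &str, timeline: &str) {
        for op in ["read", "write"] {
            let _ = vec.remove_label_values(&[op, tenant, shard, timeline]);
        }
    }
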
@@ -2259,8 +2161,6 @@ pub fn preinitialize_metrics(
     [
         &WALRECEIVER_STARTED_CONNECTIONS,
         &WALRECEIVER_BROKER_UPDATES,
-        &WALRECEIVER_CANDIDATES_ADDED,
-        &WALRECEIVER_CANDIDATES_REMOVED,
         &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
         &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
         &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
@@ -4,7 +4,6 @@ use std::str::FromStr;
 use std::sync::Arc;
 use std::time::{Duration, Instant, SystemTime};
 
-use crate::metrics::{STORAGE_IO_SIZE, StorageIoSizeOperation};
 use camino::Utf8PathBuf;
 use chrono::format::{DelayedFormat, StrftimeItems};
 use futures::Future;
@@ -317,8 +316,8 @@ impl SecondaryDetail {
     }
 
     fn clear_timeline_metrics(
-        tenant_shard_id: &TenantShardId,
-        timeline_id: &TimelineId,
+        _tenant_shard_id: &TenantShardId,
+        _timeline_id: &TimelineId,
         detail: SecondaryDetailTimeline,
         resident_metric: &UIntGauge,
     ) {
@@ -330,17 +329,7 @@ impl SecondaryDetail {
                 .sum(),
         );
 
-        let shard_id = format!("{}", tenant_shard_id.shard_slug());
-        let tenant_id = tenant_shard_id.tenant_id.to_string();
-        let timeline_id = timeline_id.to_string();
-        for op in StorageIoSizeOperation::VARIANTS {
-            let _ = STORAGE_IO_SIZE.remove_label_values(&[
-                op,
-                tenant_id.as_str(),
-                shard_id.as_str(),
-                timeline_id.as_str(),
-            ]);
-        }
     }
 
     /// Additionally returns the total number of layers, used for more stable relative access time
@@ -39,10 +39,6 @@ use utils::postgres_client::{
 use super::walreceiver_connection::{WalConnectionStatus, WalReceiverError};
 use super::{TaskEvent, TaskHandle, TaskStateUpdate, WalReceiverConf};
 use crate::context::{DownloadBehavior, RequestContext};
-use crate::metrics::{
-    WALRECEIVER_ACTIVE_MANAGERS, WALRECEIVER_BROKER_UPDATES, WALRECEIVER_CANDIDATES_ADDED,
-    WALRECEIVER_CANDIDATES_REMOVED,
-};
 use crate::task_mgr::TaskKind;
 use crate::tenant::{Timeline, debug_assert_current_span_has_tenant_and_timeline_id};
@@ -76,11 +72,6 @@ pub(super) async fn connection_manager_loop_step(
         }
     }
 
-    WALRECEIVER_ACTIVE_MANAGERS.inc();
-    scopeguard::defer! {
-        WALRECEIVER_ACTIVE_MANAGERS.dec();
-    }
-
     let id = TenantTimelineId {
         tenant_id: connection_manager_state.timeline.tenant_shard_id.tenant_id,
         timeline_id: connection_manager_state.timeline.timeline_id,
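
The deleted lines use the inc/defer-dec idiom for the active-managers gauge. A sketch of the idiom with an `AtomicI64` standing in for the Prometheus gauge (`scopeguard` is the crate actually used above; the decrement runs on every exit path, including early returns and unwinding panics):

    use std::sync::atomic::{AtomicI64, Ordering};

    static ACTIVE_MANAGERS: AtomicI64 = AtomicI64::new(0);

    fn connection_manager_loop_step() {
        ACTIVE_MANAGERS.fetch_add(1, Ordering::Relaxed);
        scopeguard::defer! {
            ACTIVE_MANAGERS.fetch_sub(1, Ordering::Relaxed);
        }

        // ... manager body: every way out of this function, normal or
        // panicking, runs the deferred decrement exactly once ...
    }
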
@@ -728,8 +719,6 @@ impl ConnectionManagerState {
             }
         };
 
-        WALRECEIVER_BROKER_UPDATES.inc();
-
         trace!(
             "safekeeper info update: standby_horizon(cutoff)={}",
             timeline_update.standby_horizon
@@ -756,7 +745,6 @@ impl ConnectionManagerState {
                     %new_safekeeper_id,
                     "New SK node was added",
                 );
-                WALRECEIVER_CANDIDATES_ADDED.inc();
             }
         }
@@ -1044,7 +1032,6 @@ impl ConnectionManagerState {
                 "Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections"
             );
             self.wal_connection_retries.remove(&node_id);
-            WALRECEIVER_CANDIDATES_REMOVED.inc();
         }
     }
 }
@@ -29,7 +29,6 @@ pub use pageserver_api::models::virtual_file as api;
 use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};
 
-use crate::assert_u64_eq_usize::UsizeIsU64;
 use crate::context::RequestContext;
 use crate::page_cache::{PAGE_SZ, PageWriteGuard};
 pub(crate) mod io_engine;
@@ -905,7 +904,7 @@ impl VirtualFileInner {
         &self,
         buf: tokio_epoll_uring::Slice<Buf>,
         offset: u64,
-        ctx: &RequestContext,
+        _ctx: &RequestContext,
     ) -> (tokio_epoll_uring::Slice<Buf>, Result<usize, Error>)
     where
         Buf: tokio_epoll_uring::IoBufMut + Send,
@@ -922,9 +921,7 @@ impl VirtualFileInner {
         observe_duration!(StorageIoOperation::Read, {
             let ((_file_guard, buf), res) = io_engine::get().read_at(file_guard, offset, buf).await;
             let res = res.maybe_fatal_err("io_engine read_at inside VirtualFileInner::read_at");
-            if let Ok(size) = res {
-                ctx.io_size_metrics().read.add(size.into_u64());
-            }
+
             (buf, res)
         })
     }
@@ -945,7 +942,7 @@ impl VirtualFileInner {
         &self,
         buf: FullSlice<B>,
         offset: u64,
-        ctx: &RequestContext,
+        _ctx: &RequestContext,
     ) -> (FullSlice<B>, Result<usize, Error>) {
         let file_guard = match self.lock_file().await {
             Ok(file_guard) => file_guard,
@@ -954,9 +951,7 @@ impl VirtualFileInner {
         observe_duration!(StorageIoOperation::Write, {
             let ((_file_guard, buf), result) =
                 io_engine::get().write_at(file_guard, offset, buf).await;
-            if let Ok(size) = result {
-                ctx.io_size_metrics().write.add(size.into_u64());
-            }
+
             (buf, result)
         })
     }
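
Taken together, the virtual_file changes strip the per-request byte accounting while keeping the `RequestContext` parameter (now `_ctx`) in the signatures, so no caller changes. A minimal sketch of the pattern being removed, with the I/O layer charging bytes to whatever metrics scope the caller's context carries (all types are hypothetical reductions of the real ones):

    use std::sync::atomic::{AtomicU64, Ordering};

    struct StorageIoSizeMetrics {
        read: AtomicU64,
    }

    struct RequestContext {
        io_size_metrics: StorageIoSizeMetrics,
    }

    impl RequestContext {
        fn io_size_metrics(&self) -> &StorageIoSizeMetrics {
            &self.io_size_metrics
        }
    }

    // The I/O layer stays scope-agnostic: it reports sizes to the context,
    // and the context decides which series (per-timeline or global) to charge.
    fn account_read(ctx: &RequestContext, bytes_read: usize) {
        ctx.io_size_metrics()
            .read
            .fetch_add(bytes_read as u64, Ordering::Relaxed);
    }

    fn main() {
        let ctx = RequestContext {
            io_size_metrics: StorageIoSizeMetrics { read: AtomicU64::new(0) },
        };
        account_read(&ctx, 8192);
        assert_eq!(ctx.io_size_metrics().read.load(Ordering::Relaxed), 8192);
    }
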