From ce1e575db186a4b7c5fe56b58dd6079d6b60ffdf Mon Sep 17 00:00:00 2001 From: BodoBolero Date: Wed, 23 Apr 2025 16:12:01 +0200 Subject: [PATCH] removed many more metrics, still compiles --- libs/metrics/src/lib.rs | 21 +- libs/remote_storage/src/azure_blob.rs | 58 +- libs/remote_storage/src/metrics.rs | 189 +-- libs/remote_storage/src/s3_bucket.rs | 101 +- pageserver/src/consumption_metrics/metrics.rs | 3 +- pageserver/src/context.rs | 1 + pageserver/src/deletion_queue.rs | 12 - pageserver/src/deletion_queue/deleter.rs | 12 - pageserver/src/deletion_queue/list_writer.rs | 14 +- pageserver/src/deletion_queue/validator.rs | 11 +- pageserver/src/disk_usage_eviction_task.rs | 41 +- pageserver/src/import_datadir.rs | 2 - pageserver/src/metrics.rs | 1272 +---------------- pageserver/src/page_service.rs | 3 +- pageserver/src/tenant.rs | 18 +- pageserver/src/tenant/mgr.rs | 4 +- .../src/tenant/remote_timeline_client.rs | 4 +- pageserver/src/tenant/secondary.rs | 41 +- pageserver/src/tenant/secondary/downloader.rs | 77 +- .../src/tenant/secondary/heatmap_uploader.rs | 16 +- pageserver/src/tenant/storage_layer/layer.rs | 14 +- pageserver/src/tenant/tasks.rs | 14 +- pageserver/src/tenant/timeline.rs | 84 +- pageserver/src/tenant/timeline/compaction.rs | 13 +- .../src/tenant/timeline/eviction_task.rs | 12 +- .../timeline/heatmap_layers_downloader.rs | 8 +- pageserver/src/walredo.rs | 25 +- pageserver/src/walredo/process.rs | 10 +- .../src/walredo/process/no_leak_child.rs | 5 +- 29 files changed, 207 insertions(+), 1878 deletions(-) diff --git a/libs/metrics/src/lib.rs b/libs/metrics/src/lib.rs index 4df8d7bc51..e1f77deb5a 100644 --- a/libs/metrics/src/lib.rs +++ b/libs/metrics/src/lib.rs @@ -76,14 +76,7 @@ pub fn gather() -> Vec { mfs } -static DISK_IO_BYTES: Lazy = Lazy::new(|| { - register_int_gauge_vec!( - "libmetrics_disk_io_bytes_total", - "Bytes written and read from disk, grouped by the operation (read|write)", - &["io_operation"] - ) - .expect("Failed to register disk i/o bytes int gauge vec") -}); + static MAXRSS_KB: Lazy = Lazy::new(|| { register_int_gauge!( @@ -261,12 +254,7 @@ const BYTES_IN_BLOCK: i64 = 512; fn update_rusage_metrics() { let rusage_stats = get_rusage_stats(); - DISK_IO_BYTES - .with_label_values(&["read"]) - .set(rusage_stats.ru_inblock * BYTES_IN_BLOCK); - DISK_IO_BYTES - .with_label_values(&["write"]) - .set(rusage_stats.ru_oublock * BYTES_IN_BLOCK); + // On macOS, the unit of maxrss is bytes; on Linux, it's kilobytes. https://stackoverflow.com/a/59915669 #[cfg(target_os = "macos")] @@ -357,10 +345,7 @@ impl GenericCounterPairVec

{ self.get_metric_with_label_values(vals).unwrap() } - pub fn remove_label_values(&self, res: &mut [prometheus::Result<()>; 2], vals: &[&str]) { - res[0] = self.inc.remove_label_values(vals); - res[1] = self.dec.remove_label_values(vals); - } + } impl GenericCounterPair

{ diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index 18146c5464..e2d77f4126 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -23,7 +23,6 @@ use futures::future::Either; use futures::stream::Stream; use futures_util::{StreamExt, TryStreamExt}; use http_types::{StatusCode, Url}; -use scopeguard::ScopeGuard; use tokio_util::sync::CancellationToken; use tracing::debug; use utils::backoff; @@ -32,7 +31,7 @@ use utils::backoff::exponential_backoff_duration_seconds; use super::REMOTE_STORAGE_PREFIX_SEPARATOR; use crate::config::AzureConfig; use crate::error::Cancelled; -use crate::metrics::{AttemptOutcome, RequestKind, start_measuring_requests}; +use crate::metrics::RequestKind; use crate::{ ConcurrencyLimiter, Download, DownloadError, DownloadKind, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel, @@ -165,7 +164,7 @@ impl AzureBlobStorage { let mut last_modified = None; let mut metadata = HashMap::new(); - let started_at = start_measuring_requests(kind); + let download = async { let response = builder @@ -237,19 +236,8 @@ impl AzureBlobStorage { TimeoutOrCancel::Cancel => return Err(DownloadError::Cancelled), }, }; - let started_at = ScopeGuard::into_inner(started_at); - let outcome = match &download { - Ok(_) => AttemptOutcome::Ok, - // At this level in the stack 404 and 304 responses do not indicate an error. - // There's expected cases when a blob may not exist or hasn't been modified since - // the last get (e.g. probing for timeline indices and heatmap downloads). - // Callers should handle errors if they are unexpected. - Err(DownloadError::NotFound | DownloadError::Unmodified) => AttemptOutcome::Ok, - Err(_) => AttemptOutcome::Err, - }; - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, outcome, started_at); + + download } @@ -431,7 +419,7 @@ impl RemoteStorage for AzureBlobStorage { let kind = RequestKind::Head; let _permit = self.permit(kind, cancel).await?; - let started_at = start_measuring_requests(kind); + let blob_client = self.client.blob_client(self.relative_path_to_name(key)); let properties_future = blob_client.get_properties().into_future(); @@ -443,12 +431,9 @@ impl RemoteStorage for AzureBlobStorage { _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()), }; - if let Ok(inner) = &res { - // do not incl. timeouts as errors in metrics but cancellations - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, inner, started_at); + if let Ok(_inner) = &res { + + } let data = match res { @@ -476,7 +461,7 @@ impl RemoteStorage for AzureBlobStorage { let kind = RequestKind::Put; let _permit = self.permit(kind, cancel).await?; - let started_at = start_measuring_requests(kind); + let op = async { let blob_client = self.client.blob_client(self.relative_path_to_name(to)); @@ -509,14 +494,7 @@ impl RemoteStorage for AzureBlobStorage { _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()), }; - let outcome = match res { - Ok(_) => AttemptOutcome::Ok, - Err(_) => AttemptOutcome::Err, - }; - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, outcome, started_at); + res } @@ -562,7 +540,7 @@ impl RemoteStorage for AzureBlobStorage { ) -> anyhow::Result<()> { let kind = RequestKind::Delete; let _permit = self.permit(kind, cancel).await?; - let started_at = start_measuring_requests(kind); + let op = async { // TODO batch requests are not supported by the SDK @@ -628,10 +606,8 @@ impl RemoteStorage for AzureBlobStorage { _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()), }; - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, &res, started_at); + + res } @@ -647,7 +623,7 @@ impl RemoteStorage for AzureBlobStorage { ) -> anyhow::Result<()> { let kind = RequestKind::Copy; let _permit = self.permit(kind, cancel).await?; - let started_at = start_measuring_requests(kind); + let timeout = tokio::time::sleep(self.timeout); @@ -701,10 +677,8 @@ impl RemoteStorage for AzureBlobStorage { }, }; - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, &res, started_at); + + res } diff --git a/libs/remote_storage/src/metrics.rs b/libs/remote_storage/src/metrics.rs index 81e68e9a29..f5161ced18 100644 --- a/libs/remote_storage/src/metrics.rs +++ b/libs/remote_storage/src/metrics.rs @@ -1,9 +1,7 @@ -use metrics::{ - Histogram, IntCounter, register_histogram_vec, register_int_counter, register_int_counter_vec, -}; -use once_cell::sync::Lazy; -pub(super) static BUCKET_METRICS: Lazy = Lazy::new(Default::default); + + + #[derive(Clone, Copy, Debug)] pub(crate) enum RequestKind { @@ -16,62 +14,9 @@ pub(crate) enum RequestKind { Head = 6, } -use RequestKind::*; + use scopeguard::ScopeGuard; -impl RequestKind { - const fn as_str(&self) -> &'static str { - match self { - Get => "get_object", - Put => "put_object", - Delete => "delete_object", - List => "list_objects", - Copy => "copy_object", - TimeTravel => "time_travel_recover", - Head => "head_object", - } - } - const fn as_index(&self) -> usize { - *self as usize - } -} - -const REQUEST_KIND_COUNT: usize = 7; -pub(crate) struct RequestTyped([C; REQUEST_KIND_COUNT]); - -impl RequestTyped { - pub(crate) fn get(&self, kind: RequestKind) -> &C { - &self.0[kind.as_index()] - } - - fn build_with(mut f: impl FnMut(RequestKind) -> C) -> Self { - use RequestKind::*; - let mut it = [Get, Put, Delete, List, Copy, TimeTravel, Head].into_iter(); - let arr = std::array::from_fn::(|index| { - let next = it.next().unwrap(); - assert_eq!(index, next.as_index()); - f(next) - }); - - if let Some(next) = it.next() { - panic!("unexpected {next:?}"); - } - - RequestTyped(arr) - } -} - -impl RequestTyped { - pub(crate) fn observe_elapsed(&self, kind: RequestKind, started_at: std::time::Instant) { - self.get(kind).observe(started_at.elapsed().as_secs_f64()) - } -} - -pub(crate) struct PassFailCancelledRequestTyped { - success: RequestTyped, - fail: RequestTyped, - cancelled: RequestTyped, -} #[derive(Debug, Clone, Copy)] pub(crate) enum AttemptOutcome { @@ -89,138 +34,22 @@ impl From<&Result> for AttemptOutcome { } } -impl AttemptOutcome { - pub(crate) fn as_str(&self) -> &'static str { - match self { - AttemptOutcome::Ok => "ok", - AttemptOutcome::Err => "err", - AttemptOutcome::Cancelled => "cancelled", - } - } -} -impl PassFailCancelledRequestTyped { - pub(crate) fn get(&self, kind: RequestKind, outcome: AttemptOutcome) -> &C { - let target = match outcome { - AttemptOutcome::Ok => &self.success, - AttemptOutcome::Err => &self.fail, - AttemptOutcome::Cancelled => &self.cancelled, - }; - target.get(kind) - } - fn build_with(mut f: impl FnMut(RequestKind, AttemptOutcome) -> C) -> Self { - let success = RequestTyped::build_with(|kind| f(kind, AttemptOutcome::Ok)); - let fail = RequestTyped::build_with(|kind| f(kind, AttemptOutcome::Err)); - let cancelled = RequestTyped::build_with(|kind| f(kind, AttemptOutcome::Cancelled)); - PassFailCancelledRequestTyped { - success, - fail, - cancelled, - } - } -} -impl PassFailCancelledRequestTyped { - pub(crate) fn observe_elapsed( - &self, - kind: RequestKind, - outcome: impl Into, - started_at: std::time::Instant, - ) { - self.get(kind, outcome.into()) - .observe(started_at.elapsed().as_secs_f64()) - } -} -/// On drop (cancellation) count towards [`BucketMetrics::cancelled_waits`]. -pub(crate) fn start_counting_cancelled_wait( - kind: RequestKind, -) -> ScopeGuard { - scopeguard::guard_on_success(std::time::Instant::now(), move |_| { - crate::metrics::BUCKET_METRICS - .cancelled_waits - .get(kind) - .inc() - }) -} + + /// On drop (cancellation) add time to [`BucketMetrics::req_seconds`]. pub(crate) fn start_measuring_requests( - kind: RequestKind, + _kind: RequestKind, ) -> ScopeGuard { - scopeguard::guard_on_success(std::time::Instant::now(), move |started_at| { - crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( - kind, - AttemptOutcome::Cancelled, - started_at, - ) + scopeguard::guard_on_success(std::time::Instant::now(), move |_started_at| { + }) } -pub(crate) struct BucketMetrics { - /// Full request duration until successful completion, error or cancellation. - pub(crate) req_seconds: PassFailCancelledRequestTyped, - /// Total amount of seconds waited on queue. - pub(crate) wait_seconds: RequestTyped, - /// Track how many semaphore awaits were cancelled per request type. - /// - /// This is in case cancellations are happening more than expected. - pub(crate) cancelled_waits: RequestTyped, - /// Total amount of deleted objects in batches or single requests. - pub(crate) deleted_objects_total: IntCounter, -} - -impl Default for BucketMetrics { - fn default() -> Self { - // first bucket 100 microseconds to count requests that do not need to wait at all - // and get a permit immediately - let buckets = [0.0001, 0.01, 0.10, 0.5, 1.0, 5.0, 10.0, 50.0, 100.0]; - - let req_seconds = register_histogram_vec!( - "remote_storage_s3_request_seconds", - "Seconds to complete a request", - &["request_type", "result"], - buckets.to_vec(), - ) - .unwrap(); - let req_seconds = PassFailCancelledRequestTyped::build_with(|kind, outcome| { - req_seconds.with_label_values(&[kind.as_str(), outcome.as_str()]) - }); - - let wait_seconds = register_histogram_vec!( - "remote_storage_s3_wait_seconds", - "Seconds rate limited", - &["request_type"], - buckets.to_vec(), - ) - .unwrap(); - let wait_seconds = - RequestTyped::build_with(|kind| wait_seconds.with_label_values(&[kind.as_str()])); - - let cancelled_waits = register_int_counter_vec!( - "remote_storage_s3_cancelled_waits_total", - "Times a semaphore wait has been cancelled per request type", - &["request_type"], - ) - .unwrap(); - let cancelled_waits = - RequestTyped::build_with(|kind| cancelled_waits.with_label_values(&[kind.as_str()])); - - let deleted_objects_total = register_int_counter!( - "remote_storage_s3_deleted_objects_total", - "Amount of deleted objects in total", - ) - .unwrap(); - - Self { - req_seconds, - wait_seconds, - cancelled_waits, - deleted_objects_total, - } - } -} diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index ba7ce9e1e7..da47e51960 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -41,7 +41,7 @@ use super::StorageMetadata; use crate::config::S3Config; use crate::error::Cancelled; pub(super) use crate::metrics::RequestKind; -use crate::metrics::{AttemptOutcome, start_counting_cancelled_wait, start_measuring_requests}; +use crate::metrics::{AttemptOutcome, start_measuring_requests}; use crate::support::PermitCarrying; use crate::{ ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, @@ -199,7 +199,7 @@ impl S3Bucket { kind: RequestKind, cancel: &CancellationToken, ) -> Result, Cancelled> { - let started_at = start_counting_cancelled_wait(kind); + let acquire = self.concurrency_limiter.acquire(kind); let permit = tokio::select! { @@ -207,10 +207,8 @@ impl S3Bucket { _ = cancel.cancelled() => return Err(Cancelled), }; - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .wait_seconds - .observe_elapsed(kind, started_at); + + Ok(permit) } @@ -220,7 +218,7 @@ impl S3Bucket { kind: RequestKind, cancel: &CancellationToken, ) -> Result { - let started_at = start_counting_cancelled_wait(kind); + let acquire = self.concurrency_limiter.acquire_owned(kind); let permit = tokio::select! { @@ -228,10 +226,8 @@ impl S3Bucket { _ = cancel.cancelled() => return Err(Cancelled), }; - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .wait_seconds - .observe_elapsed(kind, started_at); + + Ok(permit) } @@ -273,11 +269,7 @@ impl S3Bucket { // Count this in the AttemptOutcome::Ok bucket, because 404 is not // an error: we expect to sometimes fetch an object and find it missing, // e.g. when probing for timeline indices. - crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( - kind, - AttemptOutcome::Ok, - started_at, - ); + return Err(DownloadError::NotFound); } Err(SdkError::ServiceError(e)) @@ -287,19 +279,11 @@ impl S3Bucket { if e.raw().status().as_u16() == StatusCode::NotModified => { // Count an unmodified file as a success. - crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( - kind, - AttemptOutcome::Ok, - started_at, - ); + return Err(DownloadError::Unmodified); } Err(e) => { - crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( - kind, - AttemptOutcome::Err, - started_at, - ); + return Err(DownloadError::Other( anyhow::Error::new(e).context("download s3 object"), @@ -346,11 +330,11 @@ impl S3Bucket { delete_objects: &[ObjectIdentifier], cancel: &CancellationToken, ) -> anyhow::Result<()> { - let kind = RequestKind::Delete; + let mut cancel = std::pin::pin!(cancel.cancelled()); for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE_S3) { - let started_at = start_measuring_requests(kind); + let req = self .client @@ -370,15 +354,10 @@ impl S3Bucket { _ = &mut cancel => return Err(TimeoutOrCancel::Cancel.into()), }; - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, &resp, started_at); - + + let resp = resp.context("request deletion")?; - crate::metrics::BUCKET_METRICS - .deleted_objects_total - .inc_by(chunk.len() as u64); + if let Some(errors) = resp.errors { // Log a bounded number of the errors within the response: @@ -445,8 +424,8 @@ pin_project_lite::pin_project! { } impl PinnedDrop for TimedDownload { - fn drop(mut this: Pin<&mut Self>) { - crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(RequestKind::Get, this.outcome, this.started_at); + fn drop(mut _this: Pin<&mut Self>) { + } } } @@ -511,7 +490,7 @@ impl RemoteStorage for S3Bucket { let mut continuation_token = None; 'outer: loop { - let started_at = start_measuring_requests(kind); + // min of two Options, returning Some if one is value and another is // None (None is smaller than anything, so plain min doesn't work). @@ -544,11 +523,9 @@ impl RemoteStorage for S3Bucket { .context("Failed to list S3 prefixes") .map_err(DownloadError::Other); - let started_at = ScopeGuard::into_inner(started_at); + - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, &response, started_at); + let response = match response { Ok(response) => response, @@ -629,7 +606,7 @@ impl RemoteStorage for S3Bucket { let kind = RequestKind::Head; let _permit = self.permit(kind, cancel).await?; - let started_at = start_measuring_requests(kind); + let head_future = self .client @@ -648,30 +625,18 @@ impl RemoteStorage for S3Bucket { let res = res.map_err(|_e| DownloadError::Timeout)?; // do not incl. timeouts as errors in metrics but cancellations - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, &res, started_at); - + + let data = match res { Ok(object_output) => object_output, Err(SdkError::ServiceError(e)) if matches!(e.err(), HeadObjectError::NotFound(_)) => { // Count this in the AttemptOutcome::Ok bucket, because 404 is not // an error: we expect to sometimes fetch an object and find it missing, // e.g. when probing for timeline indices. - crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( - kind, - AttemptOutcome::Ok, - started_at, - ); return Err(DownloadError::NotFound); } Err(e) => { - crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( - kind, - AttemptOutcome::Err, - started_at, - ); + return Err(DownloadError::Other( anyhow::Error::new(e).context("s3 head object"), @@ -704,7 +669,7 @@ impl RemoteStorage for S3Bucket { let kind = RequestKind::Put; let _permit = self.permit(kind, cancel).await?; - let started_at = start_measuring_requests(kind); + let body = StreamBody::new(from.map(|x| x.map(Frame::data))); let bytes_stream = ByteStream::new(SdkBody::from_body_1_x(body)); @@ -727,12 +692,10 @@ impl RemoteStorage for S3Bucket { _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()), }; - if let Ok(inner) = &res { + if let Ok(_inner) = &res { // do not incl. timeouts as errors in metrics but cancellations - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, inner, started_at); + + } match res { @@ -753,7 +716,7 @@ impl RemoteStorage for S3Bucket { let timeout = tokio::time::sleep(self.timeout); - let started_at = start_measuring_requests(kind); + // we need to specify bucket_name as a prefix let copy_source = format!( @@ -777,10 +740,8 @@ impl RemoteStorage for S3Bucket { _ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()), }; - let started_at = ScopeGuard::into_inner(started_at); - crate::metrics::BUCKET_METRICS - .req_seconds - .observe_elapsed(kind, &res, started_at); + + res?; diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs index 71910011ea..90e02ae0ca 100644 --- a/pageserver/src/consumption_metrics/metrics.rs +++ b/pageserver/src/consumption_metrics/metrics.rs @@ -261,7 +261,7 @@ where let mut tenants = std::pin::pin!(tenants); while let Some((tenant_id, tenant)) = tenants.next().await { - let mut tenant_resident_size = 0; + let tenant_resident_size = 0; for timeline in tenant.list_timelines() { let timeline_id = timeline.timeline_id; @@ -286,7 +286,6 @@ where } } - tenant_resident_size += timeline.resident_physical_size(); } let snap = TenantSnapshot::collect(&tenant, tenant_resident_size); diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 3fc5b076f3..255d32e9ae 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -136,6 +136,7 @@ pub(crate) enum Scope { // which are shared among all tasks that operate on the timeline, especially // concurrent page_service connections. #[allow(clippy::redundant_allocation)] + #[allow(dead_code)] arc_arc: Arc>, }, #[cfg(test)] UnitTest { diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs index d9c1c07b10..0dd09bdbb1 100644 --- a/pageserver/src/deletion_queue.rs +++ b/pageserver/src/deletion_queue.rs @@ -27,7 +27,6 @@ use self::list_writer::{DeletionOp, ListWriter, RecoverOp}; use self::validator::Validator; use crate::config::PageServerConf; use crate::controller_upcall_client::StorageControllerUpcallApi; -use crate::metrics; use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_timeline_path}; use crate::tenant::storage_layer::LayerName; use crate::virtual_file::{MaybeFatalIo, VirtualFile}; @@ -163,11 +162,6 @@ struct TenantDeletionList { generation: Generation, } -impl TenantDeletionList { - pub(crate) fn len(&self) -> usize { - self.timelines.values().map(|v| v.len()).sum() - } -} /// Files ending with this suffix will be ignored and erased /// during recovery as startup. @@ -467,9 +461,6 @@ impl DeletionQueueClient { // they may be historical. assert!(!current_generation.is_none()); - metrics::DELETION_QUEUE - .keys_submitted - .inc_by(layers.len() as u64); self.do_push( &self.tx, ListWriterQueueMessage::Delete(DeletionOp { @@ -553,9 +544,6 @@ impl DeletionQueueClient { &self, objects: Vec, ) -> Result<(), DeletionQueueError> { - metrics::DELETION_QUEUE - .keys_submitted - .inc_by(objects.len() as u64); self.executor_tx .send(DeleterMessage::Delete(objects)) .await diff --git a/pageserver/src/deletion_queue/deleter.rs b/pageserver/src/deletion_queue/deleter.rs index 691ba75cc7..1deecd15a8 100644 --- a/pageserver/src/deletion_queue/deleter.rs +++ b/pageserver/src/deletion_queue/deleter.rs @@ -14,7 +14,6 @@ use tracing::{info, warn}; use utils::{backoff, pausable_failpoint}; use super::{DeletionQueueError, FlushOp}; -use crate::metrics; const AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10); @@ -60,10 +59,6 @@ impl Deleter { fail::fail_point!("deletion-queue-before-execute", |_| { info!("Skipping execution, failpoint set"); - metrics::DELETION_QUEUE - .remote_errors - .with_label_values(&["failpoint"]) - .inc(); Err(anyhow::anyhow!("failpoint: deletion-queue-before-execute")) }); @@ -90,9 +85,6 @@ impl Deleter { Ok(()) => { // Note: we assume that the remote storage layer returns Ok(()) if some // or all of the deleted objects were already gone. - metrics::DELETION_QUEUE - .keys_executed - .inc_by(self.accumulator.len() as u64); info!( "Executed deletion batch {}..{}", self.accumulator @@ -109,10 +101,6 @@ impl Deleter { return Err(DeletionQueueError::ShuttingDown); } warn!("DeleteObjects request failed: {e:#}, will continue trying"); - metrics::DELETION_QUEUE - .remote_errors - .with_label_values(&["execute"]) - .inc(); } }; } diff --git a/pageserver/src/deletion_queue/list_writer.rs b/pageserver/src/deletion_queue/list_writer.rs index a385e35a02..3de29810b5 100644 --- a/pageserver/src/deletion_queue/list_writer.rs +++ b/pageserver/src/deletion_queue/list_writer.rs @@ -25,7 +25,6 @@ use utils::id::TimelineId; use super::{DeletionHeader, DeletionList, FlushOp, ValidatorQueueMessage}; use crate::config::PageServerConf; use crate::deletion_queue::TEMP_SUFFIX; -use crate::metrics; use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_layer_path}; use crate::tenant::storage_layer::LayerName; use crate::virtual_file::{MaybeFatalIo, on_fatal_io_error}; @@ -152,7 +151,7 @@ impl ListWriter { } } Err(e) => { - metrics::DELETION_QUEUE.unexpected_errors.inc(); + warn!( sequence = self.pending.sequence, "Failed to write deletion list, will retry later ({e:#})" @@ -180,7 +179,6 @@ impl ListWriter { // This should never happen unless we make a mistake with our serialization. // Ignoring a deletion header is not consequential for correctnes because all deletions // are ultimately allowed to fail: worst case we leak some objects for the scrubber to clean up. - metrics::DELETION_QUEUE.unexpected_errors.inc(); Ok(None) } } @@ -249,7 +247,6 @@ impl ListWriter { .as_str() } else { warn!("Unexpected key in deletion queue: {basename}"); - metrics::DELETION_QUEUE.unexpected_errors.inc(); continue; }; @@ -257,7 +254,6 @@ impl ListWriter { Ok(s) => s, Err(e) => { warn!("Malformed key '{basename}': {e}"); - metrics::DELETION_QUEUE.unexpected_errors.inc(); continue; } }; @@ -286,7 +282,6 @@ impl ListWriter { // Drop the list on the floor: any objects it referenced will be left behind // for scrubbing to clean up. This should never happen unless we have a serialization bug. warn!(sequence = s, "Failed to deserialize deletion list: {e}"); - metrics::DELETION_QUEUE.unexpected_errors.inc(); continue; } }; @@ -329,9 +324,6 @@ impl ListWriter { // We will drop out of recovery if this fails: it indicates that we are shutting down // or the backend has panicked - metrics::DELETION_QUEUE - .keys_submitted - .inc_by(deletion_list.len() as u64); self.tx .send(ValidatorQueueMessage::Delete(deletion_list)) .await?; @@ -353,7 +345,6 @@ impl ListWriter { "Failed to create deletion list directory {}, deletions will not be executed ({e})", self.conf.deletion_prefix(), ); - metrics::DELETION_QUEUE.unexpected_errors.inc(); return; } @@ -422,7 +413,6 @@ impl ListWriter { tracing::error!( "Failed to enqueue deletions, leaking objects. This is a bug." ); - metrics::DELETION_QUEUE.unexpected_errors.inc(); } } } @@ -450,7 +440,6 @@ impl ListWriter { tracing::error!( "Deletion queue recovery called more than once. This is a bug." ); - metrics::DELETION_QUEUE.unexpected_errors.inc(); // Non-fatal: although this is a bug, since we did recovery at least once we may proceed. continue; } @@ -462,7 +451,6 @@ impl ListWriter { info!( "Deletion queue recover aborted, deletion queue will not proceed ({e})" ); - metrics::DELETION_QUEUE.unexpected_errors.inc(); return; } else { self.recovered = true; diff --git a/pageserver/src/deletion_queue/validator.rs b/pageserver/src/deletion_queue/validator.rs index 4e775f15eb..48eff99614 100644 --- a/pageserver/src/deletion_queue/validator.rs +++ b/pageserver/src/deletion_queue/validator.rs @@ -26,7 +26,6 @@ use super::deleter::DeleterMessage; use super::{DeletionHeader, DeletionList, DeletionQueueError, FlushOp, VisibleLsnUpdates}; use crate::config::PageServerConf; use crate::controller_upcall_client::{RetryForeverError, StorageControllerUpcallApi}; -use crate::metrics; use crate::virtual_file::MaybeFatalIo; // After this length of time, do any validation work that is pending, @@ -186,7 +185,6 @@ where "Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}", tenant_lsn_state.generation ); - metrics::DELETION_QUEUE.dropped_lsn_updates.inc(); } } @@ -221,11 +219,8 @@ where if !this_list_valid { info!("Dropping stale deletions for tenant {tenant_id} in generation {:?}, objects may be leaked", tenant.generation); - metrics::DELETION_QUEUE.keys_dropped.inc_by(tenant.len() as u64); mutated = true; - } else { - metrics::DELETION_QUEUE.keys_validated.inc_by(tenant.len() as u64); - } + } this_list_valid }); list.validated = true; @@ -237,7 +232,7 @@ where // Highly unexpected. Could happen if e.g. disk full. // If we didn't save the trimmed list, it is _not_ valid to execute. warn!("Failed to save modified deletion list {list}: {e:#}"); - metrics::DELETION_QUEUE.unexpected_errors.inc(); + // Rather than have a complex retry process, just drop it and leak the objects, // scrubber will clean up eventually. @@ -276,7 +271,7 @@ where // The save() function logs a warning on error. if let Err(e) = header.save(self.conf).await { warn!("Failed to write deletion queue header: {e:#}"); - metrics::DELETION_QUEUE.unexpected_errors.inc(); + } } } diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index 13252037e5..575c2feac5 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -457,7 +457,6 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl( if let Some(next) = next { match next { Ok(Ok(file_size)) => { - METRICS.layers_evicted.inc(); usage_assumed.add_available_bytes(file_size); } Ok(Err(( @@ -788,7 +787,6 @@ async fn collect_eviction_candidates( eviction_order: EvictionOrder, cancel: &CancellationToken, ) -> anyhow::Result { - const LOG_DURATION_THRESHOLD: std::time::Duration = std::time::Duration::from_secs(10); // get a snapshot of the list of tenants let tenants = tenant_manager @@ -822,7 +820,7 @@ async fn collect_eviction_candidates( continue; } - let started_at = std::time::Instant::now(); + // collect layers from all timelines in this tenant // @@ -917,25 +915,11 @@ async fn collect_eviction_candidates( (partition, candidate) }); - METRICS - .tenant_layer_count - .observe(tenant_candidates.len() as f64); candidates.extend(tenant_candidates); - let elapsed = started_at.elapsed(); - METRICS - .tenant_collection_time - .observe(elapsed.as_secs_f64()); + - if elapsed > LOG_DURATION_THRESHOLD { - tracing::info!( - tenant_id=%tenant.tenant_shard_id().tenant_id, - shard_id=%tenant.tenant_shard_id().shard_slug(), - elapsed_ms = elapsed.as_millis(), - "collection took longer than threshold" - ); - } } // Note: the same tenant ID might be hit twice, if it transitions from attached to @@ -962,7 +946,7 @@ async fn collect_eviction_candidates( layer_info.resident_layers.len() ); - let started_at = std::time::Instant::now(); + layer_info .resident_layers @@ -984,28 +968,13 @@ async fn collect_eviction_candidates( candidate, ) }); - - METRICS - .tenant_layer_count - .observe(tenant_candidates.len() as f64); candidates.extend(tenant_candidates); tokio::task::yield_now().await; - let elapsed = started_at.elapsed(); + - METRICS - .tenant_collection_time - .observe(elapsed.as_secs_f64()); - - if elapsed > LOG_DURATION_THRESHOLD { - tracing::info!( - tenant_id=%tenant.tenant_shard_id().tenant_id, - shard_id=%tenant.tenant_shard_id().shard_slug(), - elapsed_ms = elapsed.as_millis(), - "collection took longer than threshold" - ); - } + } debug_assert!( diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 911449c7c5..30edcb446f 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -24,7 +24,6 @@ use wal_decoder::models::InterpretedWalRecord; use walkdir::WalkDir; use crate::context::RequestContext; -use crate::metrics::WAL_INGEST; use crate::pgdatadir_mapping::*; use crate::tenant::Timeline; use crate::walingest::{WalIngest, WalIngestErrorKind}; @@ -324,7 +323,6 @@ async fn import_wal( walingest .ingest_record(interpreted, &mut modification, ctx) .await?; - WAL_INGEST.records_committed.inc(); modification.commit(ctx).await?; last_lsn = lsn; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 8338ce4fbb..4b6a999f0c 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1,16 +1,16 @@ use std::num::NonZeroUsize; use std::sync::Arc; -use std::time::{Duration, Instant}; +use std::time::Instant; -use enum_map::{Enum as _, EnumMap}; +use enum_map::EnumMap; use futures::Future; use metrics::{ - CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, - IntCounterPairVec, IntCounterVec, IntGaugeVec, UIntGauge, UIntGaugeVec, - register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, - register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, - register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec, + Histogram, IntCounter, + UIntGauge, + register_histogram, + register_int_counter, + register_uint_gauge, }; use once_cell::sync::Lazy; use pageserver_api::config::{ @@ -18,9 +18,7 @@ use pageserver_api::config::{ PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy, }; use pageserver_api::shard::TenantShardId; -use postgres_backend::{QueryError, is_expected_io_error}; -use pq_proto::framed::ConnectionError; -use strum::{IntoEnumIterator as _, VariantNames}; + use strum_macros::{IntoStaticStr, VariantNames}; use utils::id::TimelineId; @@ -28,7 +26,7 @@ use crate::config; use crate::config::PageServerConf; use crate::pgdatadir_mapping::DatadirModificationStats; use crate::task_mgr::TaskKind; -use crate::tenant::Timeline; + use crate::tenant::mgr::TenantSlot; use crate::tenant::storage_layer::PersistentLayerDesc; use crate::tenant::throttle::ThrottleResult; @@ -51,133 +49,14 @@ const CRITICAL_OP_BUCKETS: &[f64] = &[ #[strum(serialize_all = "kebab_case")] pub(crate) enum StorageTimeOperation { - #[strum(serialize = "layer flush delay")] - LayerFlushDelay, - - #[strum(serialize = "compact")] - Compact, - - #[strum(serialize = "create images")] - CreateImages, - - #[strum(serialize = "logical size")] - LogicalSize, - - #[strum(serialize = "imitate logical size")] - ImitateLogicalSize, - - #[strum(serialize = "gc")] - Gc, - - #[strum(serialize = "find gc cutoffs")] - FindGcCutoffs, + + } -pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy = Lazy::new(|| { - register_counter_vec!( - "pageserver_storage_operations_seconds_sum", - "Total time spent on storage operations with operation, tenant and timeline dimensions", - &["operation", "tenant_id", "shard_id", "timeline_id"], - ) - .expect("failed to define a metric") -}); -pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_storage_operations_seconds_count", - "Count of storage operations with operation, tenant and timeline dimensions", - &["operation", "tenant_id", "shard_id", "timeline_id"], - ) - .expect("failed to define a metric") -}); -// Buckets for background operation duration in seconds, like compaction, GC, size calculation. -const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0]; -/// Measures layers visited per read (i.e. read amplification). -/// -/// NB: for a batch, we count all visited layers towards each read. While the cost of layer visits -/// are amortized across the batch, and some layers may not intersect with a given key, each visited -/// layer contributes directly to the observed latency for every read in the batch, which is what we -/// care about. -pub(crate) static LAYERS_PER_READ: Lazy = Lazy::new(|| { - register_histogram_vec!( - "pageserver_layers_per_read", - "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.", - &["tenant_id", "shard_id", "timeline_id"], - // Low resolution to reduce cardinality. - vec![4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0], - ) - .expect("failed to define a metric") -}); -pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_layers_per_read_global", - "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.", - vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0], - ) - .expect("failed to define a metric") -}); - -pub(crate) static LAYERS_PER_READ_BATCH_GLOBAL: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_layers_per_read_batch_global", - "Layers visited to serve a single read batch (read amplification), regardless of number of reads.", - vec![ - 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0 - ], - ) - .expect("failed to define a metric") -}); - -pub(crate) static LAYERS_PER_READ_AMORTIZED_GLOBAL: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_layers_per_read_amortized_global", - "Layers visited to serve a single read (read amplification). Amortized across a batch: \ - all visited layers are divided by number of reads.", - vec![ - 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0 - ], - ) - .expect("failed to define a metric") -}); - -pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy = Lazy::new(|| { - // We expect this to be low because of Postgres checkpoints. Let's see if that holds. - register_histogram!( - "pageserver_deltas_per_read_global", - "Number of delta pages applied to image page per read", - vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0], - ) - .expect("failed to define a metric") -}); - -pub(crate) static CONCURRENT_INITDBS: Lazy = Lazy::new(|| { - register_uint_gauge!( - "pageserver_concurrent_initdb", - "Number of initdb processes running" - ) - .expect("failed to define a metric") -}); - -pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_initdb_semaphore_seconds_global", - "Time spent getting a permit from the global initdb semaphore", - STORAGE_OP_BUCKETS.into() - ) - .expect("failed to define metric") -}); - -pub(crate) static INITDB_RUN_TIME: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_initdb_seconds_global", - "Time spent performing initdb", - STORAGE_OP_BUCKETS.into() - ) - .expect("failed to define metric") -}); #[allow(dead_code)] @@ -194,42 +73,9 @@ pub(crate) enum PageCacheErrorKind { pub(crate) fn page_cache_errors_inc(_error_kind: PageCacheErrorKind) { } -pub(crate) static WAIT_LSN_TIME: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_wait_lsn_seconds", - "Time spent waiting for WAL to arrive. Updated on completion of the wait_lsn operation.", - CRITICAL_OP_BUCKETS.into(), - ) - .expect("failed to define a metric") -}); -pub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy = Lazy::new(|| { - register_int_counter_pair_vec!( - "pageserver_wait_lsn_started_count", - "Number of wait_lsn operations started.", - "pageserver_wait_lsn_finished_count", - "Number of wait_lsn operations finished.", - &["tenant_id", "shard_id", "timeline_id"], - ) - .expect("failed to define a metric") -}); -pub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_wait_lsn_in_progress_micros", - "Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.", - &["tenant_id", "shard_id", "timeline_id"], - ) - .expect("failed to define a metric") -}); -pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_wait_lsn_in_progress_micros_global", - "Time spent waiting for WAL to arrive, globally. Updated periodically while waiting." - ) - .expect("failed to define a metric") -}); pub(crate) mod wait_ondemand_download_time { @@ -240,32 +86,7 @@ pub(crate) mod wait_ondemand_download_time { } } -static LAST_RECORD_LSN: Lazy = Lazy::new(|| { - register_int_gauge_vec!( - "pageserver_last_record_lsn", - "Last record LSN grouped by timeline", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); -static DISK_CONSISTENT_LSN: Lazy = Lazy::new(|| { - register_int_gauge_vec!( - "pageserver_disk_consistent_lsn", - "Disk consistent LSN grouped by timeline", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); - -static PITR_HISTORY_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_pitr_history_size", - "Data written since PITR cutoff on this timeline", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); #[derive( strum_macros::EnumIter, @@ -294,103 +115,14 @@ pub(crate) enum LayerLevel { L1, } -static TIMELINE_LAYER_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_layer_bytes", - "Sum of frozen, L0, and L1 layer physical sizes in bytes (excluding the open ephemeral layer)", - &["tenant_id", "shard_id", "timeline_id", "level", "kind"] - ) - .expect("failed to define a metric") -}); - -static TIMELINE_LAYER_COUNT: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_layer_count", - "Number of frozen, L0, and L1 layers (excluding the open ephemeral layer)", - &["tenant_id", "shard_id", "timeline_id", "level", "kind"] - ) - .expect("failed to define a metric") -}); - -static TIMELINE_ARCHIVE_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_archive_size", - "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); - -static STANDBY_HORIZON: Lazy = Lazy::new(|| { - register_int_gauge_vec!( - "pageserver_standby_horizon", - "Standby apply LSN for which GC is hold off, by timeline.", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); - -static RESIDENT_PHYSICAL_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_resident_physical_size", - "The size of the layer files present in the pageserver's filesystem, for attached locations.", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); - -static VISIBLE_PHYSICAL_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_visible_physical_size", - "The size of the layer files present in the pageserver's filesystem.", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); -pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_remote_ondemand_downloaded_layers_total", - "Total on-demand downloaded layers" - ) - .unwrap() -}); -pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_remote_ondemand_downloaded_bytes_total", - "Total bytes of layers on-demand downloaded", - ) - .unwrap() -}); -static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_current_logical_size", - "Current logical size grouped by timeline", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define current logical size metric") -}); -static AUX_FILE_SIZE: Lazy = Lazy::new(|| { - register_int_gauge_vec!( - "pageserver_aux_file_estimated_size", - "The size of all aux files for a timeline in aux file v2 store.", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); -static VALID_LSN_LEASE_COUNT: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_valid_lsn_lease_count", - "The number of valid leases after refreshing gc info.", - &["tenant_id", "shard_id", "timeline_id"], - ) - .expect("failed to define a metric") -}); + + pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy = Lazy::new(|| { register_int_counter!( @@ -492,46 +224,8 @@ pub(crate) mod initial_logical_size { }); } -static DIRECTORY_ENTRIES_COUNT: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_directory_entries_count", - "Sum of the entries in pageserver-stored directory listings", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); -pub(crate) static EVICTION_ITERATION_DURATION: Lazy = Lazy::new(|| { - register_histogram_vec!( - "pageserver_eviction_iteration_duration_seconds_global", - "Time spent on a single eviction iteration", - &["period_secs", "threshold_secs"], - STORAGE_OP_BUCKETS.into(), - ) - .expect("failed to define a metric") -}); - -static EVICTIONS: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_evictions", - "Number of layers evicted from the pageserver", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); - -/// How long did we take to start up? Broken down by labels to describe -/// different phases of startup. -pub static STARTUP_DURATION: Lazy = Lazy::new(|| { - register_gauge_vec!( - "pageserver_startup_duration_seconds", - "Time taken by phases of pageserver startup, in seconds", - &["phase"] - ) - .expect("Failed to register pageserver_startup_duration_seconds metric") -}); - pub static STARTUP_IS_LOADING: Lazy = Lazy::new(|| { register_uint_gauge!( "pageserver_startup_is_loading", @@ -556,8 +250,6 @@ pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy = Lazy::new(|| { /// at a timeline level than tenant level. pub(crate) struct TenantMetrics { /// How long did tenants take to go from construction to active state? - pub(crate) activation: Histogram, - pub(crate) preload: Histogram, pub(crate) attach: Histogram, /// How many tenants are included in the initial startup of the pagesrever? @@ -567,18 +259,8 @@ pub(crate) struct TenantMetrics { pub(crate) static TENANT: Lazy = Lazy::new(|| { TenantMetrics { - activation: register_histogram!( - "pageserver_tenant_activation_seconds", - "Time taken by tenants to activate, in seconds", - CRITICAL_OP_BUCKETS.into() - ) - .expect("Failed to register metric"), - preload: register_histogram!( - "pageserver_tenant_preload_seconds", - "Time taken by tenants to load remote metadata on startup/attach, in seconds", - CRITICAL_OP_BUCKETS.into() - ) - .expect("Failed to register metric"), + + attach: register_histogram!( "pageserver_tenant_attach_seconds", "Time taken by tenants to intialize, after remote metadata is already loaded", @@ -748,145 +430,21 @@ pub enum GetPageBatchBreakReason { pub(crate) struct SmgrQueryTimePerTimeline { } -static SMGR_QUERY_STARTED_GLOBAL: Lazy = Lazy::new(|| { - register_int_counter_vec!( - // it's a counter, but, name is prepared to extend it to a histogram of queue depth - "pageserver_smgr_query_started_global_count", - "Number of smgr queries started, aggregated by query type.", - &["smgr_query_type"], - ) - .expect("failed to define a metric") -}); - -static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { - register_int_counter_vec!( - // it's a counter, but, name is prepared to extend it to a histogram of queue depth - "pageserver_smgr_query_started_count", - "Number of smgr queries started, aggregated by query type and tenant/timeline.", - &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"], - ) - .expect("failed to define a metric") -}); - -// Alias so all histograms recording per-timeline smgr timings use the same buckets. -static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] = CRITICAL_OP_BUCKETS; - -static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { - register_histogram_vec!( - "pageserver_smgr_query_seconds", - "Time spent _executing_ smgr query handling, excluding batch and throttle delays.", - &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"], - SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(), - ) - .expect("failed to define a metric") -}); - -static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy> = Lazy::new(|| { - [ - 1, - 10, - 20, - 40, - 60, - 80, - 100, - 200, - 300, - 400, - 500, - 600, - 700, - 800, - 900, - 1_000, // 1ms - 2_000, - 4_000, - 6_000, - 8_000, - 10_000, // 10ms - 20_000, - 40_000, - 60_000, - 80_000, - 100_000, - 200_000, - 400_000, - 600_000, - 800_000, - 1_000_000, // 1s - 2_000_000, - 4_000_000, - 6_000_000, - 8_000_000, - 10_000_000, // 10s - 20_000_000, - 50_000_000, - 100_000_000, - 200_000_000, - 1_000_000_000, // 1000s - ] - .into_iter() - .map(Duration::from_micros) - .map(|d| d.as_secs_f64()) - .collect() -}); - -static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy> = Lazy::new(|| { - (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap()) - .map(|v| v.into()) - .collect() -}); - -static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_page_service_batch_size_global", - "Batch size of pageserver page service requests", - PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(), - ) - .expect("failed to define a metric") -}); - -static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy> = Lazy::new(|| { - let mut buckets = Vec::new(); - for i in 0.. { - let bucket = 1 << i; - if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() { - break; - } - buckets.push(bucket.into()); - } - buckets -}); - -static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { - register_histogram_vec!( - "pageserver_page_service_batch_size", - "Batch size of pageserver page service requests", - &["tenant_id", "shard_id", "timeline_id"], - PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone() - ) - .expect("failed to define a metric") -}); - -static PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL: Lazy = Lazy::new(|| { - register_int_counter_vec!( - // it's a counter, but, name is prepared to extend it to a histogram of queue depth - "pageserver_page_service_batch_break_reason_global", - "Reason for breaking batches of get page requests", - &["reason"], - ) - .expect("failed to define a metric") -}); -static PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_page_service_batch_break_reason", - "Reason for breaking batches of get page requests", - &["tenant_id", "shard_id", "timeline_id", "reason"], - ) - .expect("failed to define a metric") -}); + + + + + + + + + + + + + fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) { @@ -914,44 +472,12 @@ fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) { }; } -static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_page_service_pagestream_flush_in_progress_micros", - "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \ - If the flush is particularly slow, this counter will be updated periodically to make slow flushes \ - easily discoverable in monitoring. \ - Hence, this is NOT a completion latency historgram.", - &["tenant_id", "shard_id", "timeline_id"], - ) - .expect("failed to define a metric") -}); -static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_page_service_pagestream_flush_in_progress_micros_global", - "Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.", - ) - .expect("failed to define a metric") -}); -static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy = Lazy::new(|| { - register_histogram_vec!( - "pageserver_page_service_pagestream_batch_wait_time_seconds", - "Time a request spent waiting in its batch until the batch moved to throttle&execution.", - &["tenant_id", "shard_id", "timeline_id"], - SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(), - ) - .expect("failed to define a metric") -}); -static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_page_service_pagestream_batch_wait_time_seconds_global", - "Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.", - SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(), - ) - .expect("failed to define a metric") -}); + + + impl SmgrQueryTimePerTimeline { pub(crate) fn new( @@ -972,69 +498,7 @@ impl SmgrQueryTimePerTimeline { } -// keep in sync with control plane Go code so that we can validate -// compute's basebackup_ms metric with our perspective in the context of SLI/SLO. -static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| { - // Go code uses milliseconds. Variable is called `computeStartupBuckets` - [ - 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000, - 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000, - ] - .map(|ms| (ms as f64) / 1000.0) -}); -pub(crate) struct BasebackupQueryTime { - ok: Histogram, - error: Histogram, - client_error: Histogram, -} - -pub(crate) static BASEBACKUP_QUERY_TIME: Lazy = Lazy::new(|| { - let vec = register_histogram_vec!( - "pageserver_basebackup_query_seconds", - "Histogram of basebackup queries durations, by result type", - &["result"], - COMPUTE_STARTUP_BUCKETS.to_vec(), - ) - .expect("failed to define a metric"); - BasebackupQueryTime { - ok: vec.get_metric_with_label_values(&["ok"]).unwrap(), - error: vec.get_metric_with_label_values(&["error"]).unwrap(), - client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(), - } -}); - -pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> { - parent: &'a BasebackupQueryTime, - start: std::time::Instant, -} - -impl BasebackupQueryTime { - pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> { - let start = Instant::now(); - BasebackupQueryTimeOngoingRecording { - parent: self, - start, - } - } -} - -impl BasebackupQueryTimeOngoingRecording<'_> { - pub(crate) fn observe(self, res: &Result) { - let elapsed = self.start.elapsed().as_secs_f64(); - // If you want to change categorize of a specific error, also change it in `log_query_error`. - let metric = match res { - Ok(_) => &self.parent.ok, - Err(QueryError::Disconnected(ConnectionError::Io(io_error))) - if is_expected_io_error(io_error) => - { - &self.parent.client_error - } - Err(_) => &self.parent.error, - }; - metric.observe(elapsed); - } -} #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)] pub(crate) enum ComputeCommandKind { @@ -1087,106 +551,6 @@ pub(crate) static TENANT_MANAGER: Lazy = Lazy::new(|| { } }); -pub(crate) struct DeletionQueueMetrics { - pub(crate) keys_submitted: IntCounter, - pub(crate) keys_dropped: IntCounter, - pub(crate) keys_executed: IntCounter, - pub(crate) keys_validated: IntCounter, - pub(crate) dropped_lsn_updates: IntCounter, - pub(crate) unexpected_errors: IntCounter, - pub(crate) remote_errors: IntCounterVec, -} -pub(crate) static DELETION_QUEUE: Lazy = Lazy::new(|| { - DeletionQueueMetrics{ - - keys_submitted: register_int_counter!( - "pageserver_deletion_queue_submitted_total", - "Number of objects submitted for deletion" - ) - .expect("failed to define a metric"), - - keys_dropped: register_int_counter!( - "pageserver_deletion_queue_dropped_total", - "Number of object deletions dropped due to stale generation." - ) - .expect("failed to define a metric"), - - keys_executed: register_int_counter!( - "pageserver_deletion_queue_executed_total", - "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion" - ) - .expect("failed to define a metric"), - - keys_validated: register_int_counter!( - "pageserver_deletion_queue_validated_total", - "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage." - ) - .expect("failed to define a metric"), - - dropped_lsn_updates: register_int_counter!( - "pageserver_deletion_queue_dropped_lsn_updates_total", - "Updates to remote_consistent_lsn dropped due to stale generation number." - ) - .expect("failed to define a metric"), - unexpected_errors: register_int_counter!( - "pageserver_deletion_queue_unexpected_errors_total", - "Number of unexpected condiions that may stall the queue: any value above zero is unexpected." - ) - .expect("failed to define a metric"), - remote_errors: register_int_counter_vec!( - "pageserver_deletion_queue_remote_errors_total", - "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects", - &["op_kind"], - ) - .expect("failed to define a metric") -} -}); - -pub(crate) struct SecondaryModeMetrics { - pub(crate) upload_heatmap: IntCounter, - pub(crate) upload_heatmap_errors: IntCounter, - pub(crate) upload_heatmap_duration: Histogram, - pub(crate) download_heatmap: IntCounter, - pub(crate) download_layer: IntCounter, -} -pub(crate) static SECONDARY_MODE: Lazy = Lazy::new(|| { - SecondaryModeMetrics { - upload_heatmap: register_int_counter!( - "pageserver_secondary_upload_heatmap", - "Number of heatmaps written to remote storage by attached tenants" - ) - .expect("failed to define a metric"), - upload_heatmap_errors: register_int_counter!( - "pageserver_secondary_upload_heatmap_errors", - "Failures writing heatmap to remote storage" - ) - .expect("failed to define a metric"), - upload_heatmap_duration: register_histogram!( - "pageserver_secondary_upload_heatmap_duration", - "Time to build and upload a heatmap, including any waiting inside the remote storage client" - ) - .expect("failed to define a metric"), - download_heatmap: register_int_counter!( - "pageserver_secondary_download_heatmap", - "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed" - ) - .expect("failed to define a metric"), - download_layer: register_int_counter!( - "pageserver_secondary_download_layer", - "Number of downloads of layers by secondary mode locations" - ) - .expect("failed to define a metric"), -} -}); - -pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_secondary_resident_physical_size", - "The size of the layer files present in the pageserver's filesystem, for secondary locations.", - &["tenant_id", "shard_id"] - ) - .expect("failed to define a metric") -}); pub(crate) static NODE_UTILIZATION_SCORE: Lazy = Lazy::new(|| { register_uint_gauge!( @@ -1196,14 +560,6 @@ pub(crate) static NODE_UTILIZATION_SCORE: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); -pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy = Lazy::new(|| { - register_uint_gauge_vec!( - "pageserver_secondary_heatmap_total_size", - "The total size in bytes of all layers in the most recently downloaded heatmap.", - &["tenant_id", "shard_id"] - ) - .expect("failed to define a metric") -}); #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum RemoteOpKind { @@ -1235,23 +591,7 @@ impl RemoteOpFileKind { } } -pub(crate) static TENANT_TASK_EVENTS: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_tenant_task_events", - "Number of task start/stop/fail events.", - &["event"], - ) - .expect("Failed to register tenant_task_events metric") -}); -pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_background_loop_period_overrun_count", - "Incremented whenever warn_when_period_overrun() logs a warning.", - &["task", "period"], - ) - .expect("failed to define a metric") -}); // walreceiver metrics @@ -1264,57 +604,11 @@ pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy = Lazy::new( }); -pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_walreceiver_broker_updates_total", - "Number of received broker updates in walreceiver" - ) - .expect("failed to define a metric") -}); -// Metrics collected on WAL redo operations -// -// We collect the time spent in actual WAL redo ('redo'), and time waiting -// for access to the postgres process ('wait') since there is only one for -// each tenant. -/// Time buckets are small because we want to be able to measure the -/// smallest redo processing times. These buckets allow us to measure down -/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec. -/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec. -/// -/// Values up to 1s are recorded because metrics show that we have redo -/// durations and lock times larger than 0.250s. -macro_rules! redo_histogram_time_buckets { - () => { - vec![ - 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000, - 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000, - 1.000_000, - ] - }; -} -/// While we're at it, also measure the amount of records replayed in each -/// operation. We have a global 'total replayed' counter, but that's not -/// as useful as 'what is the skew for how many records we replay in one -/// operation'. -macro_rules! redo_histogram_count_buckets { - () => { - vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0] - }; -} -macro_rules! redo_bytes_histogram_count_buckets { - () => { - // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets) - // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too. - vec![ - 24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0, - 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0, - ] - }; -} + pub(crate) struct WalIngestMetrics { pub(crate) bytes_received: IntCounter, @@ -1379,73 +673,14 @@ pub(crate) static WAL_INGEST: Lazy = Lazy::new(|| { } }); -pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_timeline_wal_records_received", - "Number of WAL records received per shard", - &["tenant_id", "shard_id", "timeline_id"] - ) - .expect("failed to define a metric") -}); -pub(crate) static WAL_REDO_TIME: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_wal_redo_seconds", - "Time spent on WAL redo", - redo_histogram_time_buckets!() - ) - .expect("failed to define a metric") -}); -pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_wal_redo_records_histogram", - "Histogram of number of records replayed per redo in the Postgres WAL redo process", - redo_histogram_count_buckets!(), - ) - .expect("failed to define a metric") -}); -pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_wal_redo_bytes_histogram", - "Histogram of number of records replayed per redo sent to Postgres", - redo_bytes_histogram_count_buckets!(), - ) - .expect("failed to define a metric") -}); -// FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count? -pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_replayed_wal_records_total", - "Number of WAL records replayed in WAL redo process" - ) - .unwrap() -}); -#[rustfmt::skip] -pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy = Lazy::new(|| { - register_histogram!( - "pageserver_wal_redo_process_launch_duration", - "Histogram of the duration of successful WalRedoProcess::launch calls", - vec![ - 0.0002, 0.0004, 0.0006, 0.0008, 0.0010, - 0.0020, 0.0040, 0.0060, 0.0080, 0.0100, - 0.0200, 0.0400, 0.0600, 0.0800, 0.1000, - 0.2000, 0.4000, 0.6000, 0.8000, 1.0000, - 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000 - ], - ) - .expect("failed to define a metric") -}); -pub(crate) struct WalRedoProcessCounters { - pub(crate) started: IntCounter, - pub(crate) killed_by_cause: EnumMap, - pub(crate) active_stderr_logger_tasks_started: IntCounter, - pub(crate) active_stderr_logger_tasks_finished: IntCounter, -} + + #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)] pub(crate) enum WalRedoKillCause { @@ -1454,132 +689,17 @@ pub(crate) enum WalRedoKillCause { Startup, } -impl Default for WalRedoProcessCounters { - fn default() -> Self { - let started = register_int_counter!( - "pageserver_wal_redo_process_started_total", - "Number of WAL redo processes started", - ) - .unwrap(); - let killed = register_int_counter_vec!( - "pageserver_wal_redo_process_stopped_total", - "Number of WAL redo processes stopped", - &["cause"], - ) - .unwrap(); - let active_stderr_logger_tasks_started = register_int_counter!( - "pageserver_walredo_stderr_logger_tasks_started_total", - "Number of active walredo stderr logger tasks that have started", - ) - .unwrap(); - let active_stderr_logger_tasks_finished = register_int_counter!( - "pageserver_walredo_stderr_logger_tasks_finished_total", - "Number of active walredo stderr logger tasks that have finished", - ) - .unwrap(); - Self { - started, - killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| { - let cause = WalRedoKillCause::from_usize(i); - let cause_str: &'static str = cause.into(); - killed.with_label_values(&[cause_str]) - })), - active_stderr_logger_tasks_started, - active_stderr_logger_tasks_finished, - } - } -} - -pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy = - Lazy::new(WalRedoProcessCounters::default); - -/// Similar to `prometheus::HistogramTimer` but does not record on drop. -pub(crate) struct StorageTimeMetricsTimer { - start: Instant, -} - -impl StorageTimeMetricsTimer { - fn new(_metrics: StorageTimeMetrics) -> Self { - Self { - start: Instant::now(), - } - } - - /// Returns the elapsed duration of the timer. - pub fn elapsed(&self) -> Duration { - self.start.elapsed() - } - - /// Record the time from creation to now and return it. - pub fn stop_and_record(self) -> Duration { - self.elapsed() - } - - /// Turns this timer into a timer, which will always record -- usually this means recording - /// regardless an early `?` path was taken in a function. - pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer { - AlwaysRecordingStorageTimeMetricsTimer(Some(self)) - } -} - -pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option); - -impl Drop for AlwaysRecordingStorageTimeMetricsTimer { - fn drop(&mut self) { - if let Some(inner) = self.0.take() { - inner.stop_and_record(); - } - } -} - -impl AlwaysRecordingStorageTimeMetricsTimer { - /// Returns the elapsed duration of the timer. - pub fn elapsed(&self) -> Duration { - self.0.as_ref().expect("not dropped yet").elapsed() - } -} - -/// Timing facilities for an globally histogrammed metric, which is supported by per tenant and -/// timeline total sum and count. -#[derive(Clone, Debug)] -pub(crate) struct StorageTimeMetrics { -} - -impl StorageTimeMetrics { - pub fn new( - _operation: StorageTimeOperation, - _tenant_id: &str, - _shard_id: &str, - _timeline_id: &str, - ) -> Self { - - StorageTimeMetrics { - } - } - - /// Starts timing a new operation. - /// - /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop. - pub fn start_timer(&self) -> StorageTimeMetricsTimer { - StorageTimeMetricsTimer::new(self.clone()) - } -} pub(crate) struct TimelineMetrics { tenant_id: String, shard_id: String, timeline_id: String, - pub flush_delay_histo: StorageTimeMetrics, - pub compact_time_histo: StorageTimeMetrics, - pub create_images_time_histo: StorageTimeMetrics, - pub logical_size_histo: StorageTimeMetrics, - pub imitate_logical_size_histo: StorageTimeMetrics, - pub garbage_collect_histo: StorageTimeMetrics, - pub find_gc_cutoffs_histo: StorageTimeMetrics, + + /// copy of LayeredTimeline.current_logical_size shutdown: std::sync::atomic::AtomicBool, } @@ -1592,81 +712,20 @@ impl TimelineMetrics { let tenant_id = tenant_shard_id.tenant_id.to_string(); let shard_id = format!("{}", tenant_shard_id.shard_slug()); let timeline_id = timeline_id_raw.to_string(); - - let flush_delay_histo = StorageTimeMetrics::new( - StorageTimeOperation::LayerFlushDelay, - &tenant_id, - &shard_id, - &timeline_id, - ); - let compact_time_histo = StorageTimeMetrics::new( - StorageTimeOperation::Compact, - &tenant_id, - &shard_id, - &timeline_id, - ); - let create_images_time_histo = StorageTimeMetrics::new( - StorageTimeOperation::CreateImages, - &tenant_id, - &shard_id, - &timeline_id, - ); - let logical_size_histo = StorageTimeMetrics::new( - StorageTimeOperation::LogicalSize, - &tenant_id, - &shard_id, - &timeline_id, - ); - let imitate_logical_size_histo = StorageTimeMetrics::new( - StorageTimeOperation::ImitateLogicalSize, - &tenant_id, - &shard_id, - &timeline_id, - ); - let garbage_collect_histo = StorageTimeMetrics::new( - StorageTimeOperation::Gc, - &tenant_id, - &shard_id, - &timeline_id, - ); - let find_gc_cutoffs_histo = StorageTimeMetrics::new( - StorageTimeOperation::FindGcCutoffs, - &tenant_id, - &shard_id, - &timeline_id, - ); + + TimelineMetrics { tenant_id, shard_id, timeline_id, - flush_delay_histo, - compact_time_histo, - create_images_time_histo, - logical_size_histo, - imitate_logical_size_histo, - garbage_collect_histo, - find_gc_cutoffs_histo, + + + shutdown: std::sync::atomic::AtomicBool::default(), } } - - pub(crate) fn resident_physical_size_sub(&self, _sz: u64) { - // self.resident_physical_size_gauge.sub(sz); - // crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz); - } - - pub(crate) fn resident_physical_size_add(&self, _sz: u64) { - // self.resident_physical_size_gauge.add(sz); - // crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz); - } - - pub(crate) fn resident_physical_size_get(&self) -> u64 { - // self.resident_physical_size_gauge.get() - 0 // FIXME: Return dummy value as gauge access is commented out - } - /// Removes a persistent layer from TIMELINE_LAYER metrics. pub fn dec_layer(&self, _layer_desc: &PersistentLayerDesc) { } @@ -1690,118 +749,25 @@ impl TimelineMetrics { let tenant_id = &self.tenant_id; let timeline_id = &self.timeline_id; let shard_id = &self.shard_id; - let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]); - { - // RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get()); - let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); - } - let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); - if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) { - let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]); - } + - let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); - - for ref level in LayerLevel::iter() { - for ref kind in LayerKind::iter() { - let labels: [&str; 5] = - [tenant_id, shard_id, timeline_id, level.into(), kind.into()]; - let _ = TIMELINE_LAYER_SIZE.remove_label_values(&labels); - let _ = TIMELINE_LAYER_COUNT.remove_label_values(&labels); - } - } - - let _ = LAYERS_PER_READ.remove_label_values(&[tenant_id, shard_id, timeline_id]); - - let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); - let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]); // The following metrics are born outside of the TimelineMetrics lifecycle but still // removed at the end of it. The idea is to have the metrics outlive the // entity during which they're observed, e.g., the smgr metrics shall // outlive an individual smgr connection, but not the timeline. - for op in StorageTimeOperation::VARIANTS { - let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[ - op, - tenant_id, - shard_id, - timeline_id, - ]); - let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[ - op, - tenant_id, - shard_id, - timeline_id, - ]); - } + - let _ = - WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]); - - { - let mut res = [Ok(()), Ok(())]; - WAIT_LSN_START_FINISH_COUNTERPAIR - .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]); - } + wait_ondemand_download_time::shutdown_timeline(tenant_id, shard_id, timeline_id); - let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[ - SmgrQueryType::GetPageAtLsn.into(), - tenant_id, - shard_id, - timeline_id, - ]); - let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[ - SmgrQueryType::GetPageAtLsn.into(), - tenant_id, - shard_id, - timeline_id, - ]); - let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[ - tenant_id, - shard_id, - timeline_id, - ]); - let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[ - tenant_id, - shard_id, - timeline_id, - ]); - let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[ - tenant_id, - shard_id, - timeline_id, - ]); - let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[ - tenant_id, - shard_id, - timeline_id, - ]); - - for reason in GetPageBatchBreakReason::iter() { - let _ = PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.remove_label_values(&[ - tenant_id, - shard_id, - timeline_id, - reason.into(), - ]); - } + + } } -pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) { - - tenant_throttling::remove_tenant_metrics(tenant_shard_id); - - // we leave the BROKEN_TENANTS_SET entry if any -} /// Wrapper future that measures the time spent by a remote storage operation, @@ -1825,7 +791,7 @@ pub mod tokio_epoll_uring { use std::collections::HashMap; use std::sync::{Arc, Mutex}; - use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter}; + use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram}; use once_cell::sync::Lazy; /// Shared storage for tokio-epoll-uring thread local metrics. @@ -1964,21 +930,7 @@ pub mod tokio_epoll_uring { } } - pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count", - "Number of times where thread_local_system creation spanned multiple executor threads", - ) - .unwrap() - }); - - pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy = Lazy::new(|| { - register_int_counter!( - "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count", - "Number of times thread_local_system creation failed and was retried after back-off.", - ) - .unwrap() - }); + } pub(crate) mod tenant_throttling { use metrics::register_int_counter_vec; @@ -1996,14 +948,7 @@ pub(crate) mod tenant_throttling { ) .unwrap() }); - static COUNT_ACCOUNTED_START_PER_TENANT: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_tenant_throttling_count_accounted_start", - "Count of tenant throttling starts, by kind of throttle.", - &["kind", "tenant_id", "shard_id"] - ) - .unwrap() - }); + static COUNT_ACCOUNTED_FINISH: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_count_accounted_finish_global", @@ -2012,14 +957,7 @@ pub(crate) mod tenant_throttling { ) .unwrap() }); - static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_tenant_throttling_count_accounted_finish", - "Count of tenant throttling finishes, by kind of throttle.", - &["kind", "tenant_id", "shard_id"] - ) - .unwrap() - }); + static WAIT_USECS: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_tenant_throttling_wait_usecs_sum_global", @@ -2028,14 +966,7 @@ pub(crate) mod tenant_throttling { ) .unwrap() }); - static WAIT_USECS_PER_TENANT: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_tenant_throttling_wait_usecs_sum", - "Sum of microseconds that spent waiting throttle by kind of throttle.", - &["kind", "tenant_id", "shard_id"] - ) - .unwrap() - }); + static WAIT_COUNT: Lazy = Lazy::new(|| { register_int_counter_vec!( @@ -2045,16 +976,9 @@ pub(crate) mod tenant_throttling { ) .unwrap() }); - static WAIT_COUNT_PER_TENANT: Lazy = Lazy::new(|| { - register_int_counter_vec!( - "pageserver_tenant_throttling_count", - "Count of tenant throttlings, by kind of throttle.", - &["kind", "tenant_id", "shard_id"] - ) - .unwrap() - }); + - const KINDS: &[&str] = &["pagestream"]; + pub type Pagestream = Metrics<0>; impl Metrics { @@ -2071,50 +995,20 @@ pub(crate) mod tenant_throttling { Lazy::force(&WAIT_COUNT); } - pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) { - for m in &[ - &COUNT_ACCOUNTED_START_PER_TENANT, - &COUNT_ACCOUNTED_FINISH_PER_TENANT, - &WAIT_USECS_PER_TENANT, - &WAIT_COUNT_PER_TENANT, - ] { - for kind in KINDS { - let _ = m.remove_label_values(&[ - kind, - &tenant_shard_id.tenant_id.to_string(), - &tenant_shard_id.shard_slug().to_string(), - ]); - } - } - } } pub(crate) mod disk_usage_based_eviction { use super::*; pub(crate) struct Metrics { - pub(crate) tenant_collection_time: Histogram, - pub(crate) tenant_layer_count: Histogram, + pub(crate) layers_collected: IntCounter, pub(crate) layers_selected: IntCounter, - pub(crate) layers_evicted: IntCounter, } impl Default for Metrics { fn default() -> Self { - let tenant_collection_time = register_histogram!( - "pageserver_disk_usage_based_eviction_tenant_collection_seconds", - "Time spent collecting layers from a tenant -- not normalized by collected layer amount", - vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0] - ) - .unwrap(); - - let tenant_layer_count = register_histogram!( - "pageserver_disk_usage_based_eviction_tenant_collected_layers", - "Amount of layers gathered from a tenant", - vec![5.0, 50.0, 500.0, 5000.0, 50000.0] - ) - .unwrap(); + let layers_collected = register_int_counter!( "pageserver_disk_usage_based_eviction_collected_layers_total", @@ -2128,18 +1022,12 @@ pub(crate) mod disk_usage_based_eviction { ) .unwrap(); - let layers_evicted = register_int_counter!( - "pageserver_disk_usage_based_eviction_evicted_layers_total", - "Amount of layers successfully evicted" - ) - .unwrap(); Self { - tenant_collection_time, - tenant_layer_count, + layers_collected, layers_selected, - layers_evicted, + } } } @@ -2158,25 +1046,8 @@ pub fn preinitialize_metrics( set_page_service_config_max_batch_size(&conf.page_service_pipelining); // counters - [ - &WALRECEIVER_STARTED_CONNECTIONS, - &WALRECEIVER_BROKER_UPDATES, - &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES, - &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES, - &REMOTE_ONDEMAND_DOWNLOADED_LAYERS, - &REMOTE_ONDEMAND_DOWNLOADED_BYTES, - &CIRCUIT_BREAKERS_BROKEN, - &CIRCUIT_BREAKERS_UNBROKEN, - &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL, - &WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS, - ] - .into_iter() - .for_each(|c| { - Lazy::force(c); - }); // Deletion queue stats - Lazy::force(&DELETION_QUEUE); // Tenant stats Lazy::force(&TENANT); @@ -2187,41 +1058,14 @@ pub fn preinitialize_metrics( Lazy::force(&disk_usage_based_eviction::METRICS); - // countervecs - [ - &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT, - &SMGR_QUERY_STARTED_GLOBAL, - &PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL, - ] - .into_iter() - .for_each(|c| { - Lazy::force(c); - }); // gauges // WALRECEIVER_ACTIVE_MANAGERS.get(); // This seems like a read, not a modification, leaving it for now. // histograms - [ - &LAYERS_PER_READ_GLOBAL, - &LAYERS_PER_READ_BATCH_GLOBAL, - &LAYERS_PER_READ_AMORTIZED_GLOBAL, - &DELTAS_PER_READ_GLOBAL, - &WAIT_LSN_TIME, - &WAL_REDO_TIME, - &WAL_REDO_RECORDS_HISTOGRAM, - &WAL_REDO_BYTES_HISTOGRAM, - &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, - &PAGE_SERVICE_BATCH_SIZE_GLOBAL, - &PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL, - ] - .into_iter() - .for_each(|h| { - Lazy::force(h); - }); + // Custom - Lazy::force(&BASEBACKUP_QUERY_TIME); Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE); tenant_throttling::preinitialize_global_metrics(); diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 14b1793d61..58c2164d14 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -2896,7 +2896,6 @@ where self.check_permission(Some(tenant_id))?; - let metric_recording = metrics::BASEBACKUP_QUERY_TIME.start_recording(); let res = async { self.handle_basebackup_request( pgb, @@ -2914,7 +2913,7 @@ where Result::<(), QueryError>::Ok(()) } .await; - metric_recording.observe(&res); + res?; } // same as basebackup, but result includes relational data as well diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index a9622b36c1..3620681ca1 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -84,9 +84,9 @@ use crate::context::{DownloadBehavior, RequestContext}; use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; use crate::l0_flush::L0FlushGlobalState; use crate::metrics::{ - CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS, - INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, - remove_tenant_metrics, + CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, + TENANT, + }; use crate::task_mgr::TaskKind; use crate::tenant::config::LocationMode; @@ -1461,7 +1461,7 @@ impl Tenant { let preload = match &mode { SpawnMode::Eager | SpawnMode::Lazy => { - let _preload_timer = TENANT.preload.start_timer(); + let res = tenant_clone .preload(&remote_storage, task_mgr::shutdown_token()) .await; @@ -3392,7 +3392,7 @@ impl Tenant { "activation attempt finished" ); - TENANT.activation.observe(elapsed.as_secs_f64()); + }); } } @@ -3517,7 +3517,6 @@ impl Tenant { // Wait for any in-flight operations to complete self.gate.close().await; - remove_tenant_metrics(&self.tenant_shard_id); Ok(()) } @@ -5524,16 +5523,11 @@ async fn run_initdb( ); let _permit = { - let _timer = INITDB_SEMAPHORE_ACQUISITION_TIME.start_timer(); + INIT_DB_SEMAPHORE.acquire().await }; - CONCURRENT_INITDBS.inc(); - scopeguard::defer! { - CONCURRENT_INITDBS.dec(); - } - let _timer = INITDB_RUN_TIME.start_timer(); let res = postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs { superuser: &conf.superuser, locale: &conf.locale, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index ac81b8e3d7..8182197beb 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -2177,9 +2177,7 @@ impl TenantManager { // we would use if not doing any eviction. progress.bytes_total } else { - // In the absence of heatmap info, assume that the secondary location simply - // needs as much space as it is currently using. - secondary.resident_size_metric.get() + 42 } } } diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 5fac8689d8..9d9bcd8e49 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -223,7 +223,7 @@ use crate::config::PageServerConf; use crate::context::RequestContext; use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; use crate::metrics::{ - MeasureRemoteOp, REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS, + MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, }; use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind, shutdown_token}; @@ -720,8 +720,6 @@ impl RemoteTimelineClient { .await? }; - REMOTE_ONDEMAND_DOWNLOADED_LAYERS.inc(); - REMOTE_ONDEMAND_DOWNLOADED_BYTES.inc_by(downloaded_size); Ok(downloaded_size) } diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs index 2fa0ed9be9..a601ae88f4 100644 --- a/pageserver/src/tenant/secondary.rs +++ b/pageserver/src/tenant/secondary.rs @@ -6,7 +6,6 @@ mod scheduler; use std::sync::Arc; use std::time::SystemTime; -use metrics::UIntGauge; use pageserver_api::models; use pageserver_api::shard::{ShardIdentity, TenantShardId}; use remote_storage::GenericRemoteStorage; @@ -26,7 +25,6 @@ use super::span::debug_assert_current_span_has_tenant_id; use super::storage_layer::LayerName; use crate::context::RequestContext; use crate::disk_usage_eviction_task::DiskUsageEvictionInfo; -use crate::metrics::{SECONDARY_HEATMAP_TOTAL_SIZE, SECONDARY_RESIDENT_PHYSICAL_SIZE}; use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind}; enum DownloadCommand { @@ -109,12 +107,7 @@ pub(crate) struct SecondaryTenant { // Public state indicating overall progress of downloads relative to the last heatmap seen pub(crate) progress: std::sync::Mutex, - - // Sum of layer sizes on local disk - pub(super) resident_size_metric: UIntGauge, - - // Sum of layer sizes in the most recently downloaded heatmap - pub(super) heatmap_total_size_metric: UIntGauge, + } impl SecondaryTenant { @@ -124,16 +117,8 @@ impl SecondaryTenant { tenant_conf: pageserver_api::models::TenantConfig, config: &SecondaryLocationConfig, ) -> Arc { - let tenant_id = tenant_shard_id.tenant_id.to_string(); - let shard_id = format!("{}", tenant_shard_id.shard_slug()); - let resident_size_metric = SECONDARY_RESIDENT_PHYSICAL_SIZE - .get_metric_with_label_values(&[&tenant_id, &shard_id]) - .unwrap(); - - let heatmap_total_size_metric = SECONDARY_HEATMAP_TOTAL_SIZE - .get_metric_with_label_values(&[&tenant_id, &shard_id]) - .unwrap(); - + + Arc::new(Self { tenant_shard_id, // todo: shall we make this a descendent of the @@ -150,14 +135,10 @@ impl SecondaryTenant { progress: std::sync::Mutex::default(), - resident_size_metric, - heatmap_total_size_metric, }) } - pub(crate) fn tenant_shard_id(&self) -> TenantShardId { - self.tenant_shard_id - } + pub(crate) async fn shutdown(&self) { self.cancel.cancel(); @@ -169,15 +150,10 @@ impl SecondaryTenant { // Metrics are subtracted from and/or removed eagerly. // Deletions are done in the background via [`BackgroundPurges::spawn`]. - let tenant_id = self.tenant_shard_id.tenant_id.to_string(); - let shard_id = format!("{}", self.tenant_shard_id.shard_slug()); - let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]); - let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]); - self.detail .lock() .unwrap() - .drain_timelines(&self.tenant_shard_id, &self.resident_size_metric); + .drain_timelines(&self.tenant_shard_id); } pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) { @@ -255,7 +231,7 @@ impl SecondaryTenant { // of the cache. let mut detail = this.detail.lock().unwrap(); if let Some(removed) = - detail.evict_layer(name, &timeline_id, now, &this.resident_size_metric) + detail.evict_layer(name, &timeline_id, now) { // We might race with removal of the same layer during downloads, so finding the layer we // were trying to remove is optional. Only issue the disk I/O to remove it if we found it. @@ -269,10 +245,9 @@ impl SecondaryTenant { /// Exhaustive check that incrementally updated metrics match the actual state. #[cfg(feature = "testing")] fn validate_metrics(&self) { - let detail = self.detail.lock().unwrap(); - let resident_size = detail.total_resident_size(); + - assert_eq!(resident_size, self.resident_size_metric.get()); + } #[cfg(not(feature = "testing"))] diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index 847e15aa49..a9fd6b8d6b 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -7,7 +7,6 @@ use std::time::{Duration, Instant, SystemTime}; use camino::Utf8PathBuf; use chrono::format::{DelayedFormat, StrftimeItems}; use futures::Future; -use metrics::UIntGauge; use pageserver_api::models::SecondaryProgress; use pageserver_api::shard::TenantShardId; use remote_storage::{DownloadError, DownloadKind, DownloadOpts, Etag, GenericRemoteStorage}; @@ -32,7 +31,6 @@ use crate::context::RequestContext; use crate::disk_usage_eviction_task::{ DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, finite_f32, }; -use crate::metrics::SECONDARY_MODE; use crate::tenant::config::SecondaryLocationConfig; use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::ephemeral_file::is_ephemeral_file; @@ -119,9 +117,6 @@ impl OnDiskState { .fatal_err("Deleting secondary layer") } - pub(crate) fn file_size(&self) -> u64 { - self.metadata.file_size - } } pub(super) struct SecondaryDetailTimeline { @@ -174,13 +169,9 @@ impl SecondaryDetailTimeline { pub(super) fn remove_layer( &mut self, name: &LayerName, - resident_metric: &UIntGauge, ) -> Option { - let removed = self.on_disk_layers.remove(name); - if let Some(removed) = &removed { - resident_metric.sub(removed.file_size()); - } - removed + self.on_disk_layers.remove(name) + } /// `local_path` @@ -190,7 +181,6 @@ impl SecondaryDetailTimeline { tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, touched: &HeatMapLayer, - _resident_metric: &UIntGauge, local_path: F, ) where F: FnOnce() -> Utf8PathBuf, @@ -265,28 +255,16 @@ impl SecondaryDetail { } } - #[cfg(feature = "testing")] - pub(crate) fn total_resident_size(&self) -> u64 { - self.timelines - .values() - .map(|tl| { - tl.on_disk_layers - .values() - .map(|v| v.metadata.file_size) - .sum::() - }) - .sum::() - } pub(super) fn evict_layer( &mut self, name: LayerName, timeline_id: &TimelineId, now: SystemTime, - resident_metric: &UIntGauge, + ) -> Option { let timeline = self.timelines.get_mut(timeline_id)?; - let removed = timeline.remove_layer(&name, resident_metric); + let removed = timeline.remove_layer(&name); if removed.is_some() { timeline.evicted_at.insert(name, now); } @@ -295,43 +273,22 @@ impl SecondaryDetail { pub(super) fn remove_timeline( &mut self, - tenant_shard_id: &TenantShardId, + _tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, - resident_metric: &UIntGauge, ) { - let removed = self.timelines.remove(timeline_id); - if let Some(removed) = removed { - Self::clear_timeline_metrics(tenant_shard_id, timeline_id, removed, resident_metric); - } + self.timelines.remove(timeline_id); + } pub(super) fn drain_timelines( &mut self, - tenant_shard_id: &TenantShardId, - resident_metric: &UIntGauge, - ) { - for (timeline_id, removed) in self.timelines.drain() { - Self::clear_timeline_metrics(tenant_shard_id, &timeline_id, removed, resident_metric); - } - } - - fn clear_timeline_metrics( _tenant_shard_id: &TenantShardId, - _timeline_id: &TimelineId, - detail: SecondaryDetailTimeline, - resident_metric: &UIntGauge, - ) { - resident_metric.sub( - detail - .on_disk_layers - .values() - .map(|l| l.metadata.file_size) - .sum(), - ); + ) { } + /// Additionally returns the total number of layers, used for more stable relative access time /// based eviction. pub(super) fn get_layers_for_eviction( @@ -785,7 +742,6 @@ impl<'a> TenantDownloader<'a> { tenant_shard_id, last_heatmap, timeline, - &self.secondary_state.resident_size_metric, ctx, ) .await; @@ -908,11 +864,7 @@ impl<'a> TenantDownloader<'a> { bytes_downloaded: 0, }; - // Also expose heatmap bytes_total as a metric - self.secondary_state - .heatmap_total_size_metric - .set(heatmap_stats.bytes); - + // Accumulate list of things to delete while holding the detail lock, for execution after dropping the lock let mut delete_layers = Vec::new(); let mut delete_timelines = Vec::new(); @@ -979,7 +931,6 @@ impl<'a> TenantDownloader<'a> { detail.remove_timeline( self.secondary_state.get_tenant_shard_id(), delete_timeline, - &self.secondary_state.resident_size_metric, ); } } @@ -998,7 +949,7 @@ impl<'a> TenantDownloader<'a> { let Some(timeline_state) = detail.timelines.get_mut(&timeline_id) else { continue; }; - timeline_state.remove_layer(&layer_name, &self.secondary_state.resident_size_metric); + timeline_state.remove_layer(&layer_name); } for timeline_id in delete_timelines { @@ -1065,7 +1016,7 @@ impl<'a> TenantDownloader<'a> { .await .ok_or_else(|| UpdateError::Cancelled) .and_then(|x| x) - .inspect(|_| SECONDARY_MODE.download_heatmap.inc()) + .inspect(|_|{} ) } /// Download heatmap layers that are not present on local disk, or update their @@ -1240,7 +1191,6 @@ impl<'a> TenantDownloader<'a> { tenant_shard_id, &timeline_id, &t, - &self.secondary_state.resident_size_metric, || { local_layer_path( self.conf, @@ -1352,7 +1302,6 @@ impl<'a> TenantDownloader<'a> { progress.layers_downloaded += 1; } - SECONDARY_MODE.download_layer.inc(); Ok(Some(layer)) } @@ -1364,7 +1313,6 @@ async fn init_timeline_state( tenant_shard_id: &TenantShardId, last_heatmap: Option<&HeatMapTimeline>, heatmap: &HeatMapTimeline, - resident_metric: &UIntGauge, ctx: &RequestContext, ) -> SecondaryDetailTimeline { let ctx = ctx.with_scope_secondary_timeline(tenant_shard_id, &heatmap.timeline_id); @@ -1468,7 +1416,6 @@ async fn init_timeline_state( tenant_shard_id, &heatmap.timeline_id, remote_meta, - resident_metric, || file_path, ); } diff --git a/pageserver/src/tenant/secondary/heatmap_uploader.rs b/pageserver/src/tenant/secondary/heatmap_uploader.rs index 3375714a66..c3ce39a84e 100644 --- a/pageserver/src/tenant/secondary/heatmap_uploader.rs +++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs @@ -20,7 +20,6 @@ use super::scheduler::{ }; use super::{CommandRequest, SecondaryTenantError, UploadCommand}; use crate::TEMP_FILE_SUFFIX; -use crate::metrics::SECONDARY_MODE; use crate::tenant::Tenant; use crate::tenant::config::AttachmentMode; use crate::tenant::mgr::{GetTenantError, TenantManager}; @@ -221,14 +220,10 @@ impl JobGenerator // Guard for the barrier in [`WriteInProgress`] let _completion = completion; - let started_at = Instant::now(); + let uploaded = match upload_tenant_heatmap(remote_storage, &tenant, last_upload.clone()).await { Ok(UploadHeatmapOutcome::Uploaded(uploaded)) => { - let duration = Instant::now().duration_since(started_at); - SECONDARY_MODE - .upload_heatmap_duration - .observe(duration.as_secs_f64()); - SECONDARY_MODE.upload_heatmap.inc(); + Some(uploaded) } Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_upload, @@ -237,11 +232,8 @@ impl JobGenerator "Failed to upload heatmap for tenant {}: {e:#}", tenant.get_tenant_shard_id(), ); - let duration = Instant::now().duration_since(started_at); - SECONDARY_MODE - .upload_heatmap_duration - .observe(duration.as_secs_f64()); - SECONDARY_MODE.upload_heatmap_errors.inc(); + + last_upload } Err(UploadHeatmapError::Cancelled) => { diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 0c555b09f1..dd993a4c5c 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -231,9 +231,7 @@ impl Layer { debug_assert!(owner.0.needs_download_blocking().unwrap().is_none()); - timeline - .metrics - .resident_physical_size_add(metadata.file_size); + ResidentLayer { downloaded, owner } } @@ -1141,9 +1139,7 @@ impl LayerInner { } }; tracing::info!(size=%self.desc.file_size, %latency_millis, "on-demand download successful"); - timeline - .metrics - .resident_physical_size_add(self.desc.file_size); + self.consecutive_failures.store(0, Ordering::Relaxed); self.access_stats.record_residence_event(); @@ -1421,7 +1417,7 @@ impl LayerInner { Self::spawn_blocking(move || { let _span = span.entered(); - let res = self.evict_blocking(&timeline, &gate, &permit); + let res = self.evict_blocking( &gate, &permit); let waiters = self.inner.initializer_count(); @@ -1438,7 +1434,6 @@ impl LayerInner { /// This is blocking only to do just one spawn_blocking hop compared to multiple via tokio::fs. fn evict_blocking( &self, - timeline: &Timeline, _gate: &gate::GateGuard, _permit: &heavier_once_cell::InitPermit, ) -> Result<(), EvictionCancelled> { @@ -1463,9 +1458,6 @@ impl LayerInner { tracing::info!("evicted layer after unknown residence period"); } } - timeline - .metrics - .resident_physical_size_sub(self.desc.file_size); } Err(e) if e.kind() == std::io::ErrorKind::NotFound => { tracing::error!( diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 54d8a8b8a1..263ec33d02 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -10,7 +10,7 @@ use std::time::{Duration, Instant}; use once_cell::sync::Lazy; use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD; use rand::Rng; -use scopeguard::defer; + use tokio::sync::{Semaphore, SemaphorePermit}; use tokio_util::sync::CancellationToken; use tracing::*; @@ -19,7 +19,6 @@ use utils::completion::Barrier; use utils::pausable_failpoint; use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics::{self, TENANT_TASK_EVENTS}; use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind}; use crate::tenant::throttle::Stats; use crate::tenant::timeline::CompactionError; @@ -131,8 +130,7 @@ pub fn start_background_loops(tenant: &Arc, can_start: Option<&Barrier>) _ = cancel.cancelled() => return Ok(()), _ = Barrier::maybe_wait(can_start) => {} }; - TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); - defer!(TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc()); + compaction_loop(tenant, cancel) // If you rename this span, change the RUST_LOG env variable in test_runner/performance/test_branch_creation.py .instrument(info_span!("compaction_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug())) @@ -157,8 +155,6 @@ pub fn start_background_loops(tenant: &Arc, can_start: Option<&Barrier>) _ = cancel.cancelled() => return Ok(()), _ = Barrier::maybe_wait(can_start) => {} }; - TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); - defer!(TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc()); gc_loop(tenant, cancel) .instrument(info_span!("gc_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug())) .await; @@ -182,8 +178,7 @@ pub fn start_background_loops(tenant: &Arc, can_start: Option<&Barrier>) _ = cancel.cancelled() => return Ok(()), _ = Barrier::maybe_wait(can_start) => {} }; - TENANT_TASK_EVENTS.with_label_values(&["start"]).inc(); - defer!(TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc()); + tenant_housekeeping_loop(tenant, cancel) .instrument(info_span!("tenant_housekeeping_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug())) .await; @@ -588,8 +583,5 @@ pub(crate) fn warn_when_period_overrun( ?task, "task iteration took longer than the configured period" ); - metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT - .with_label_values(&[task.into(), &format!("{}", period.as_secs())]) - .inc(); } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index e92aaad976..1892810788 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -103,9 +103,7 @@ use crate::context::{ use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32}; use crate::keyspace::{KeyPartitioning, KeySpace}; use crate::l0_flush::{self, L0FlushGlobalState}; -use crate::metrics::{ - DELTAS_PER_READ_GLOBAL, TimelineMetrics, -}; +use crate::metrics::TimelineMetrics; use crate::page_service::TenantManagerTypes; use crate::pgdatadir_mapping::{ CalculateLogicalSizeError, CollectKeySpaceError, DirectoryKind, LsnForTimestamp, @@ -1381,7 +1379,7 @@ impl Timeline { return (key, Err(err)); } }; - DELTAS_PER_READ_GLOBAL.observe(converted.num_deltas() as f64); + // The walredo module expects the records to be descending in terms of Lsn. // And we submit the IOs in that order, so, there shuold be no need to sort here. @@ -1485,9 +1483,6 @@ impl Timeline { guard.layer_size_sum() } - pub(crate) fn resident_physical_size(&self) -> u64 { - self.metrics.resident_physical_size_get() - } pub(crate) fn get_directory_metrics(&self) -> [u64; DirectoryKind::KINDS_NUM] { array::from_fn(|idx| self.directory_metrics[idx].load(AtomicOrdering::Relaxed)) @@ -1550,7 +1545,7 @@ impl Timeline { WaitLsnTimeout::Default => self.conf.wait_lsn_timeout, }; - let timer = crate::metrics::WAIT_LSN_TIME.start_timer(); + let wait_for_timeout = self.last_record_lsn.wait_for_timeout(lsn, timeout); let wait_for_timeout = std::pin::pin!(wait_for_timeout); // Use threshold of 1 because even 1 second of wait for ingest is very much abnormal. @@ -1597,7 +1592,6 @@ impl Timeline { let res = wait_for_timeout.await; // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo drop(logging_permit); - drop(timer); match res { Ok(()) => Ok(()), Err(e) => { @@ -3571,7 +3565,7 @@ impl Timeline { async fn calculate_logical_size( &self, up_to_lsn: Lsn, - cause: LogicalSizeCalculationCause, + _cause: LogicalSizeCalculationCause, _guard: &GateGuard, ctx: &RequestContext, ) -> Result { @@ -3590,20 +3584,13 @@ impl Timeline { if let Some(size) = self.current_logical_size.initialized_size(up_to_lsn) { return Ok(size); } - let storage_time_metrics = match cause { - LogicalSizeCalculationCause::Initial - | LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize - | LogicalSizeCalculationCause::TenantSizeHandler => &self.metrics.logical_size_histo, - LogicalSizeCalculationCause::EvictionTaskImitation => { - &self.metrics.imitate_logical_size_histo - } - }; - let timer = storage_time_metrics.start_timer(); + + let logical_size = self .get_current_logical_size_non_incremental(up_to_lsn, ctx) .await?; debug!("calculated logical size: {logical_size}"); - timer.stop_and_record(); + Ok(logical_size) } @@ -4489,17 +4476,10 @@ impl Timeline { "stalling layer flushes for compaction backpressure at {l0_count} \ L0 layers ({frozen_count} frozen layers with {frozen_size} bytes)" ); - let stall_timer = self - .metrics - .flush_delay_histo - .start_timer() - .record_on_drop(); + tokio::select! { - result = watch_l0.wait_for(|l0| *l0 < stall_threshold) => { - if let Ok(l0) = result.as_deref() { - let delay = stall_timer.elapsed().as_secs_f64(); - info!("resuming layer flushes at {l0} L0 layers after {delay:.3}s"); - } + _result = watch_l0.wait_for(|l0| *l0 < stall_threshold) => { + }, _ = self.cancel.cancelled() => {}, } @@ -4542,11 +4522,7 @@ impl Timeline { "delaying layer flush by {delay:.3}s for compaction backpressure at \ {l0_count} L0 layers ({frozen_count} frozen layers with {frozen_size} bytes)" ); - let _delay_timer = self - .metrics - .flush_delay_histo - .start_timer() - .record_on_drop(); + tokio::select! { _ = tokio::time::sleep(flush_duration) => {}, _ = watch_l0.wait_for(|l0| *l0 < delay_threshold) => {}, @@ -5273,7 +5249,7 @@ impl Timeline { last_status: LastImageLayerCreationStatus, yield_for_l0: bool, ) -> Result<(Vec, LastImageLayerCreationStatus), CreateImageLayersError> { - let timer = self.metrics.create_images_time_histo.start_timer(); + if partitioning.parts.is_empty() { warn!("no partitions to create image layers for"); @@ -5306,8 +5282,8 @@ impl Timeline { let mut all_generated = true; - let mut partition_processed = 0; - let mut total_partitions = partitioning.parts.len(); + + let total_partitions = partitioning.parts.len(); let mut last_partition_processed = None; let mut partition_parts = partitioning.parts.clone(); @@ -5332,7 +5308,7 @@ impl Timeline { break; // with found=false } partition_parts = partition_parts.split_off(i + 1); // Remove the first i + 1 elements - total_partitions = partition_parts.len(); + // Update the start key to the partition start. start = partition_parts[0].start().unwrap(); found = true; @@ -5349,7 +5325,6 @@ impl Timeline { if self.cancel.is_cancelled() { return Err(CreateImageLayersError::Cancelled); } - partition_processed += 1; let img_range = start..partition.ranges.last().unwrap().end; let compact_metadata = partition.overlaps(&Key::metadata_key_range()); if compact_metadata { @@ -5512,28 +5487,16 @@ impl Timeline { .open_mut()? .track_new_image_layers(&image_layers, &self.metrics); drop_wlock(guard); - let duration = timer.stop_and_record(); + // Creating image layers may have caused some previously visible layers to be covered if !image_layers.is_empty() { self.update_layer_visibility().await?; } - let total_layer_size = image_layers - .iter() - .map(|l| l.metadata().file_size) - .sum::(); + - if !image_layers.is_empty() { - info!( - "created {} image layers ({} bytes) in {}s, processed {} out of {} partitions", - image_layers.len(), - total_layer_size, - duration.as_secs_f64(), - partition_processed, - total_partitions - ); - } + Ok(( image_layers, @@ -6068,11 +6031,7 @@ impl Timeline { cancel: &CancellationToken, ctx: &RequestContext, ) -> Result { - let _timer = self - .metrics - .find_gc_cutoffs_histo - .start_timer() - .record_on_drop(); + pausable_failpoint!("Timeline::find_gc_cutoffs-pausable"); @@ -6139,7 +6098,7 @@ impl Timeline { guard = self.gc_lock.lock() => guard, _ = self.cancel.cancelled() => return Ok(GcResult::default()), }; - let timer = self.metrics.garbage_collect_histo.start_timer(); + fail_point!("before-timeline-gc"); @@ -6211,8 +6170,7 @@ impl Timeline { ) .await?; - // only record successes - timer.stop_and_record(); + Ok(res) } diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 92b24a73c9..5f9f15c1c7 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1133,16 +1133,15 @@ impl Timeline { // 1. L0 Compact let l0_outcome = { - let timer = self.metrics.compact_time_histo.start_timer(); - let l0_outcome = self + + self .compact_level0( target_file_size, options.flags.contains(CompactFlags::ForceL0Compaction), ctx, ) - .await?; - timer.stop_and_record(); - l0_outcome + .await? + }; if options.flags.contains(CompactFlags::OnlyL0Compaction) { @@ -4041,7 +4040,7 @@ impl TimelineAdaptor { key_range: &Range, ctx: &RequestContext, ) -> Result<(), CreateImageLayersError> { - let timer = self.timeline.metrics.create_images_time_histo.start_timer(); + let image_layer_writer = ImageLayerWriter::new( self.timeline.conf, @@ -4087,7 +4086,7 @@ impl TimelineAdaptor { self.new_images.push(image_layer); } - timer.stop_and_record(); + Ok(()) } diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 70cfcb96f5..096c2144c9 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -126,7 +126,7 @@ impl Timeline { ) -> ControlFlow<(), Instant> { debug!("eviction iteration: {policy:?}"); let start = Instant::now(); - let (period, threshold) = match policy { + let (period, _) = match policy { EvictionPolicy::NoEviction => { // check again in 10 seconds; XXX config watch mechanism return ControlFlow::Continue(Instant::now() + Duration::from_secs(10)); @@ -159,16 +159,6 @@ impl Timeline { period, BackgroundLoopKind::Eviction, ); - // FIXME: if we were to mix policies on a pageserver, we would have no way to sense this. I - // don't think that is a relevant fear however, and regardless the imitation should be the - // most costly part. - crate::metrics::EVICTION_ITERATION_DURATION - .get_metric_with_label_values(&[ - &format!("{}", period.as_secs()), - &format!("{}", threshold.as_secs()), - ]) - .unwrap() - .observe(elapsed.as_secs_f64()); ControlFlow::Continue(start + period) } diff --git a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs index 11df232a10..ecccfd06a4 100644 --- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs +++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs @@ -59,11 +59,7 @@ impl HeatmapLayersDownloader { return; }; - tracing::info!( - resident_size=%timeline.resident_physical_size(), - heatmap_layers=%heatmap.all_layers().count(), - "Starting heatmap layers download" - ); + let stream = futures::stream::iter(heatmap.all_layers().cloned().filter_map( |layer| { @@ -93,7 +89,7 @@ impl HeatmapLayersDownloader { tokio::select! { _ = stream.collect::<()>() => { tracing::info!( - resident_size=%timeline.resident_physical_size(), + "Heatmap layers download completed" ); }, diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index ed8a954369..1315e7bf55 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -40,10 +40,7 @@ use utils::sync::gate::GateError; use utils::sync::heavier_once_cell; use crate::config::PageServerConf; -use crate::metrics::{ - WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, - WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_TIME, -}; + /// The real implementation that uses a Postgres process to /// perform WAL replay. @@ -353,7 +350,7 @@ impl PostgresRedoManager { } }, Err(permit) => { - let start = Instant::now(); + // acquire guard before spawning process, so that we don't spawn new processes // if the gate is already closed. let _launched_processes_guard = match self.launched_processes.enter() { @@ -371,13 +368,9 @@ impl PostgresRedoManager { .context("launch walredo process")?, _launched_processes_guard, }); - let duration = start.elapsed(); - WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64()); - info!( - elapsed_ms = duration.as_millis(), - pid = proc.id(), - "launched walredo process" - ); + + + self.redo_process .set(ProcessOnceCell::Spawned(Arc::clone(&proc)), permit); proc @@ -471,9 +464,7 @@ impl PostgresRedoManager { } }); - WAL_REDO_TIME.observe(duration.as_secs_f64()); - WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64); - WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64); + debug!( "postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}", @@ -538,9 +529,7 @@ impl PostgresRedoManager { } // Success! let duration = start_time.elapsed(); - // FIXME: using the same metric here creates a bimodal distribution by default, and because - // there could be multiple batch sizes this would be N+1 modal. - WAL_REDO_TIME.observe(duration.as_secs_f64()); + debug!( "neon applied {} WAL records in {} us to reconstruct page image at LSN {}", diff --git a/pageserver/src/walredo/process.rs b/pageserver/src/walredo/process.rs index 6d4a38d4ff..b80160d888 100644 --- a/pageserver/src/walredo/process.rs +++ b/pageserver/src/walredo/process.rs @@ -21,7 +21,7 @@ use utils::poison::Poison; use self::no_leak_child::NoLeakChild; use crate::config::PageServerConf; -use crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER, WalRedoKillCause}; +use crate::metrics:: WalRedoKillCause; use crate::page_cache::PAGE_SZ; use crate::span::debug_assert_current_span_has_tenant_id; @@ -97,7 +97,6 @@ impl WalRedoProcess { // walredo request. .spawn_no_leak_child(tenant_shard_id) .context("spawn process")?; - WAL_REDO_PROCESS_COUNTERS.started.inc(); let mut child = scopeguard::guard(child, |child| { error!("killing wal-redo-postgres process due to a problem during launch"); child.kill_and_wait(WalRedoKillCause::Startup); @@ -118,12 +117,7 @@ impl WalRedoProcess { tokio::spawn( async move { - scopeguard::defer! { - debug!("wal-redo-postgres stderr_logger_task finished"); - crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_finished.inc(); - } debug!("wal-redo-postgres stderr_logger_task started"); - crate::metrics::WAL_REDO_PROCESS_COUNTERS.active_stderr_logger_tasks_started.inc(); use tokio::io::AsyncBufReadExt; let mut stderr_lines = tokio::io::BufReader::new(stderr); @@ -231,7 +225,7 @@ impl WalRedoProcess { } } protocol::build_get_page_msg(tag, &mut writebuf); - WAL_REDO_RECORD_COUNTER.inc_by(records.len() as u64); + let Ok(res) = tokio::time::timeout(wal_redo_timeout, self.apply_wal_records0(&writebuf)).await diff --git a/pageserver/src/walredo/process/no_leak_child.rs b/pageserver/src/walredo/process/no_leak_child.rs index 9939fc4b36..e381d86632 100644 --- a/pageserver/src/walredo/process/no_leak_child.rs +++ b/pageserver/src/walredo/process/no_leak_child.rs @@ -5,7 +5,7 @@ use std::process::{Child, Command}; use pageserver_api::shard::TenantShardId; use tracing::{error, info, instrument}; -use crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WalRedoKillCause}; +use crate::metrics::WalRedoKillCause; /// Wrapper type around `std::process::Child` which guarantees that the child /// will be killed and waited-for by this process before being dropped. @@ -47,9 +47,6 @@ impl NoLeakChild { #[instrument(skip_all, fields(pid=child.id(), ?cause))] pub(crate) fn kill_and_wait_impl(mut child: Child, cause: WalRedoKillCause) { - scopeguard::defer! { - WAL_REDO_PROCESS_COUNTERS.killed_by_cause[cause].inc(); - } let res = child.kill(); if let Err(e) = res { // This branch is very unlikely because: