From 505aa242ace380fe7641935cc3ce98fd6b4c3411 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 5 Jul 2023 14:36:42 +0200 Subject: [PATCH] page cache: add size metrics (#4629) Make them a member of `struct PageCache` to prepare for a future where there's no global state. --- pageserver/src/metrics.rs | 49 +++++++++++++++++++++++++++++++-- pageserver/src/page_cache.rs | 44 ++++++++++++++++++++++++++++- test_runner/fixtures/metrics.py | 2 ++ 3 files changed, 91 insertions(+), 4 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index db5bccdbba..96d23e220f 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1,9 +1,9 @@ use metrics::metric_vec_duration::DurationResultObserver; use metrics::{ register_counter_vec, register_histogram, register_histogram_vec, register_int_counter, - register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec, - Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, - UIntGauge, UIntGaugeVec, + register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge, + register_uint_gauge_vec, Counter, CounterVec, Histogram, HistogramVec, IntCounter, + IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; use pageserver_api::models::TenantState; @@ -203,6 +203,49 @@ pub static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics { }, }); +pub struct PageCacheSizeMetrics { + pub max_bytes: UIntGauge, + + pub current_bytes_ephemeral: UIntGauge, + pub current_bytes_immutable: UIntGauge, + pub current_bytes_materialized_page: UIntGauge, +} + +static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| { + register_uint_gauge_vec!( + "pageserver_page_cache_size_current_bytes", + "Current size of the page cache in bytes, by key kind", + &["key_kind"] + ) + .expect("failed to define a metric") +}); + +pub static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> = Lazy::new(|| 
PageCacheSizeMetrics { + max_bytes: { + register_uint_gauge!( + "pageserver_page_cache_size_max_bytes", + "Maximum size of the page cache in bytes" + ) + .expect("failed to define a metric") + }, + + current_bytes_ephemeral: { + PAGE_CACHE_SIZE_CURRENT_BYTES + .get_metric_with_label_values(&["ephemeral"]) + .unwrap() + }, + current_bytes_immutable: { + PAGE_CACHE_SIZE_CURRENT_BYTES + .get_metric_with_label_values(&["immutable"]) + .unwrap() + }, + current_bytes_materialized_page: { + PAGE_CACHE_SIZE_CURRENT_BYTES + .get_metric_with_label_values(&["materialized_page"]) + .unwrap() + }, +}); + static WAIT_LSN_TIME: Lazy<HistogramVec> = Lazy::new(|| { register_histogram_vec!( "pageserver_wait_lsn_seconds", diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index ef0e748d10..e29eb1d197 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -53,8 +53,8 @@ use utils::{ lsn::Lsn, }; -use crate::repository::Key; use crate::tenant::writeback_ephemeral_file; +use crate::{metrics::PageCacheSizeMetrics, repository::Key}; static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new(); const TEST_PAGE_CACHE_SIZE: usize = 50; @@ -187,6 +187,8 @@ pub struct PageCache { /// Index of the next candidate to evict, for the Clock replacement algorithm. /// This is interpreted modulo the page cache size. 
next_evict_slot: AtomicUsize, + + size_metrics: &'static PageCacheSizeMetrics, } /// @@ -718,6 +720,9 @@ impl PageCache { if let Ok(version_idx) = versions.binary_search_by_key(old_lsn, |v| v.lsn) { versions.remove(version_idx); + self.size_metrics + .current_bytes_materialized_page + .sub_page_sz(1); if versions.is_empty() { old_entry.remove_entry(); } @@ -730,11 +735,13 @@ impl PageCache { let mut map = self.ephemeral_page_map.write().unwrap(); map.remove(&(*file_id, *blkno)) .expect("could not find old key in mapping"); + self.size_metrics.current_bytes_ephemeral.sub_page_sz(1); } CacheKey::ImmutableFilePage { file_id, blkno } => { let mut map = self.immutable_page_map.write().unwrap(); map.remove(&(*file_id, *blkno)) .expect("could not find old key in mapping"); + self.size_metrics.current_bytes_immutable.sub_page_sz(1); } } } @@ -762,6 +769,9 @@ impl PageCache { slot_idx, }, ); + self.size_metrics + .current_bytes_materialized_page + .add_page_sz(1); None } } @@ -772,6 +782,7 @@ impl PageCache { Entry::Occupied(entry) => Some(*entry.get()), Entry::Vacant(entry) => { entry.insert(slot_idx); + self.size_metrics.current_bytes_ephemeral.add_page_sz(1); None } } @@ -782,6 +793,7 @@ impl PageCache { Entry::Occupied(entry) => Some(*entry.get()), Entry::Vacant(entry) => { entry.insert(slot_idx); + self.size_metrics.current_bytes_immutable.add_page_sz(1); None } } @@ -881,6 +893,12 @@ impl PageCache { let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice()); + let size_metrics = &crate::metrics::PAGE_CACHE_SIZE; + size_metrics.max_bytes.set_page_sz(num_pages); + size_metrics.current_bytes_ephemeral.set_page_sz(0); + size_metrics.current_bytes_immutable.set_page_sz(0); + size_metrics.current_bytes_materialized_page.set_page_sz(0); + let slots = page_buffer .chunks_exact_mut(PAGE_SZ) .map(|chunk| { @@ -903,6 +921,30 @@ impl PageCache { immutable_page_map: Default::default(), slots, next_evict_slot: AtomicUsize::new(0), + size_metrics, } } } + +trait 
PageSzBytesMetric { + fn set_page_sz(&self, count: usize); + fn add_page_sz(&self, count: usize); + fn sub_page_sz(&self, count: usize); +} + +#[inline(always)] +fn count_times_page_sz(count: usize) -> u64 { + u64::try_from(count).unwrap() * u64::try_from(PAGE_SZ).unwrap() +} + +impl PageSzBytesMetric for metrics::UIntGauge { + fn set_page_sz(&self, count: usize) { + self.set(count_times_page_sz(count)); + } + fn add_page_sz(&self, count: usize) { + self.add(count_times_page_sz(count)); + } + fn sub_page_sz(&self, count: usize) { + self.sub(count_times_page_sz(count)); + } +} diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index a2bc2e28e5..3f87aa10a3 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -61,6 +61,8 @@ PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = ( "pageserver_materialized_cache_hits_direct_total", "pageserver_page_cache_read_hits_total", "pageserver_page_cache_read_accesses_total", + "pageserver_page_cache_size_current_bytes", + "pageserver_page_cache_size_max_bytes", "pageserver_getpage_reconstruct_seconds_bucket", "pageserver_getpage_reconstruct_seconds_count", "pageserver_getpage_reconstruct_seconds_sum",