From 3322b6c5b0eafbb3f528211759ccc0564271ac96 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 26 Sep 2023 09:01:09 +0200 Subject: [PATCH] page cache: metrics: add page content kind dimension (#5373) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TaskKind dimension added in #5339 is insufficient to understand what kind of data causes the cache hits. Regarding performance considerations: I'm not too worried because we're moving from 3 to 4 one-byte sized fields; likely the space now used by the new field was padding before. Didn't check this, though, and it doesn't matter, we need the data. What I don't like about this PR is that we have an `Unknown` content type, and I also don't like that there's no compile-time way to assert that it's set to something != `Unknown` when calling the page cache. But, this is what I could come up with before tomorrow’s release, and I think it covers the hot paths. --- pageserver/src/context.rs | 23 +++++ pageserver/src/metrics.rs | 85 ++++++++++++------- pageserver/src/page_cache.rs | 12 ++- .../src/tenant/storage_layer/delta_layer.rs | 14 ++- .../src/tenant/storage_layer/image_layer.rs | 19 ++++- .../tenant/storage_layer/inmemory_layer.rs | 23 ++++- 6 files changed, 126 insertions(+), 50 deletions(-) diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 2953208d1e..ee331ea154 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -94,6 +94,18 @@ pub struct RequestContext { task_kind: TaskKind, download_behavior: DownloadBehavior, access_stats_behavior: AccessStatsBehavior, + page_content_kind: PageContentKind, +} + +/// The kind of access to the page cache. +#[derive(Clone, Copy, PartialEq, Eq, Debug, enum_map::Enum, strum_macros::IntoStaticStr)] +pub enum PageContentKind { + Unknown, + DeltaLayerBtreeNode, + DeltaLayerValue, + ImageLayerBtreeNode, + ImageLayerValue, + InMemoryLayer, } /// Desired behavior if the operation requires an on-demand download @@ -137,6 +149,7 @@ impl RequestContextBuilder { task_kind, download_behavior: DownloadBehavior::Download, access_stats_behavior: AccessStatsBehavior::Update, + page_content_kind: PageContentKind::Unknown, }, } } @@ -149,6 +162,7 @@ impl RequestContextBuilder { task_kind: original.task_kind, download_behavior: original.download_behavior, access_stats_behavior: original.access_stats_behavior, + page_content_kind: original.page_content_kind, }, } } @@ -167,6 +181,11 @@ impl RequestContextBuilder { self } + pub(crate) fn page_content_kind(mut self, k: PageContentKind) -> Self { + self.inner.page_content_kind = k; + self + } + pub fn build(self) -> RequestContext { self.inner } @@ -263,4 +282,8 @@ impl RequestContext { pub(crate) fn access_stats_behavior(&self) -> AccessStatsBehavior { self.access_stats_behavior } + + pub(crate) fn page_content_kind(&self) -> PageContentKind { + self.page_content_kind + } } diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 1aa168f3be..98dee095a3 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -138,14 +138,14 @@ pub struct PageCacheMetricsForTaskKind { } pub struct PageCacheMetrics { - by_task_kind: EnumMap, + map: EnumMap>, } static PAGE_CACHE_READ_HITS: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_page_cache_read_hits_total", "Number of read accesses to the page cache that hit", - &["task_kind", "key_kind", "hit_kind"] + &["task_kind", "key_kind", "content_kind", "hit_kind"] ) .expect("failed to define a metric") }); @@ -154,52 +154,70 @@ static PAGE_CACHE_READ_ACCESSES: Lazy = Lazy::new(|| { register_int_counter_vec!( "pageserver_page_cache_read_accesses_total", "Number of read accesses to the page cache", - &["task_kind", "key_kind"] + &["task_kind", "key_kind", "content_kind"] ) .expect("failed to define a metric") }); pub static PAGE_CACHE: Lazy = Lazy::new(|| PageCacheMetrics { - by_task_kind: EnumMap::from_array(std::array::from_fn(|task_kind| { + map: EnumMap::from_array(std::array::from_fn(|task_kind| { let task_kind = ::from_usize(task_kind); let task_kind: &'static str = task_kind.into(); - PageCacheMetricsForTaskKind { - read_accesses_materialized_page: { - PAGE_CACHE_READ_ACCESSES - .get_metric_with_label_values(&[task_kind, "materialized_page"]) - .unwrap() - }, + EnumMap::from_array(std::array::from_fn(|content_kind| { + let content_kind = ::from_usize(content_kind); + let content_kind: &'static str = content_kind.into(); + PageCacheMetricsForTaskKind { + read_accesses_materialized_page: { + PAGE_CACHE_READ_ACCESSES + .get_metric_with_label_values(&[ + task_kind, + "materialized_page", + content_kind, + ]) + .unwrap() + }, - read_accesses_immutable: { - PAGE_CACHE_READ_ACCESSES - .get_metric_with_label_values(&[task_kind, "immutable"]) - .unwrap() - }, + read_accesses_immutable: { + PAGE_CACHE_READ_ACCESSES + .get_metric_with_label_values(&[task_kind, "immutable", content_kind]) + .unwrap() + }, - read_hits_immutable: { - PAGE_CACHE_READ_HITS - .get_metric_with_label_values(&[task_kind, "immutable", "-"]) - .unwrap() - }, + read_hits_immutable: { + PAGE_CACHE_READ_HITS + .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"]) + .unwrap() + }, - read_hits_materialized_page_exact: { - PAGE_CACHE_READ_HITS - .get_metric_with_label_values(&[task_kind, "materialized_page", "exact"]) - .unwrap() - }, + read_hits_materialized_page_exact: { + PAGE_CACHE_READ_HITS + .get_metric_with_label_values(&[ + task_kind, + "materialized_page", + content_kind, + "exact", + ]) + .unwrap() + }, - read_hits_materialized_page_older_lsn: { - PAGE_CACHE_READ_HITS - .get_metric_with_label_values(&[task_kind, "materialized_page", "older_lsn"]) - .unwrap() - }, - } + read_hits_materialized_page_older_lsn: { + PAGE_CACHE_READ_HITS + .get_metric_with_label_values(&[ + task_kind, + "materialized_page", + content_kind, + "older_lsn", + ]) + .unwrap() + }, + } + })) })), }); impl PageCacheMetrics { - pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> &PageCacheMetricsForTaskKind { - &self.by_task_kind[task_kind] + pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind { + &self.map[ctx.task_kind()][ctx.page_content_kind()] } } @@ -1283,6 +1301,7 @@ use std::sync::{Arc, Mutex}; use std::task::{Context, Poll}; use std::time::{Duration, Instant}; +use crate::context::{PageContentKind, RequestContext}; use crate::task_mgr::TaskKind; pub struct RemoteTimelineClientMetrics { diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 3dcdd73d02..38b169ea85 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -349,7 +349,7 @@ impl PageCache { ctx: &RequestContext, ) -> Option<(Lsn, PageReadGuard)> { crate::metrics::PAGE_CACHE - .for_task_kind(ctx.task_kind()) + .for_ctx(ctx) .read_accesses_materialized_page .inc(); @@ -370,12 +370,12 @@ impl PageCache { { if available_lsn == lsn { crate::metrics::PAGE_CACHE - .for_task_kind(ctx.task_kind()) + .for_ctx(ctx) .read_hits_materialized_page_exact .inc(); } else { crate::metrics::PAGE_CACHE - .for_task_kind(ctx.task_kind()) + .for_ctx(ctx) .read_hits_materialized_page_older_lsn .inc(); } @@ -513,11 +513,9 @@ impl PageCache { } CacheKey::ImmutableFilePage { .. } => ( &crate::metrics::PAGE_CACHE - .for_task_kind(ctx.task_kind()) + .for_ctx(ctx) .read_accesses_immutable, - &crate::metrics::PAGE_CACHE - .for_task_kind(ctx.task_kind()) - .read_hits_immutable, + &crate::metrics::PAGE_CACHE.for_ctx(ctx).read_hits_immutable, ), }; read_access.inc(); diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 9e5b6bd55f..fbc5ecc9c0 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -28,7 +28,7 @@ //! "values" part. //! use crate::config::PageServerConf; -use crate::context::RequestContext; +use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::PAGE_SZ; use crate::repository::{Key, Value, KEY_SIZE}; use crate::tenant::blob_io::BlobWriter; @@ -915,10 +915,16 @@ impl DeltaLayerInner { !blob_ref.will_init() }, - ctx, + &RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::DeltaLayerBtreeNode) + .build(), ) .await?; + let ctx = &RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::DeltaLayerValue) + .build(); + // Ok, 'offsets' now contains the offsets of all the entries we need to read let cursor = file.block_cursor(); let mut buf = Vec::new(); @@ -1005,7 +1011,9 @@ impl DeltaLayerInner { all_keys.push(entry); true }, - ctx, + &RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::DeltaLayerBtreeNode) + .build(), ) .await?; if let Some(last) = all_keys.last_mut() { diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index f2c73ccf81..a5470a9f9d 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -24,7 +24,7 @@ //! mapping from Key to an offset in the "values" part. The //! actual page images are stored in the "values" part. use crate::config::PageServerConf; -use crate::context::RequestContext; +use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::PAGE_SZ; use crate::repository::{Key, KEY_SIZE}; use crate::tenant::blob_io::BlobWriter; @@ -484,10 +484,23 @@ impl ImageLayerInner { let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE]; key.write_to_byte_slice(&mut keybuf); - if let Some(offset) = tree_reader.get(&keybuf, ctx).await? { + if let Some(offset) = tree_reader + .get( + &keybuf, + &RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::ImageLayerBtreeNode) + .build(), + ) + .await? + { let blob = file .block_cursor() - .read_blob(offset, ctx) + .read_blob( + offset, + &RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::ImageLayerValue) + .build(), + ) .await .with_context(|| format!("failed to read value from offset {}", offset))?; let value = Bytes::from(blob); diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index 62da5a715c..764dc2c64e 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -5,7 +5,7 @@ //! its position in the file, is kept in memory, though. //! use crate::config::PageServerConf; -use crate::context::RequestContext; +use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::repository::{Key, Value}; use crate::tenant::block_io::BlockReader; use crate::tenant::ephemeral_file::EphemeralFile; @@ -163,6 +163,10 @@ impl InMemoryLayer { ensure!(lsn_range.start >= self.start_lsn); let mut need_image = true; + let ctx = RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::InMemoryLayer) + .build(); + let inner = self.inner.read().await; let reader = inner.file.block_cursor(); @@ -171,7 +175,7 @@ impl InMemoryLayer { if let Some(vec_map) = inner.index.get(&key) { let slice = vec_map.slice_range(lsn_range); for (entry_lsn, pos) in slice.iter().rev() { - let buf = reader.read_blob(*pos, ctx).await?; + let buf = reader.read_blob(*pos, &ctx).await?; let value = Value::des(&buf)?; match value { Value::Image(img) => { @@ -281,7 +285,15 @@ impl InMemoryLayer { let mut buf = smallvec::SmallVec::<[u8; 256]>::new(); buf.clear(); val.ser_into(&mut buf)?; - inner.file.write_blob(&buf, ctx).await? + inner + .file + .write_blob( + &buf, + &RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::InMemoryLayer) + .build(), + ) + .await? }; let vec_map = inner.index.entry(key).or_default(); @@ -349,11 +361,14 @@ impl InMemoryLayer { let mut keys: Vec<(&Key, &VecMap)> = inner.index.iter().collect(); keys.sort_by_key(|k| k.0); + let ctx = RequestContextBuilder::extend(ctx) + .page_content_kind(PageContentKind::InMemoryLayer) + .build(); for (key, vec_map) in keys.iter() { let key = **key; // Write all page versions for (lsn, pos) in vec_map.as_slice() { - cursor.read_blob_into_buf(*pos, &mut buf, ctx).await?; + cursor.read_blob_into_buf(*pos, &mut buf, &ctx).await?; let will_init = Value::des(&buf)?.will_init(); delta_layer_writer .put_value_bytes(key, *lsn, &buf, will_init)