From 2e687bca5b4b541bdd483d6b0448367d547059c7 Mon Sep 17 00:00:00 2001 From: Alex Chi Z Date: Wed, 7 Jun 2023 11:28:18 -0400 Subject: [PATCH] refactor: use LayerDesc in layer map (part 1) (#4408) ## Problem part of https://github.com/neondatabase/neon/issues/4392 ## Summary of changes This PR adds a new HashMap that maps persistent layer desc to the layer object *inside* LayerMap. Originally I directly went towards adding such layer cache in Timeline, but the changes are too many and cannot be reviewed as a reasonably-sized PR. Therefore, we take this intermediate step to change part of the codebase to use persistent layer desc, and come up with other PRs to move this hash map of layer desc to the timeline struct. Also, file_size is now part of the layer desc. --------- Signed-off-by: Alex Chi Co-authored-by: bojanserafimov --- pageserver/benches/bench_layer_map.rs | 4 +- pageserver/src/tenant/layer_map.rs | 163 +++++++++++++----- pageserver/src/tenant/storage_layer.rs | 20 ++- .../src/tenant/storage_layer/delta_layer.rs | 16 +- .../src/tenant/storage_layer/image_layer.rs | 32 ++-- .../src/tenant/storage_layer/layer_desc.rs | 69 +++++++- .../src/tenant/storage_layer/remote_layer.rs | 6 +- pageserver/src/tenant/timeline.rs | 27 +-- 8 files changed, 247 insertions(+), 90 deletions(-) diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index ee5980212e..45dc9fad4a 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -33,7 +33,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap { min_lsn = min(min_lsn, lsn_range.start); max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1)); - updates.insert_historic(Arc::new(layer)); + updates.insert_historic(layer.get_persistent_layer_desc(), Arc::new(layer)); } println!("min: {min_lsn}, max: {max_lsn}"); @@ -215,7 +215,7 @@ fn bench_sequential(c: &mut Criterion) { is_incremental: false, short_id: format!("Layer {}", i), }; - 
updates.insert_historic(Arc::new(layer)); + updates.insert_historic(layer.get_persistent_layer_desc(), Arc::new(layer)); } updates.flush(); println!("Finished layer map init in {:?}", now.elapsed()); diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 8d06ccd565..ca1a71b623 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -51,7 +51,9 @@ use crate::keyspace::KeyPartitioning; use crate::repository::Key; use crate::tenant::storage_layer::InMemoryLayer; use crate::tenant::storage_layer::Layer; +use anyhow::Context; use anyhow::Result; +use std::collections::HashMap; use std::collections::VecDeque; use std::ops::Range; use std::sync::Arc; @@ -61,6 +63,8 @@ use historic_layer_coverage::BufferedHistoricLayerCoverage; pub use historic_layer_coverage::Replacement; use super::storage_layer::range_eq; +use super::storage_layer::PersistentLayerDesc; +use super::storage_layer::PersistentLayerKey; /// /// LayerMap tracks what layers exist on a timeline. @@ -86,11 +90,16 @@ pub struct LayerMap { pub frozen_layers: VecDeque>, /// Index of the historic layers optimized for search - historic: BufferedHistoricLayerCoverage>, + historic: BufferedHistoricLayerCoverage>, /// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient. /// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree. - l0_delta_layers: Vec>, + l0_delta_layers: Vec>, + + /// Mapping from persistent layer key to the actual layer object. Currently, it stores delta, image, and + /// remote layers. In future refactors, this will be eventually moved out of LayerMap into Timeline, and + /// RemoteLayer will be removed. 
+ mapping: HashMap>, } impl Default for LayerMap { @@ -101,6 +110,7 @@ impl Default for LayerMap { frozen_layers: VecDeque::default(), l0_delta_layers: Vec::default(), historic: BufferedHistoricLayerCoverage::default(), + mapping: HashMap::default(), } } } @@ -125,8 +135,9 @@ where /// /// Insert an on-disk layer. /// - pub fn insert_historic(&mut self, layer: Arc) { - self.layer_map.insert_historic_noflush(layer) + // TODO remove the `layer` argument when `mapping` is refactored out of `LayerMap` + pub fn insert_historic(&mut self, layer_desc: PersistentLayerDesc, layer: Arc) { + self.layer_map.insert_historic_noflush(layer_desc, layer) } /// @@ -134,8 +145,8 @@ where /// /// This should be called when the corresponding file on disk has been deleted. /// - pub fn remove_historic(&mut self, layer: Arc) { - self.layer_map.remove_historic_noflush(layer) + pub fn remove_historic(&mut self, layer_desc: PersistentLayerDesc, layer: Arc) { + self.layer_map.remove_historic_noflush(layer_desc, layer) } /// Replaces existing layer iff it is the `expected`. @@ -150,12 +161,15 @@ where /// that we can replace values only by updating a hashmap. 
pub fn replace_historic( &mut self, + expected_desc: PersistentLayerDesc, expected: &Arc, + new_desc: PersistentLayerDesc, new: Arc, ) -> anyhow::Result>> { fail::fail_point!("layermap-replace-notfound", |_| Ok(Replacement::NotFound)); - self.layer_map.replace_historic_noflush(expected, new) + self.layer_map + .replace_historic_noflush(expected_desc, expected, new_desc, new) } // We will flush on drop anyway, but this method makes it @@ -230,6 +244,7 @@ where (None, None) => None, (None, Some(image)) => { let lsn_floor = image.get_lsn_range().start; + let image = self.get_layer_from_mapping(&image.key()).clone(); Some(SearchResult { layer: image, lsn_floor, @@ -237,6 +252,7 @@ where } (Some(delta), None) => { let lsn_floor = delta.get_lsn_range().start; + let delta = self.get_layer_from_mapping(&delta.key()).clone(); Some(SearchResult { layer: delta, lsn_floor, @@ -247,6 +263,7 @@ where let image_is_newer = image.get_lsn_range().end >= delta.get_lsn_range().end; let image_exact_match = img_lsn + 1 == end_lsn; if image_is_newer || image_exact_match { + let image = self.get_layer_from_mapping(&image.key()).clone(); Some(SearchResult { layer: image, lsn_floor: img_lsn, @@ -254,6 +271,7 @@ where } else { let lsn_floor = std::cmp::max(delta.get_lsn_range().start, image.get_lsn_range().start + 1); + let delta = self.get_layer_from_mapping(&delta.key()).clone(); Some(SearchResult { layer: delta, lsn_floor, @@ -273,16 +291,33 @@ where /// /// Helper function for BatchedUpdates::insert_historic /// - pub(self) fn insert_historic_noflush(&mut self, layer: Arc) { + /// TODO(chi): remove L generic so that we do not need to pass layer object. 
+ pub(self) fn insert_historic_noflush( + &mut self, + layer_desc: PersistentLayerDesc, + layer: Arc, + ) { + self.mapping.insert(layer_desc.key(), layer.clone()); + // TODO: See #3869, resulting #4088, attempted fix and repro #4094 - self.historic.insert( - historic_layer_coverage::LayerKey::from(&*layer), - Arc::clone(&layer), - ); if Self::is_l0(&layer) { - self.l0_delta_layers.push(layer); + self.l0_delta_layers.push(layer_desc.clone().into()); } + + self.historic.insert( + historic_layer_coverage::LayerKey::from(&*layer), + layer_desc.into(), + ); + } + + fn get_layer_from_mapping(&self, key: &PersistentLayerKey) -> &Arc { + let layer = self + .mapping + .get(key) + .with_context(|| format!("{key:?}")) + .expect("inconsistent layer mapping"); + layer } /// @@ -290,14 +325,16 @@ where /// /// Helper function for BatchedUpdates::remove_historic /// - pub fn remove_historic_noflush(&mut self, layer: Arc) { + pub fn remove_historic_noflush(&mut self, layer_desc: PersistentLayerDesc, layer: Arc) { self.historic .remove(historic_layer_coverage::LayerKey::from(&*layer)); - if Self::is_l0(&layer) { let len_before = self.l0_delta_layers.len(); - self.l0_delta_layers - .retain(|other| !Self::compare_arced_layers(other, &layer)); + let mut l0_delta_layers = std::mem::take(&mut self.l0_delta_layers); + l0_delta_layers.retain(|other| { + !Self::compare_arced_layers(self.get_layer_from_mapping(&other.key()), &layer) + }); + self.l0_delta_layers = l0_delta_layers; // this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers, // there's a chance that the comparison fails at runtime due to it comparing (pointer, // vtable) pairs. 
@@ -307,11 +344,14 @@ where "failed to locate removed historic layer from l0_delta_layers" ); } + self.mapping.remove(&layer_desc.key()); } pub(self) fn replace_historic_noflush( &mut self, + expected_desc: PersistentLayerDesc, expected: &Arc, + new_desc: PersistentLayerDesc, new: Arc, ) -> anyhow::Result>> { let key = historic_layer_coverage::LayerKey::from(&**expected); @@ -332,10 +372,9 @@ where let l0_index = if expected_l0 { // find the index in case replace worked, we need to replace that as well - let pos = self - .l0_delta_layers - .iter() - .position(|slot| Self::compare_arced_layers(slot, expected)); + let pos = self.l0_delta_layers.iter().position(|slot| { + Self::compare_arced_layers(self.get_layer_from_mapping(&slot.key()), expected) + }); if pos.is_none() { return Ok(Replacement::NotFound); @@ -345,16 +384,28 @@ where None }; - let replaced = self.historic.replace(&key, new.clone(), |existing| { - Self::compare_arced_layers(existing, expected) + let new_desc = Arc::new(new_desc); + let replaced = self.historic.replace(&key, new_desc.clone(), |existing| { + **existing == expected_desc }); if let Replacement::Replaced { .. 
} = &replaced { + self.mapping.remove(&expected_desc.key()); + self.mapping.insert(new_desc.key(), new); if let Some(index) = l0_index { - self.l0_delta_layers[index] = new; + self.l0_delta_layers[index] = new_desc; } } + let replaced = match replaced { + Replacement::Replaced { in_buffered } => Replacement::Replaced { in_buffered }, + Replacement::NotFound => Replacement::NotFound, + Replacement::RemovalBuffered => Replacement::RemovalBuffered, + Replacement::Unexpected(x) => { + Replacement::Unexpected(self.get_layer_from_mapping(&x.key()).clone()) + } + }; + Ok(replaced) } @@ -383,7 +434,7 @@ where let start = key.start.to_i128(); let end = key.end.to_i128(); - let layer_covers = |layer: Option>| match layer { + let layer_covers = |layer: Option>| match layer { Some(layer) => layer.get_lsn_range().start >= lsn.start, None => false, }; @@ -404,7 +455,9 @@ where } pub fn iter_historic_layers(&self) -> impl '_ + Iterator> { - self.historic.iter() + self.historic + .iter() + .map(|x| self.get_layer_from_mapping(&x.key()).clone()) } /// @@ -436,14 +489,24 @@ where // Loop through the change events and push intervals for (change_key, change_val) in version.image_coverage.range(start..end) { let kr = Key::from_i128(current_key)..Key::from_i128(change_key); - coverage.push((kr, current_val.take())); + coverage.push(( + kr, + current_val + .take() + .map(|l| self.get_layer_from_mapping(&l.key()).clone()), + )); current_key = change_key; current_val = change_val.clone(); } // Add the final interval let kr = Key::from_i128(current_key)..Key::from_i128(end); - coverage.push((kr, current_val.take())); + coverage.push(( + kr, + current_val + .take() + .map(|l| self.get_layer_from_mapping(&l.key()).clone()), + )); Ok(coverage) } @@ -532,7 +595,9 @@ where let kr = Key::from_i128(current_key)..Key::from_i128(change_key); let lr = lsn.start..val.get_lsn_range().start; if !kr.is_empty() { - let base_count = Self::is_reimage_worthy(&val, key) as usize; + let base_count = + 
Self::is_reimage_worthy(self.get_layer_from_mapping(&val.key()), key) + as usize; let new_limit = limit.map(|l| l - base_count); let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit)?; @@ -555,7 +620,9 @@ where let lr = lsn.start..val.get_lsn_range().start; if !kr.is_empty() { - let base_count = Self::is_reimage_worthy(&val, key) as usize; + let base_count = + Self::is_reimage_worthy(self.get_layer_from_mapping(&val.key()), key) + as usize; let new_limit = limit.map(|l| l - base_count); let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit)?; max_stacked_deltas = std::cmp::max( @@ -706,7 +773,11 @@ where /// Return all L0 delta layers pub fn get_level0_deltas(&self) -> Result>> { - Ok(self.l0_delta_layers.clone()) + Ok(self + .l0_delta_layers + .iter() + .map(|x| self.get_layer_from_mapping(&x.key()).clone()) + .collect()) } /// debugging function to print out the contents of the layer map @@ -809,12 +880,17 @@ mod tests { let layer = LayerDescriptor::from(layer); // same skeletan construction; see scenario below - let not_found: Arc = Arc::new(layer.clone()); - let new_version: Arc = Arc::new(layer); + let not_found = Arc::new(layer.clone()); + let new_version = Arc::new(layer); let mut map = LayerMap::default(); - let res = map.batch_update().replace_historic(&not_found, new_version); + let res = map.batch_update().replace_historic( + not_found.get_persistent_layer_desc(), + &not_found, + new_version.get_persistent_layer_desc(), + new_version, + ); assert!(matches!(res, Ok(Replacement::NotFound)), "{res:?}"); } @@ -823,8 +899,8 @@ mod tests { let name = LayerFileName::from_str(layer_name).unwrap(); let skeleton = LayerDescriptor::from(name); - let remote: Arc = Arc::new(skeleton.clone()); - let downloaded: Arc = Arc::new(skeleton); + let remote = Arc::new(skeleton.clone()); + let downloaded = Arc::new(skeleton); let mut map = LayerMap::default(); @@ -834,12 +910,18 @@ mod tests { let expected_in_counts = (1,
usize::from(expected_l0)); - map.batch_update().insert_historic(remote.clone()); + map.batch_update() + .insert_historic(remote.get_persistent_layer_desc(), remote.clone()); assert_eq!(count_layer_in(&map, &remote), expected_in_counts); let replaced = map .batch_update() - .replace_historic(&remote, downloaded.clone()) + .replace_historic( + remote.get_persistent_layer_desc(), + &remote, + downloaded.get_persistent_layer_desc(), + downloaded.clone(), + ) .expect("name derived attributes are the same"); assert!( matches!(replaced, Replacement::Replaced { .. }), @@ -847,11 +929,12 @@ mod tests { ); assert_eq!(count_layer_in(&map, &downloaded), expected_in_counts); - map.batch_update().remove_historic(downloaded.clone()); + map.batch_update() + .remove_historic(downloaded.get_persistent_layer_desc(), downloaded.clone()); assert_eq!(count_layer_in(&map, &downloaded), (0, 0)); } - fn count_layer_in(map: &LayerMap, layer: &Arc) -> (usize, usize) { + fn count_layer_in(map: &LayerMap, layer: &Arc) -> (usize, usize) { let historic = map .iter_historic_layers() .filter(|x| LayerMap::compare_arced_layers(x, layer)) diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 7c071463de..6ac4fd9470 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -38,7 +38,7 @@ pub use delta_layer::{DeltaLayer, DeltaLayerWriter}; pub use filename::{DeltaFileName, ImageFileName, LayerFileName}; pub use image_layer::{ImageLayer, ImageLayerWriter}; pub use inmemory_layer::InMemoryLayer; -pub use layer_desc::PersistentLayerDesc; +pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey}; pub use remote_layer::RemoteLayer; use super::layer_map::BatchedUpdates; @@ -454,7 +454,9 @@ pub trait PersistentLayer: Layer { /// /// Should not change over the lifetime of the layer object because /// current_physical_size is computed as the som of this value. 
- fn file_size(&self) -> u64; + fn file_size(&self) -> u64 { + self.layer_desc().file_size + } fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo; @@ -483,6 +485,20 @@ pub struct LayerDescriptor { pub short_id: String, } +impl LayerDescriptor { + /// `LayerDescriptor` is only used for testing purpose so it does not matter whether it is image / delta, + /// and the tenant / timeline id does not matter. + pub fn get_persistent_layer_desc(&self) -> PersistentLayerDesc { + PersistentLayerDesc::new_delta( + TenantId::from_array([0; 16]), + TimelineId::from_array([0; 16]), + self.key.clone(), + self.lsn.clone(), + 233, + ) + } +} + impl Layer for LayerDescriptor { fn get_key_range(&self) -> Range { self.key.clone() diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 5f2fb1ebea..624fe8dac4 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -182,8 +182,6 @@ pub struct DeltaLayer { pub desc: PersistentLayerDesc, - pub file_size: u64, - access_stats: LayerAccessStats, inner: RwLock, @@ -196,7 +194,7 @@ impl std::fmt::Debug for DeltaLayer { f.debug_struct("DeltaLayer") .field("key_range", &RangeDisplayDebug(&self.desc.key_range)) .field("lsn_range", &self.desc.lsn_range) - .field("file_size", &self.file_size) + .field("file_size", &self.desc.file_size) .field("inner", &self.inner) .finish() } @@ -439,10 +437,6 @@ impl PersistentLayer for DeltaLayer { Ok(()) } - fn file_size(&self) -> u64 { - self.file_size - } - fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo { let layer_file_name = self.filename().file_name(); let lsn_range = self.get_lsn_range(); @@ -451,7 +445,7 @@ impl PersistentLayer for DeltaLayer { HistoricLayerInfo::Delta { layer_file_name, - layer_file_size: self.file_size, + layer_file_size: self.desc.file_size, lsn_start: lsn_range.start, lsn_end: lsn_range.end, remote: false, @@ -602,8 
+596,8 @@ impl DeltaLayer { timeline_id, filename.key_range.clone(), filename.lsn_range.clone(), + file_size, ), - file_size, access_stats, inner: RwLock::new(DeltaLayerInner { loaded: false, @@ -634,8 +628,8 @@ impl DeltaLayer { summary.timeline_id, summary.key_range, summary.lsn_range, + metadata.len(), ), - file_size: metadata.len(), access_stats: LayerAccessStats::empty_will_record_residence_event_later(), inner: RwLock::new(DeltaLayerInner { loaded: false, @@ -803,8 +797,8 @@ impl DeltaLayerWriterInner { self.timeline_id, self.key_start..key_end, self.lsn_range.clone(), + metadata.len(), ), - file_size: metadata.len(), access_stats: LayerAccessStats::empty_will_record_residence_event_later(), inner: RwLock::new(DeltaLayerInner { loaded: false, diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index b55dd08a6d..07a16a7de2 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -109,8 +109,6 @@ pub struct ImageLayer { // This entry contains an image of all pages as of this LSN, should be the same as desc.lsn pub lsn: Lsn, - pub file_size: u64, - access_stats: LayerAccessStats, inner: RwLock, @@ -122,7 +120,7 @@ impl std::fmt::Debug for ImageLayer { f.debug_struct("ImageLayer") .field("key_range", &RangeDisplayDebug(&self.desc.key_range)) - .field("file_size", &self.file_size) + .field("file_size", &self.desc.file_size) .field("lsn", &self.lsn) .field("inner", &self.inner) .finish() @@ -258,17 +256,13 @@ impl PersistentLayer for ImageLayer { Ok(()) } - fn file_size(&self) -> u64 { - self.file_size - } - fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo { let layer_file_name = self.filename().file_name(); let lsn_range = self.get_lsn_range(); HistoricLayerInfo::Image { layer_file_name, - layer_file_size: self.file_size, + layer_file_size: self.desc.file_size, lsn_start: lsn_range.start, remote: false, access_stats: 
self.access_stats.as_api_model(reset), @@ -411,9 +405,9 @@ impl ImageLayer { filename.key_range.clone(), filename.lsn, false, + file_size, ), // Now we assume image layer ALWAYS covers the full range. This may change in the future. lsn: filename.lsn, - file_size, access_stats, inner: RwLock::new(ImageLayerInner { loaded: false, @@ -443,9 +437,9 @@ impl ImageLayer { summary.key_range, summary.lsn, false, + metadata.len(), ), // Now we assume image layer ALWAYS covers the full range. This may change in the future. lsn: summary.lsn, - file_size: metadata.len(), access_stats: LayerAccessStats::empty_will_record_residence_event_later(), inner: RwLock::new(ImageLayerInner { file: None, @@ -578,14 +572,6 @@ impl ImageLayerWriterInner { file.write_all(buf.as_ref())?; } - let desc = PersistentLayerDesc::new_img( - self.tenant_id, - self.timeline_id, - self.key_range.clone(), - self.lsn, - self.is_incremental, // for now, image layer ALWAYS covers the full range - ); - // Fill in the summary on blk 0 let summary = Summary { magic: IMAGE_FILE_MAGIC, @@ -604,6 +590,15 @@ impl ImageLayerWriterInner { .metadata() .context("get metadata to determine file size")?; + let desc = PersistentLayerDesc::new_img( + self.tenant_id, + self.timeline_id, + self.key_range.clone(), + self.lsn, + self.is_incremental, // for now, image layer ALWAYS covers the full range + metadata.len(), + ); + // Note: Because we open the file in write-only mode, we cannot // reuse the same VirtualFile for reading later. That's why we don't // set inner.file here. The first read will have to re-open it. 
@@ -611,7 +606,6 @@ impl ImageLayerWriterInner { path_or_conf: PathOrConf::Conf(self.conf), desc, lsn: self.lsn, - file_size: metadata.len(), access_stats: LayerAccessStats::empty_will_record_residence_event_later(), inner: RwLock::new(ImageLayerInner { loaded: false, diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index a9859681d3..d1cef70253 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -1,10 +1,11 @@ +use anyhow::Result; use std::ops::Range; use utils::{ id::{TenantId, TimelineId}, lsn::Lsn, }; -use crate::repository::Key; +use crate::{context::RequestContext, repository::Key}; use super::{DeltaFileName, ImageFileName, LayerFileName}; @@ -24,9 +25,27 @@ pub struct PersistentLayerDesc { /// always be equal to `is_delta`. If we land the partial image layer PR someday, image layer could also be /// incremental. pub is_incremental: bool, + /// File size + pub file_size: u64, +} + +/// A unique identifier of a persistent layer within the context of one timeline. 
+#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub struct PersistentLayerKey { + pub key_range: Range, + pub lsn_range: Range, + pub is_delta: bool, } impl PersistentLayerDesc { + pub fn key(&self) -> PersistentLayerKey { + PersistentLayerKey { + key_range: self.key_range.clone(), + lsn_range: self.lsn_range.clone(), + is_delta: self.is_delta, + } + } + pub fn short_id(&self) -> String { self.filename().file_name() } @@ -37,6 +56,7 @@ impl PersistentLayerDesc { key_range: Range, lsn: Lsn, is_incremental: bool, + file_size: u64, ) -> Self { Self { tenant_id, @@ -45,6 +65,7 @@ impl PersistentLayerDesc { lsn_range: Self::image_layer_lsn_range(lsn), is_delta: false, is_incremental, + file_size, } } @@ -53,6 +74,7 @@ impl PersistentLayerDesc { timeline_id: TimelineId, key_range: Range, lsn_range: Range, + file_size: u64, ) -> Self { Self { tenant_id, @@ -61,6 +83,7 @@ impl PersistentLayerDesc { lsn_range, is_delta: true, is_incremental: true, + file_size, } } @@ -106,4 +129,48 @@ impl PersistentLayerDesc { self.image_file_name().into() } } + + // TODO: remove this in the future once we refactor timeline APIs. 
+ + pub fn get_lsn_range(&self) -> Range { + self.lsn_range.clone() + } + + pub fn get_key_range(&self) -> Range { + self.key_range.clone() + } + + pub fn get_timeline_id(&self) -> TimelineId { + self.timeline_id + } + + pub fn get_tenant_id(&self) -> TenantId { + self.tenant_id + } + + pub fn is_incremental(&self) -> bool { + self.is_incremental + } + + pub fn is_delta(&self) -> bool { + self.is_delta + } + + pub fn dump(&self, _verbose: bool, _ctx: &RequestContext) -> Result<()> { + println!( + "----- layer for ten {} tli {} keys {}-{} lsn {}-{} ----", + self.tenant_id, + self.timeline_id, + self.key_range.start, + self.key_range.end, + self.lsn_range.start, + self.lsn_range.end + ); + + Ok(()) + } + + pub fn file_size(&self) -> u64 { + self.file_size + } } diff --git a/pageserver/src/tenant/storage_layer/remote_layer.rs b/pageserver/src/tenant/storage_layer/remote_layer.rs index ff0f44da92..387bae5b1f 100644 --- a/pageserver/src/tenant/storage_layer/remote_layer.rs +++ b/pageserver/src/tenant/storage_layer/remote_layer.rs @@ -142,10 +142,6 @@ impl PersistentLayer for RemoteLayer { true } - fn file_size(&self) -> u64 { - self.layer_metadata.file_size() - } - fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo { let layer_file_name = self.filename().file_name(); let lsn_range = self.get_lsn_range(); @@ -190,6 +186,7 @@ impl RemoteLayer { fname.key_range.clone(), fname.lsn, false, + layer_metadata.file_size(), ), layer_metadata: layer_metadata.clone(), ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)), @@ -211,6 +208,7 @@ impl RemoteLayer { timelineid, fname.key_range.clone(), fname.lsn_range.clone(), + layer_metadata.file_size(), ), layer_metadata: layer_metadata.clone(), ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)), diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 507f0de4f3..2a50a26a23 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -1211,7 
+1211,12 @@ impl Timeline { ), }); - let replaced = match batch_updates.replace_historic(local_layer, new_remote_layer)? { + let replaced = match batch_updates.replace_historic( + local_layer.layer_desc().clone(), + local_layer, + new_remote_layer.layer_desc().clone(), + new_remote_layer, + )? { Replacement::Replaced { .. } => { if let Err(e) = local_layer.delete_resident_layer_file() { error!("failed to remove layer file on evict after replacement: {e:#?}"); @@ -1607,7 +1612,7 @@ impl Timeline { trace!("found layer {}", layer.path().display()); total_physical_size += file_size; - updates.insert_historic(Arc::new(layer)); + updates.insert_historic(layer.layer_desc().clone(), Arc::new(layer)); num_layers += 1; } else if let Some(deltafilename) = DeltaFileName::parse_str(&fname) { // Create a DeltaLayer struct for each delta file. @@ -1639,7 +1644,7 @@ impl Timeline { trace!("found layer {}", layer.path().display()); total_physical_size += file_size; - updates.insert_historic(Arc::new(layer)); + updates.insert_historic(layer.layer_desc().clone(), Arc::new(layer)); num_layers += 1; } else if fname == METADATA_FILE_NAME || fname.ends_with(".old") { // ignore these @@ -1738,7 +1743,7 @@ impl Timeline { anyhow::bail!("could not rename file {local_layer_path:?}: {err:?}"); } else { self.metrics.resident_physical_size_gauge.sub(local_size); - updates.remove_historic(local_layer); + updates.remove_historic(local_layer.layer_desc().clone(), local_layer); // fall-through to adding the remote layer } } else { @@ -1777,7 +1782,7 @@ impl Timeline { ); let remote_layer = Arc::new(remote_layer); - updates.insert_historic(remote_layer); + updates.insert_historic(remote_layer.layer_desc().clone(), remote_layer); } LayerFileName::Delta(deltafilename) => { // Create a RemoteLayer for the delta file. 
@@ -1804,7 +1809,7 @@ impl Timeline { ), ); let remote_layer = Arc::new(remote_layer); - updates.insert_historic(remote_layer); + updates.insert_historic(remote_layer.layer_desc().clone(), remote_layer); } } } @@ -2252,7 +2257,7 @@ impl Timeline { // won't be needed for page reconstruction for this timeline, // and mark what we can't delete yet as deleted from the layer // map index without actually rebuilding the index. - updates.remove_historic(layer); + updates.remove_historic(layer.layer_desc().clone(), layer); Ok(()) } @@ -2962,7 +2967,7 @@ impl Timeline { LayerResidenceStatus::Resident, LayerResidenceEventReason::LayerCreate, ); - batch_updates.insert_historic(l); + batch_updates.insert_historic(l.layer_desc().clone(), l); batch_updates.flush(); // update the timeline's physical size @@ -3210,7 +3215,7 @@ impl Timeline { LayerResidenceStatus::Resident, LayerResidenceEventReason::LayerCreate, ); - updates.insert_historic(l); + updates.insert_historic(l.layer_desc().clone(), l); } updates.flush(); drop(layers); @@ -3657,7 +3662,7 @@ impl Timeline { LayerResidenceStatus::Resident, LayerResidenceEventReason::LayerCreate, ); - updates.insert_historic(x); + updates.insert_historic(x.layer_desc().clone(), x); } // Now that we have reshuffled the data to set of new delta layers, we can @@ -4192,7 +4197,7 @@ impl Timeline { { use crate::tenant::layer_map::Replacement; let l: Arc = remote_layer.clone(); - let failure = match updates.replace_historic(&l, new_layer) { + let failure = match updates.replace_historic(l.layer_desc().clone(), &l, new_layer.layer_desc().clone(), new_layer) { Ok(Replacement::Replaced { .. }) => false, Ok(Replacement::NotFound) => { // TODO: the downloaded file should probably be removed, otherwise