From ca154d9cd843dcc10d234266be0effff091e71e7 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 8 May 2024 17:50:21 +0100 Subject: [PATCH] pageserver: local layer path followups (#7640) - Rename "filename" types which no longer map directly to a filename (LayerFileName -> LayerName) - Add a -v1- part to local layer paths to smooth the path to future updates (we anticipate a -v2- that uses checksums later) - Rename methods that refer to the string-ized version of a LayerName to no longer be called "filename" - Refactor reconcile() function to use a LocalLayerFileMetadata type that includes the local path, rather than carrying local path separately in a tuple and unwrap()'ing it later. --- pageserver/benches/bench_layer_map.rs | 4 +- pageserver/ctl/src/index_part.rs | 4 +- pageserver/src/deletion_queue.rs | 16 +-- pageserver/src/deletion_queue/list_writer.rs | 4 +- pageserver/src/disk_usage_eviction_task.rs | 8 +- pageserver/src/http/routes.rs | 6 +- .../src/tenant/remote_timeline_client.rs | 64 +++++----- .../tenant/remote_timeline_client/download.rs | 4 +- .../tenant/remote_timeline_client/index.rs | 6 +- pageserver/src/tenant/secondary.rs | 4 +- pageserver/src/tenant/secondary/downloader.rs | 12 +- pageserver/src/tenant/secondary/heatmap.rs | 8 +- pageserver/src/tenant/storage_layer.rs | 20 +-- .../src/tenant/storage_layer/delta_layer.rs | 14 ++- .../src/tenant/storage_layer/image_layer.rs | 21 ++-- pageserver/src/tenant/storage_layer/layer.rs | 39 +++--- .../src/tenant/storage_layer/layer_desc.rs | 28 ++--- .../{filename.rs => layer_name.rs} | 115 +++++++++--------- pageserver/src/tenant/timeline.rs | 71 +++++------ .../src/tenant/timeline/detach_ancestor.rs | 2 +- pageserver/src/tenant/timeline/init.rs | 93 ++++++++------ .../src/tenant/timeline/layer_manager.rs | 2 +- pageserver/src/tenant/upload_queue.rs | 8 +- s3_scrubber/src/checks.rs | 23 ++-- s3_scrubber/src/tenant_snapshot.rs | 18 +-- test_runner/fixtures/neon_fixtures.py | 4 +- test_runner/fixtures/pageserver/types.py | 29 +++-- .../regress/test_layers_from_future.py | 8 +- .../regress/test_pageserver_generations.py | 4 +- 29 files changed, 324 insertions(+), 315 deletions(-) rename pageserver/src/tenant/storage_layer/{filename.rs => layer_name.rs} (72%) diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 5d05af0c00..1d02aa7709 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -1,7 +1,7 @@ use pageserver::keyspace::{KeyPartitioning, KeySpace}; use pageserver::repository::Key; use pageserver::tenant::layer_map::LayerMap; -use pageserver::tenant::storage_layer::LayerFileName; +use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::storage_layer::PersistentLayerDesc; use pageserver_api::shard::TenantShardId; use rand::prelude::{SeedableRng, SliceRandom, StdRng}; @@ -28,7 +28,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap { let mut updates = layer_map.batch_update(); for fname in filenames { let fname = fname.unwrap(); - let fname = LayerFileName::from_str(&fname).unwrap(); + let fname = LayerName::from_str(&fname).unwrap(); let layer = PersistentLayerDesc::from(fname); let lsn_range = layer.get_lsn_range(); diff --git a/pageserver/ctl/src/index_part.rs b/pageserver/ctl/src/index_part.rs index 20e5572914..0d010eb009 100644 --- a/pageserver/ctl/src/index_part.rs +++ b/pageserver/ctl/src/index_part.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use anyhow::Context; use camino::Utf8PathBuf; use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata; -use pageserver::tenant::storage_layer::LayerFileName; +use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::{metadata::TimelineMetadata, IndexPart}; use utils::lsn::Lsn; @@ -19,7 +19,7 @@ pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> { let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?; #[derive(serde::Serialize)] struct Output<'a> { - layer_metadata: &'a HashMap, + layer_metadata: &'a HashMap, disk_consistent_lsn: Lsn, timeline_metadata: &'a TimelineMetadata, } diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs index e3c11cb299..c937309d83 100644 --- a/pageserver/src/deletion_queue.rs +++ b/pageserver/src/deletion_queue.rs @@ -38,7 +38,7 @@ use deleter::DeleterMessage; use list_writer::ListWriterQueueMessage; use validator::ValidatorQueueMessage; -use crate::{config::PageServerConf, tenant::storage_layer::LayerFileName}; +use crate::{config::PageServerConf, tenant::storage_layer::LayerName}; // TODO: configurable for how long to wait before executing deletions @@ -479,7 +479,7 @@ impl DeletionQueueClient { tenant_shard_id: TenantShardId, timeline_id: TimelineId, current_generation: Generation, - layers: Vec<(LayerFileName, LayerFileMetadata)>, + layers: Vec<(LayerName, LayerFileMetadata)>, ) -> Result<(), DeletionQueueError> { if current_generation.is_none() { debug!("Enqueuing deletions in legacy mode, skipping queue"); @@ -511,7 +511,7 @@ impl DeletionQueueClient { tenant_shard_id: TenantShardId, timeline_id: TimelineId, current_generation: Generation, - layers: Vec<(LayerFileName, LayerFileMetadata)>, + layers: Vec<(LayerName, LayerFileMetadata)>, ) -> Result<(), DeletionQueueError> { metrics::DELETION_QUEUE .keys_submitted @@ -734,20 +734,20 @@ mod test { use crate::{ control_plane_client::RetryForeverError, repository::Key, - tenant::{harness::TenantHarness, storage_layer::DeltaFileName}, + tenant::{harness::TenantHarness, storage_layer::DeltaLayerName}, }; use super::*; pub const TIMELINE_ID: TimelineId = TimelineId::from_array(hex!("11223344556677881122334455667788")); - pub const EXAMPLE_LAYER_NAME: LayerFileName = LayerFileName::Delta(DeltaFileName { + pub const EXAMPLE_LAYER_NAME: LayerName = LayerName::Delta(DeltaLayerName { key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF), lsn_range: Lsn(0x00000000016B59D8)..Lsn(0x00000000016B5A51), }); // When you need a second layer in a test. - pub const EXAMPLE_LAYER_NAME_ALT: LayerFileName = LayerFileName::Delta(DeltaFileName { + pub const EXAMPLE_LAYER_NAME_ALT: LayerName = LayerName::Delta(DeltaLayerName { key_range: Key::from_i128(0x0)..Key::from_i128(0xFFFFFFFFFFFFFFFF), lsn_range: Lsn(0x00000000016B5A51)..Lsn(0x00000000016B5A61), }); @@ -797,7 +797,7 @@ mod test { /// Returns remote layer file name, suitable for use in assert_remote_files fn write_remote_layer( &self, - file_name: LayerFileName, + file_name: LayerName, gen: Generation, ) -> anyhow::Result { let tenant_shard_id = self.harness.tenant_shard_id; @@ -952,7 +952,7 @@ mod test { let client = ctx.deletion_queue.new_client(); client.recover(HashMap::new())?; - let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); + let layer_file_name_1: LayerName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); let tenant_shard_id = ctx.harness.tenant_shard_id; let content: Vec = "victim1 contents".into(); diff --git a/pageserver/src/deletion_queue/list_writer.rs b/pageserver/src/deletion_queue/list_writer.rs index 3a3d600ac2..ae3b2c9180 100644 --- a/pageserver/src/deletion_queue/list_writer.rs +++ b/pageserver/src/deletion_queue/list_writer.rs @@ -34,7 +34,7 @@ use crate::deletion_queue::TEMP_SUFFIX; use crate::metrics; use crate::tenant::remote_timeline_client::remote_layer_path; use crate::tenant::remote_timeline_client::LayerFileMetadata; -use crate::tenant::storage_layer::LayerFileName; +use crate::tenant::storage_layer::LayerName; use crate::virtual_file::on_fatal_io_error; use crate::virtual_file::MaybeFatalIo; @@ -59,7 +59,7 @@ pub(super) struct DeletionOp { // `layers` and `objects` are both just lists of objects. `layers` is used if you do not // have a config object handy to project it to a remote key, and need the consuming worker // to do it for you. - pub(super) layers: Vec<(LayerFileName, LayerFileMetadata)>, + pub(super) layers: Vec<(LayerName, LayerFileMetadata)>, pub(super) objects: Vec, /// The _current_ generation of the Tenant shard attachment in which we are enqueuing diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index 400930245b..ebeb8bbb20 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -64,7 +64,7 @@ use crate::{ mgr::TenantManager, remote_timeline_client::LayerFileMetadata, secondary::SecondaryTenant, - storage_layer::{AsLayerDesc, EvictionError, Layer, LayerFileName}, + storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName}, }, }; @@ -604,7 +604,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl( pub(crate) struct EvictionSecondaryLayer { pub(crate) secondary_tenant: Arc, pub(crate) timeline_id: TimelineId, - pub(crate) name: LayerFileName, + pub(crate) name: LayerName, pub(crate) metadata: LayerFileMetadata, } @@ -637,9 +637,9 @@ impl EvictionLayer { } } - pub(crate) fn get_name(&self) -> LayerFileName { + pub(crate) fn get_name(&self) -> LayerName { match self { - Self::Attached(l) => l.layer_desc().filename(), + Self::Attached(l) => l.layer_desc().layer_name(), Self::Secondary(sl) => sl.name.clone(), } } diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 83b7b8a45e..a8ca642dc5 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -63,7 +63,7 @@ use crate::tenant::remote_timeline_client::list_remote_timelines; use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::LayerAccessStatsReset; -use crate::tenant::storage_layer::LayerFileName; +use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::CompactFlags; use crate::tenant::timeline::Timeline; use crate::tenant::SpawnMode; @@ -1229,7 +1229,7 @@ async fn layer_download_handler( let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; let layer_file_name = get_request_param(&request, "layer_file_name")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; - let layer_name = LayerFileName::from_str(layer_file_name) + let layer_name = LayerName::from_str(layer_file_name) .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?; let state = get_state(&request); @@ -1261,7 +1261,7 @@ async fn evict_timeline_layer_handler( let layer_file_name = get_request_param(&request, "layer_file_name")?; let state = get_state(&request); - let layer_name = LayerFileName::from_str(layer_file_name) + let layer_name = LayerName::from_str(layer_file_name) .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?; let timeline = diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 356a0dc51c..bbe4e16378 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -240,7 +240,7 @@ use utils::id::{TenantId, TimelineId}; use self::index::IndexPart; use super::metadata::MetadataUpdate; -use super::storage_layer::{Layer, LayerFileName, ResidentLayer}; +use super::storage_layer::{Layer, LayerName, ResidentLayer}; use super::upload_queue::SetDeletedFlagProgress; use super::Generation; @@ -503,7 +503,7 @@ impl RemoteTimelineClient { /// On success, returns the size of the downloaded file. pub async fn download_layer_file( &self, - layer_file_name: &LayerFileName, + layer_file_name: &LayerName, layer_metadata: &LayerFileMetadata, cancel: &CancellationToken, ctx: &RequestContext, @@ -677,7 +677,7 @@ impl RemoteTimelineClient { for layer in layers { upload_queue .latest_files - .insert(layer.layer_desc().filename(), layer.metadata()); + .insert(layer.layer_desc().layer_name(), layer.metadata()); } self.schedule_index_upload(upload_queue); @@ -713,7 +713,7 @@ impl RemoteTimelineClient { upload_queue .latest_files - .insert(layer.layer_desc().filename(), metadata.clone()); + .insert(layer.layer_desc().layer_name(), metadata.clone()); upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1; info!( @@ -737,7 +737,7 @@ impl RemoteTimelineClient { /// successfully. pub fn schedule_layer_file_deletion( self: &Arc, - names: &[LayerFileName], + names: &[LayerName], ) -> anyhow::Result<()> { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; @@ -765,7 +765,7 @@ impl RemoteTimelineClient { // the layer files as "dangling". this is fine, at worst case we create work for the // scrubber. - let names = gc_layers.iter().map(|x| x.layer_desc().filename()); + let names = gc_layers.iter().map(|x| x.layer_desc().layer_name()); self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names); @@ -780,9 +780,9 @@ impl RemoteTimelineClient { self: &Arc, upload_queue: &mut UploadQueueInitialized, names: I, - ) -> Vec<(LayerFileName, LayerFileMetadata)> + ) -> Vec<(LayerName, LayerFileMetadata)> where - I: IntoIterator, + I: IntoIterator, { // Decorate our list of names with each name's metadata, dropping // names that are unexpectedly missing from our metadata. This metadata @@ -832,7 +832,7 @@ impl RemoteTimelineClient { /// `index_part.json` with [`Self::schedule_gc_update`] or [`Self::schedule_compaction_update`]. pub(crate) fn schedule_deletion_of_unlinked( self: &Arc, - layers: Vec<(LayerFileName, LayerFileMetadata)>, + layers: Vec<(LayerName, LayerFileMetadata)>, ) -> anyhow::Result<()> { let mut guard = self.upload_queue.lock().unwrap(); let upload_queue = guard.initialized_mut()?; @@ -845,7 +845,7 @@ impl RemoteTimelineClient { fn schedule_deletion_of_unlinked0( self: &Arc, upload_queue: &mut UploadQueueInitialized, - mut with_metadata: Vec<(LayerFileName, LayerFileMetadata)>, + mut with_metadata: Vec<(LayerName, LayerFileMetadata)>, ) { // Filter out any layers which were not created by this tenant shard. These are // layers that originate from some ancestor shard after a split, and may still @@ -914,7 +914,7 @@ impl RemoteTimelineClient { self.schedule_layer_file_upload0(upload_queue, layer.clone()); } - let names = compacted_from.iter().map(|x| x.layer_desc().filename()); + let names = compacted_from.iter().map(|x| x.layer_desc().layer_name()); self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names); self.launch_queued_tasks(upload_queue); @@ -1144,7 +1144,7 @@ impl RemoteTimelineClient { &self.tenant_shard_id.tenant_id, &self.timeline_id, self.tenant_shard_id.to_index(), - &uploaded.layer_desc().filename(), + &uploaded.layer_desc().layer_name(), uploaded.metadata().generation, ); @@ -1185,7 +1185,7 @@ impl RemoteTimelineClient { .get_timeline_id() .expect("Source timeline should be alive"), self.tenant_shard_id.to_index(), - &adopted.layer_desc().filename(), + &adopted.layer_desc().layer_name(), adopted.metadata().generation, ); @@ -1193,7 +1193,7 @@ impl RemoteTimelineClient { &self.tenant_shard_id.tenant_id, &self.timeline_id, self.tenant_shard_id.to_index(), - &adopted_as.layer_desc().filename(), + &adopted_as.layer_desc().layer_name(), adopted_as.metadata().generation, ); @@ -1527,7 +1527,7 @@ impl RemoteTimelineClient { &self.tenant_shard_id.tenant_id, &self.timeline_id, layer_metadata.shard, - &layer.layer_desc().filename(), + &layer.layer_desc().layer_name(), layer_metadata.generation, ); @@ -1896,14 +1896,14 @@ pub fn remote_layer_path( tenant_id: &TenantId, timeline_id: &TimelineId, shard: ShardIndex, - layer_file_name: &LayerFileName, + layer_file_name: &LayerName, generation: Generation, ) -> RemotePath { // Generation-aware key format let path = format!( "tenants/{tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{1}{2}", shard.get_suffix(), - layer_file_name.file_name(), + layer_file_name, generation.get_suffix() ); @@ -2000,8 +2000,8 @@ mod tests { TimelineMetadata::from_bytes(&metadata.to_bytes().unwrap()).unwrap() } - fn assert_file_list(a: &HashSet, b: &[&str]) { - let mut avec: Vec = a.iter().map(|x| x.file_name()).collect(); + fn assert_file_list(a: &HashSet, b: &[&str]) { + let mut avec: Vec = a.iter().map(|x| x.to_string()).collect(); avec.sort(); let mut bvec = b.to_vec(); @@ -2127,7 +2127,7 @@ mod tests { .layer_metadata .keys() .map(|f| f.to_owned()) - .collect::>(); + .collect::>(); let initial_layer = { assert!(initial_layers.len() == 1); initial_layers.into_iter().next().unwrap() @@ -2153,7 +2153,7 @@ mod tests { ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59DA-00000000016B5A53".parse().unwrap(), dummy_contents("baz")) ] .into_iter() - .map(|(name, contents): (LayerFileName, Vec)| { + .map(|(name, contents): (LayerName, Vec)| { let local_path = local_layer_path( harness.conf, @@ -2234,9 +2234,9 @@ mod tests { .map(|f| f.to_owned()) .collect(), &[ - &initial_layer.file_name(), - &layers[0].layer_desc().filename().file_name(), - &layers[1].layer_desc().filename().file_name(), + &initial_layer.to_string(), + &layers[0].layer_desc().layer_name().to_string(), + &layers[1].layer_desc().layer_name().to_string(), ], ); assert_eq!(index_part.metadata, metadata); @@ -2250,7 +2250,7 @@ mod tests { // keep using schedule_layer_file_deletion because we don't have a way to wait for the // spawn_blocking started by the drop. client - .schedule_layer_file_deletion(&[layers[0].layer_desc().filename()]) + .schedule_layer_file_deletion(&[layers[0].layer_desc().layer_name()]) .unwrap(); { let mut guard = client.upload_queue.lock().unwrap(); @@ -2268,9 +2268,9 @@ mod tests { } assert_remote_files( &[ - &initial_layer.file_name(), - &layers[0].layer_desc().filename().file_name(), - &layers[1].layer_desc().filename().file_name(), + &initial_layer.to_string(), + &layers[0].layer_desc().layer_name().to_string(), + &layers[1].layer_desc().layer_name().to_string(), "index_part.json", ], &remote_timeline_dir, @@ -2283,9 +2283,9 @@ mod tests { assert_remote_files( &[ - &initial_layer.file_name(), - &layers[1].layer_desc().filename().file_name(), - &layers[2].layer_desc().filename().file_name(), + &initial_layer.to_string(), + &layers[1].layer_desc().layer_name().to_string(), + &layers[2].layer_desc().layer_name().to_string(), "index_part.json", ], &remote_timeline_dir, @@ -2305,7 +2305,7 @@ mod tests { } = TestSetup::new("metrics").await.unwrap(); let client = timeline.remote_client.as_ref().unwrap(); - let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); + let layer_file_name_1: LayerName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); let local_path = local_layer_path( harness.conf, &timeline.tenant_shard_id, diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index c86b22d481..b464437422 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -22,7 +22,7 @@ use crate::context::RequestContext; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path}; use crate::tenant::storage_layer::layer::local_layer_path; -use crate::tenant::storage_layer::LayerFileName; +use crate::tenant::storage_layer::LayerName; use crate::tenant::Generation; use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}; use crate::TEMP_FILE_SUFFIX; @@ -48,7 +48,7 @@ pub async fn download_layer_file<'a>( storage: &'a GenericRemoteStorage, tenant_shard_id: TenantShardId, timeline_id: TimelineId, - layer_file_name: &'a LayerFileName, + layer_file_name: &'a LayerName, layer_metadata: &'a LayerFileMetadata, cancel: &CancellationToken, ctx: &RequestContext, diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index 7e0619945f..3e05905afa 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -8,7 +8,7 @@ use chrono::NaiveDateTime; use serde::{Deserialize, Serialize}; use crate::tenant::metadata::TimelineMetadata; -use crate::tenant::storage_layer::LayerFileName; +use crate::tenant::storage_layer::LayerName; use crate::tenant::upload_queue::UploadQueueInitialized; use crate::tenant::Generation; use pageserver_api::shard::ShardIndex; @@ -75,7 +75,7 @@ pub struct IndexPart { /// /// Older versions of `IndexPart` will not have this property or have only a part of metadata /// that latest version stores. - pub layer_metadata: HashMap, + pub layer_metadata: HashMap, // 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata. // It's duplicated for convenience when reading the serialized structure, but is @@ -104,7 +104,7 @@ impl IndexPart { pub const FILE_NAME: &'static str = "index_part.json"; fn new( - layers_and_metadata: &HashMap, + layers_and_metadata: &HashMap, disk_consistent_lsn: Lsn, metadata: TimelineMetadata, ) -> Self { diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs index 0bb25f0ace..7075044baf 100644 --- a/pageserver/src/tenant/secondary.rs +++ b/pageserver/src/tenant/secondary.rs @@ -23,7 +23,7 @@ use super::{ mgr::TenantManager, remote_timeline_client::LayerFileMetadata, span::debug_assert_current_span_has_tenant_id, - storage_layer::{layer::local_layer_path, LayerFileName}, + storage_layer::{layer::local_layer_path, LayerName}, }; use pageserver_api::{ @@ -182,7 +182,7 @@ impl SecondaryTenant { self: &Arc, conf: &PageServerConf, timeline_id: TimelineId, - name: LayerFileName, + name: LayerName, metadata: LayerFileMetadata, ) { debug_assert_current_span_has_tenant_id(); diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index 092630e74d..2a8f83be95 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -22,7 +22,7 @@ use crate::{ FAILED_REMOTE_OP_RETRIES, }, span::debug_assert_current_span_has_tenant_id, - storage_layer::{layer::local_layer_path, LayerFileName}, + storage_layer::{layer::local_layer_path, LayerName}, tasks::{warn_when_period_overrun, BackgroundLoopKind}, }, virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}, @@ -111,7 +111,7 @@ impl OnDiskState { _conf: &'static PageServerConf, _tenant_shard_id: &TenantShardId, _imeline_id: &TimelineId, - _ame: LayerFileName, + _ame: LayerName, metadata: LayerFileMetadata, access_time: SystemTime, ) -> Self { @@ -124,10 +124,10 @@ impl OnDiskState { #[derive(Debug, Clone, Default)] pub(super) struct SecondaryDetailTimeline { - pub(super) on_disk_layers: HashMap, + pub(super) on_disk_layers: HashMap, /// We remember when layers were evicted, to prevent re-downloading them. - pub(super) evicted_at: HashMap, + pub(super) evicted_at: HashMap, } /// This state is written by the secondary downloader, it is opaque @@ -997,7 +997,7 @@ async fn init_timeline_state( // As we iterate through layers found on disk, we will look up their metadata from this map. // Layers not present in metadata will be discarded. - let heatmap_metadata: HashMap<&LayerFileName, &HeatMapLayer> = + let heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> = heatmap.layers.iter().map(|l| (&l.name, l)).collect(); while let Some(dentry) = dir @@ -1034,7 +1034,7 @@ async fn init_timeline_state( continue; } - match LayerFileName::from_str(file_name) { + match LayerName::from_str(file_name) { Ok(name) => { let remote_meta = heatmap_metadata.get(&name); match remote_meta { diff --git a/pageserver/src/tenant/secondary/heatmap.rs b/pageserver/src/tenant/secondary/heatmap.rs index 73cdf6c6d4..ca91ec24c6 100644 --- a/pageserver/src/tenant/secondary/heatmap.rs +++ b/pageserver/src/tenant/secondary/heatmap.rs @@ -1,8 +1,6 @@ use std::time::SystemTime; -use crate::tenant::{ - remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerFileName, -}; +use crate::tenant::{remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerName}; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr, TimestampSeconds}; @@ -31,7 +29,7 @@ pub(crate) struct HeatMapTimeline { #[serde_as] #[derive(Serialize, Deserialize)] pub(crate) struct HeatMapLayer { - pub(super) name: LayerFileName, + pub(super) name: LayerName, pub(super) metadata: IndexLayerMetadata, #[serde_as(as = "TimestampSeconds")] @@ -42,7 +40,7 @@ pub(crate) struct HeatMapLayer { impl HeatMapLayer { pub(crate) fn new( - name: LayerFileName, + name: LayerName, metadata: IndexLayerMetadata, access_time: SystemTime, ) -> Self { diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 4f1b56ef9f..94a5e9ec47 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -1,11 +1,11 @@ //! Common traits and structs for layers pub mod delta_layer; -mod filename; pub mod image_layer; pub(crate) mod inmemory_layer; pub(crate) mod layer; mod layer_desc; +mod layer_name; use crate::context::{AccessStatsBehavior, RequestContext}; use crate::repository::Value; @@ -34,10 +34,10 @@ use utils::rate_limit::RateLimit; use utils::{id::TimelineId, lsn::Lsn}; pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef}; -pub use filename::{DeltaFileName, ImageFileName, LayerFileName}; pub use image_layer::{ImageLayer, ImageLayerWriter}; pub use inmemory_layer::InMemoryLayer; pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey}; +pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName}; pub(crate) use layer::{EvictionError, Layer, ResidentLayer}; @@ -646,8 +646,8 @@ pub mod tests { use super::*; - impl From for PersistentLayerDesc { - fn from(value: DeltaFileName) -> Self { + impl From for PersistentLayerDesc { + fn from(value: DeltaLayerName) -> Self { PersistentLayerDesc::new_delta( TenantShardId::from([0; 18]), TimelineId::from_array([0; 16]), @@ -658,8 +658,8 @@ pub mod tests { } } - impl From for PersistentLayerDesc { - fn from(value: ImageFileName) -> Self { + impl From for PersistentLayerDesc { + fn from(value: ImageLayerName) -> Self { PersistentLayerDesc::new_img( TenantShardId::from([0; 18]), TimelineId::from_array([0; 16]), @@ -670,11 +670,11 @@ pub mod tests { } } - impl From for PersistentLayerDesc { - fn from(value: LayerFileName) -> Self { + impl From for PersistentLayerDesc { + fn from(value: LayerName) -> Self { match value { - LayerFileName::Delta(d) => Self::from(d), - LayerFileName::Image(i) => Self::from(i), + LayerName::Delta(d) => Self::from(d), + LayerName::Image(i) => Self::from(i), } } } diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 6fd96b0e2f..c38c9bb656 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -57,6 +57,7 @@ use std::fs::File; use std::io::SeekFrom; use std::ops::Range; use std::os::unix::fs::FileExt; +use std::str::FromStr; use std::sync::Arc; use tokio::sync::OnceCell; use tracing::*; @@ -68,7 +69,8 @@ use utils::{ }; use super::{ - AsLayerDesc, LayerAccessStats, PersistentLayerDesc, ResidentLayer, ValuesReconstructState, + AsLayerDesc, LayerAccessStats, LayerName, PersistentLayerDesc, ResidentLayer, + ValuesReconstructState, }; /// @@ -309,13 +311,13 @@ impl DeltaLayer { .and_then(|res| res)?; // not production code - let actual_filename = path.file_name().unwrap().to_owned(); - let expected_filename = self.layer_desc().filename().file_name(); + let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap(); + let expected_layer_name = self.layer_desc().layer_name(); - if actual_filename != expected_filename { + if actual_layer_name != expected_layer_name { println!("warning: filename does not match what is expected from in-file summary"); - println!("actual: {:?}", actual_filename); - println!("expected: {:?}", expected_filename); + println!("actual: {:?}", actual_layer_name.to_string()); + println!("expected: {:?}", expected_layer_name.to_string()); } Ok(Arc::new(loaded)) diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 1477a1fc33..c9874873e4 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -54,6 +54,7 @@ use std::fs::File; use std::io::SeekFrom; use std::ops::Range; use std::os::unix::prelude::FileExt; +use std::str::FromStr; use std::sync::Arc; use tokio::sync::OnceCell; use tokio_stream::StreamExt; @@ -65,8 +66,10 @@ use utils::{ lsn::Lsn, }; -use super::filename::ImageFileName; -use super::{AsLayerDesc, Layer, PersistentLayerDesc, ResidentLayer, ValuesReconstructState}; +use super::layer_name::ImageLayerName; +use super::{ + AsLayerDesc, Layer, LayerName, PersistentLayerDesc, ResidentLayer, ValuesReconstructState, +}; /// /// Header stored in the beginning of the file @@ -231,7 +234,7 @@ impl ImageLayer { conf: &PageServerConf, timeline_id: TimelineId, tenant_shard_id: TenantShardId, - fname: &ImageFileName, + fname: &ImageLayerName, ) -> Utf8PathBuf { let rand_string: String = rand::thread_rng() .sample_iter(&Alphanumeric) @@ -267,13 +270,13 @@ impl ImageLayer { .and_then(|res| res)?; // not production code - let actual_filename = path.file_name().unwrap().to_owned(); - let expected_filename = self.layer_desc().filename().file_name(); + let actual_layer_name = LayerName::from_str(path.file_name().unwrap()).unwrap(); + let expected_layer_name = self.layer_desc().layer_name(); - if actual_filename != expected_filename { + if actual_layer_name != expected_layer_name { println!("warning: filename does not match what is expected from in-file summary"); - println!("actual: {:?}", actual_filename); - println!("expected: {:?}", expected_filename); + println!("actual: {:?}", actual_layer_name.to_string()); + println!("expected: {:?}", expected_layer_name.to_string()); } Ok(loaded) @@ -635,7 +638,7 @@ impl ImageLayerWriterInner { conf, timeline_id, tenant_shard_id, - &ImageFileName { + &ImageLayerName { key_range: key_range.clone(), lsn, }, diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index b5e69db7f4..b5b0260327 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -25,7 +25,7 @@ use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline}; use super::delta_layer::{self, DeltaEntry}; use super::image_layer; use super::{ - AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerFileName, PersistentLayerDesc, + AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerName, PersistentLayerDesc, ValueReconstructResult, ValueReconstructState, ValuesReconstructState, }; @@ -128,19 +128,20 @@ pub(crate) fn local_layer_path( conf: &PageServerConf, tenant_shard_id: &TenantShardId, timeline_id: &TimelineId, - layer_file_name: &LayerFileName, + layer_file_name: &LayerName, _generation: &Generation, ) -> Utf8PathBuf { let timeline_path = conf.timeline_path(tenant_shard_id, timeline_id); - timeline_path.join(layer_file_name.file_name()) + timeline_path.join(layer_file_name.to_string()) - // TOOD: include generation in the name in now+1 releases. - // timeline_path.join(format!( - // "{}{}", - // layer_file_name.file_name(), - // generation.get_suffix() - // )) + // TODO: switch to enabling new-style layer paths after next release + // if generation.is_none() { + // // Without a generation, we may only use legacy path style + // timeline_path.join(layer_file_name.to_string()) + // } else { + // timeline_path.join(format!("{}-v1{}", layer_file_name, generation.get_suffix())) + // } } impl Layer { @@ -148,7 +149,7 @@ impl Layer { pub(crate) fn for_evicted( conf: &'static PageServerConf, timeline: &Arc, - file_name: LayerFileName, + file_name: LayerName, metadata: LayerFileMetadata, ) -> Self { let local_path = local_layer_path( @@ -189,7 +190,7 @@ impl Layer { conf: &'static PageServerConf, timeline: &Arc, local_path: Utf8PathBuf, - file_name: LayerFileName, + file_name: LayerName, metadata: LayerFileMetadata, ) -> ResidentLayer { let desc = PersistentLayerDesc::from_filename( @@ -261,7 +262,7 @@ impl Layer { conf, &timeline.tenant_shard_id, &timeline.timeline_id, - &desc.filename(), + &desc.layer_name(), &timeline.generation, ); @@ -689,7 +690,7 @@ impl Drop for LayerInner { let span = tracing::info_span!(parent: None, "layer_delete", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id); let path = std::mem::take(&mut self.path); - let file_name = self.layer_desc().filename(); + let file_name = self.layer_desc().layer_name(); let file_size = self.layer_desc().file_size; let timeline = self.timeline.clone(); let meta = self.metadata(); @@ -782,7 +783,9 @@ impl LayerInner { LayerInner { conf, - debug_str: { format!("timelines/{}/{}", timeline.timeline_id, desc.filename()).into() }, + debug_str: { + format!("timelines/{}/{}", timeline.timeline_id, desc.layer_name()).into() + }, path: local_path, desc, timeline: Arc::downgrade(timeline), @@ -1120,7 +1123,7 @@ impl LayerInner { let result = client .download_layer_file( - &self.desc.filename(), + &self.desc.layer_name(), &self.metadata(), &timeline.cancel, ctx, @@ -1257,7 +1260,7 @@ impl LayerInner { } fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo { - let layer_file_name = self.desc.filename().file_name(); + let layer_name = self.desc.layer_name().to_string(); let resident = self .inner @@ -1271,7 +1274,7 @@ impl LayerInner { let lsn_range = &self.desc.lsn_range; HistoricLayerInfo::Delta { - layer_file_name, + layer_file_name: layer_name, layer_file_size: self.desc.file_size, lsn_start: lsn_range.start, lsn_end: lsn_range.end, @@ -1282,7 +1285,7 @@ impl LayerInner { let lsn = self.desc.image_layer_lsn(); HistoricLayerInfo::Image { - layer_file_name, + layer_file_name: layer_name, layer_file_size: self.desc.file_size, lsn_start: lsn, remote: !resident, diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index c375923e81..a89b66e4a1 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -5,7 +5,7 @@ use utils::{id::TimelineId, lsn::Lsn}; use crate::repository::Key; -use super::{DeltaFileName, ImageFileName, LayerFileName}; +use super::{DeltaLayerName, ImageLayerName, LayerName}; use serde::{Deserialize, Serialize}; @@ -51,7 +51,7 @@ impl PersistentLayerDesc { } pub fn short_id(&self) -> impl Display { - self.filename() + self.layer_name() } #[cfg(test)] @@ -103,14 +103,14 @@ impl PersistentLayerDesc { pub fn from_filename( tenant_shard_id: TenantShardId, timeline_id: TimelineId, - filename: LayerFileName, + filename: LayerName, file_size: u64, ) -> Self { match filename { - LayerFileName::Image(i) => { + LayerName::Image(i) => { Self::new_img(tenant_shard_id, timeline_id, i.key_range, i.lsn, file_size) } - LayerFileName::Delta(d) => Self::new_delta( + LayerName::Delta(d) => Self::new_delta( tenant_shard_id, timeline_id, d.key_range, @@ -132,34 +132,34 @@ impl PersistentLayerDesc { lsn..(lsn + 1) } - /// Get a delta file name for this layer. + /// Get a delta layer name for this layer. /// /// Panic: if this is not a delta layer. - pub fn delta_file_name(&self) -> DeltaFileName { + pub fn delta_layer_name(&self) -> DeltaLayerName { assert!(self.is_delta); - DeltaFileName { + DeltaLayerName { key_range: self.key_range.clone(), lsn_range: self.lsn_range.clone(), } } - /// Get a delta file name for this layer. + /// Get a image layer name for this layer. /// /// Panic: if this is not an image layer, or the lsn range is invalid - pub fn image_file_name(&self) -> ImageFileName { + pub fn image_layer_name(&self) -> ImageLayerName { assert!(!self.is_delta); assert!(self.lsn_range.start + 1 == self.lsn_range.end); - ImageFileName { + ImageLayerName { key_range: self.key_range.clone(), lsn: self.lsn_range.start, } } - pub fn filename(&self) -> LayerFileName { + pub fn layer_name(&self) -> LayerName { if self.is_delta { - self.delta_file_name().into() + self.delta_layer_name().into() } else { - self.image_file_name().into() + self.image_layer_name().into() } } diff --git a/pageserver/src/tenant/storage_layer/filename.rs b/pageserver/src/tenant/storage_layer/layer_name.rs similarity index 72% rename from pageserver/src/tenant/storage_layer/filename.rs rename to pageserver/src/tenant/storage_layer/layer_name.rs index fff66a9d07..c733404693 100644 --- a/pageserver/src/tenant/storage_layer/filename.rs +++ b/pageserver/src/tenant/storage_layer/layer_name.rs @@ -15,29 +15,29 @@ use super::PersistentLayerDesc; // Note: Timeline::load_layer_map() relies on this sort order #[derive(PartialEq, Eq, Clone, Hash)] -pub struct DeltaFileName { +pub struct DeltaLayerName { pub key_range: Range, pub lsn_range: Range, } -impl std::fmt::Debug for DeltaFileName { +impl std::fmt::Debug for DeltaLayerName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use super::RangeDisplayDebug; - f.debug_struct("DeltaFileName") + f.debug_struct("DeltaLayerName") .field("key_range", &RangeDisplayDebug(&self.key_range)) .field("lsn_range", &self.lsn_range) .finish() } } -impl PartialOrd for DeltaFileName { +impl PartialOrd for DeltaLayerName { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Ord for DeltaFileName { +impl Ord for DeltaLayerName { fn cmp(&self, other: &Self) -> Ordering { let mut cmp = self.key_range.start.cmp(&other.key_range.start); if cmp != Ordering::Equal { @@ -57,16 +57,14 @@ impl Ord for DeltaFileName { } } -/// Represents the filename of a DeltaLayer +/// Represents the region of the LSN-Key space covered by a DeltaLayer /// /// ```text /// -__- /// ``` -impl DeltaFileName { - /// - /// Parse a string as a delta file name. Returns None if the filename does not - /// match the expected pattern. - /// +impl DeltaLayerName { + /// Parse the part of a delta layer's file name that represents the LayerName. Returns None + /// if the filename does not match the expected pattern. pub fn parse_str(fname: &str) -> Option { let mut parts = fname.split("__"); let mut key_parts = parts.next()?.split('-'); @@ -105,14 +103,14 @@ impl DeltaFileName { // or panic? } - Some(DeltaFileName { + Some(DeltaLayerName { key_range: key_start..key_end, lsn_range: start_lsn..end_lsn, }) } } -impl fmt::Display for DeltaFileName { +impl fmt::Display for DeltaLayerName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, @@ -126,29 +124,29 @@ impl fmt::Display for DeltaFileName { } #[derive(PartialEq, Eq, Clone, Hash)] -pub struct ImageFileName { +pub struct ImageLayerName { pub key_range: Range, pub lsn: Lsn, } -impl std::fmt::Debug for ImageFileName { +impl std::fmt::Debug for ImageLayerName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use super::RangeDisplayDebug; - f.debug_struct("ImageFileName") + f.debug_struct("ImageLayerName") .field("key_range", &RangeDisplayDebug(&self.key_range)) .field("lsn", &self.lsn) .finish() } } -impl PartialOrd for ImageFileName { +impl PartialOrd for ImageLayerName { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl Ord for ImageFileName { +impl Ord for ImageLayerName { fn cmp(&self, other: &Self) -> Ordering { let mut cmp = self.key_range.start.cmp(&other.key_range.start); if cmp != Ordering::Equal { @@ -164,7 +162,7 @@ impl Ord for ImageFileName { } } -impl ImageFileName { +impl ImageLayerName { pub fn lsn_as_range(&self) -> Range { // Saves from having to copypaste this all over PersistentLayerDesc::image_layer_lsn_range(self.lsn) @@ -172,16 +170,14 @@ impl ImageFileName { } /// -/// Represents the filename of an ImageLayer +/// Represents the part of the Key-LSN space covered by an ImageLayer /// /// ```text /// -__ /// ``` -impl ImageFileName { - /// - /// Parse a string as an image file name. Returns None if the filename does not - /// match the expected pattern. - /// +impl ImageLayerName { + /// Parse a string as then LayerName part of an image layer file name. Returns None if the + /// filename does not match the expected pattern. pub fn parse_str(fname: &str) -> Option { let mut parts = fname.split("__"); let mut key_parts = parts.next()?.split('-'); @@ -202,14 +198,14 @@ impl ImageFileName { let lsn = Lsn::from_hex(lsn_str).ok()?; - Some(ImageFileName { + Some(ImageLayerName { key_range: key_start..key_end, lsn, }) } } -impl fmt::Display for ImageFileName { +impl fmt::Display for ImageLayerName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, @@ -220,21 +216,24 @@ impl fmt::Display for ImageFileName { ) } } + +/// LayerName is the logical identity of a layer within a LayerMap at a moment in time. The +/// LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations +/// over time (e.g. across shard splits or compression). The physical filenames of layers in local +/// storage and object names in remote storage consist of the LayerName plus some extra qualifiers +/// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path]) +/// and [`crate::tenant::storage_layer::layer::local_layer_path`]) #[derive(Debug, PartialEq, Eq, Hash, Clone)] -pub enum LayerFileName { - Image(ImageFileName), - Delta(DeltaFileName), +pub enum LayerName { + Image(ImageLayerName), + Delta(DeltaLayerName), } -impl LayerFileName { - pub fn file_name(&self) -> String { - self.to_string() - } - +impl LayerName { /// Determines if this layer file is considered to be in future meaning we will discard these /// layers during timeline initialization from the given disk_consistent_lsn. pub(crate) fn is_in_future(&self, disk_consistent_lsn: Lsn) -> bool { - use LayerFileName::*; + use LayerName::*; match self { Image(file_name) if file_name.lsn > disk_consistent_lsn => true, Delta(file_name) if file_name.lsn_range.end > disk_consistent_lsn + 1 => true, @@ -243,7 +242,7 @@ impl LayerFileName { } pub(crate) fn kind(&self) -> &'static str { - use LayerFileName::*; + use LayerName::*; match self { Delta(_) => "delta", Image(_) => "image", @@ -251,7 +250,7 @@ impl LayerFileName { } } -impl fmt::Display for LayerFileName { +impl fmt::Display for LayerName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Image(fname) => write!(f, "{fname}"), @@ -260,25 +259,25 @@ impl fmt::Display for LayerFileName { } } -impl From for LayerFileName { - fn from(fname: ImageFileName) -> Self { +impl From for LayerName { + fn from(fname: ImageLayerName) -> Self { Self::Image(fname) } } -impl From for LayerFileName { - fn from(fname: DeltaFileName) -> Self { +impl From for LayerName { + fn from(fname: DeltaLayerName) -> Self { Self::Delta(fname) } } -impl FromStr for LayerFileName { +impl FromStr for LayerName { type Err = String; /// Conversion from either a physical layer filename, or the string-ization of /// Self. When loading a physical layer filename, we drop any extra information /// not needed to build Self. fn from_str(value: &str) -> Result { - let gen_suffix_regex = Regex::new("^(?.+)-(?[0-9a-f]{8})$").unwrap(); + let gen_suffix_regex = Regex::new("^(?.+)(?-v1-[0-9a-f]{8})$").unwrap(); let file_name: Cow = match gen_suffix_regex.captures(value) { Some(captures) => captures .name("base") @@ -288,8 +287,8 @@ impl FromStr for LayerFileName { None => value.into(), }; - let delta = DeltaFileName::parse_str(&file_name); - let image = ImageFileName::parse_str(&file_name); + let delta = DeltaLayerName::parse_str(&file_name); + let image = ImageLayerName::parse_str(&file_name); let ok = match (delta, image) { (None, None) => { return Err(format!( @@ -304,7 +303,7 @@ impl FromStr for LayerFileName { } } -impl serde::Serialize for LayerFileName { +impl serde::Serialize for LayerName { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, @@ -316,19 +315,19 @@ impl serde::Serialize for LayerFileName { } } -impl<'de> serde::Deserialize<'de> for LayerFileName { +impl<'de> serde::Deserialize<'de> for LayerName { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { - deserializer.deserialize_string(LayerFileNameVisitor) + deserializer.deserialize_string(LayerNameVisitor) } } -struct LayerFileNameVisitor; +struct LayerNameVisitor; -impl<'de> serde::de::Visitor<'de> for LayerFileNameVisitor { - type Value = LayerFileName; +impl<'de> serde::de::Visitor<'de> for LayerNameVisitor { + type Value = LayerName; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!( @@ -349,16 +348,16 @@ mod test { use super::*; #[test] fn image_layer_parse() -> anyhow::Result<()> { - let expected = LayerFileName::Image(ImageFileName { + let expected = LayerName::Image(ImageLayerName { key_range: Key::from_i128(0) ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(), lsn: Lsn::from_hex("00000000014FED58").unwrap(), }); - let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001").map_err(|s| anyhow::anyhow!(s))?; + let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-v1-00000001").map_err(|s| anyhow::anyhow!(s))?; assert_eq!(parsed, expected,); // Omitting generation suffix is valid - let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").map_err(|s| anyhow::anyhow!(s))?; + let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").map_err(|s| anyhow::anyhow!(s))?; assert_eq!(parsed, expected,); Ok(()) @@ -366,17 +365,17 @@ mod test { #[test] fn delta_layer_parse() -> anyhow::Result<()> { - let expected = LayerFileName::Delta(DeltaFileName { + let expected = LayerName::Delta(DeltaLayerName { key_range: Key::from_i128(0) ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(), lsn_range: Lsn::from_hex("00000000014FED58").unwrap() ..Lsn::from_hex("000000000154C481").unwrap(), }); - let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-00000001").map_err(|s| anyhow::anyhow!(s))?; + let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-v1-00000001").map_err(|s| anyhow::anyhow!(s))?; assert_eq!(parsed, expected); // Omitting generation suffix is valid - let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").map_err(|s| anyhow::anyhow!(s))?; + let parsed = LayerName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").map_err(|s| anyhow::anyhow!(s))?; assert_eq!(parsed, expected); Ok(()) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index d6d012c70c..7edb922069 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -60,7 +60,7 @@ use std::{ ops::ControlFlow, }; -use crate::tenant::storage_layer::layer::local_layer_path; +use crate::tenant::timeline::init::LocalLayerFileMetadata; use crate::tenant::{ layer_map::{LayerMap, SearchResult}, metadata::TimelineMetadata, @@ -75,7 +75,7 @@ use crate::{ disk_usage_eviction_task::finite_f32, tenant::storage_layer::{ AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer, - LayerAccessStatsReset, LayerFileName, ResidentLayer, ValueReconstructResult, + LayerAccessStatsReset, LayerName, ResidentLayer, ValueReconstructResult, ValueReconstructState, ValuesReconstructState, }, }; @@ -1905,7 +1905,7 @@ impl Timeline { #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))] pub(crate) async fn download_layer( &self, - layer_file_name: &LayerFileName, + layer_file_name: &LayerName, ) -> anyhow::Result> { let Some(layer) = self.find_layer(layer_file_name).await else { return Ok(None); @@ -1925,7 +1925,7 @@ impl Timeline { /// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`. pub(crate) async fn evict_layer( &self, - layer_file_name: &LayerFileName, + layer_file_name: &LayerName, ) -> anyhow::Result> { let _gate = self .gate @@ -2387,13 +2387,13 @@ impl Timeline { index_part: Option, ) -> anyhow::Result<()> { use init::{Decision::*, Discovered, DismissedLayer}; - use LayerFileName::*; + use LayerName::*; let mut guard = self.layers.write().await; let timer = self.metrics.load_layer_map_histo.start_timer(); - // Scan timeline directory and create ImageFileName and DeltaFilename + // Scan timeline directory and create ImageLayerName and DeltaFilename // structs representing all files on disk let timeline_path = self .conf @@ -2463,33 +2463,35 @@ impl Timeline { let mut needs_cleanup = Vec::new(); let mut total_physical_size = 0; - for (name, local_path, decision) in decided { + for (name, decision) in decided { let decision = match decision { Ok(UseRemote { local, remote }) => { // Remote is authoritative, but we may still choose to retain // the local file if the contents appear to match - if local.file_size() == remote.file_size() { + if local.metadata.file_size() == remote.file_size() { // Use the local file, but take the remote metadata so that we pick up // the correct generation. - UseLocal(remote) + UseLocal( + LocalLayerFileMetadata { + metadata: remote, + local_path: local.local_path + } + ) } else { - let local_path = local_path.as_ref().expect("Locally found layer must have path"); - init::cleanup_local_file_for_remote(local_path, &local, &remote)?; + init::cleanup_local_file_for_remote(&local, &remote)?; UseRemote { local, remote } } } Ok(decision) => decision, Err(DismissedLayer::Future { local }) => { - if local.is_some() { - let local_path = local_path.expect("Locally found layer must have path"); - init::cleanup_future_layer(&local_path, &name, disk_consistent_lsn)?; + if let Some(local) = local { + init::cleanup_future_layer(&local.local_path, &name, disk_consistent_lsn)?; } needs_cleanup.push(name); continue; } Err(DismissedLayer::LocalOnly(local)) => { - let local_path = local_path.expect("Locally found layer must have path"); - init::cleanup_local_only_file(&local_path, &name, &local)?; + init::cleanup_local_only_file(&name, &local)?; // this file never existed remotely, we will have to do rework continue; } @@ -2503,20 +2505,9 @@ impl Timeline { tracing::debug!(layer=%name, ?decision, "applied"); let layer = match decision { - UseLocal(m) => { - total_physical_size += m.file_size(); - - let local_path = local_path.unwrap_or_else(|| { - local_layer_path( - conf, - &this.tenant_shard_id, - &this.timeline_id, - &name, - &m.generation, - ) - }); - - Layer::for_resident(conf, &this, local_path, name, m).drop_eviction_guard() + UseLocal(local) => { + total_physical_size += local.metadata.file_size(); + Layer::for_resident(conf, &this, local.local_path, name, local.metadata).drop_eviction_guard() } Evicted(remote) | UseRemote { remote, .. } => { Layer::for_evicted(conf, &this, name, remote) @@ -2997,10 +2988,10 @@ impl Timeline { } } - async fn find_layer(&self, layer_name: &LayerFileName) -> Option { + async fn find_layer(&self, layer_name: &LayerName) -> Option { let guard = self.layers.read().await; for historic_layer in guard.layer_map().iter_historic_layers() { - let historic_layer_name = historic_layer.filename(); + let historic_layer_name = historic_layer.layer_name(); if layer_name == &historic_layer_name { return Some(guard.get_from_desc(&historic_layer)); } @@ -3030,7 +3021,7 @@ impl Timeline { let last_activity_ts = layer.access_stats().latest_activity_or_now(); HeatMapLayer::new( - layer.layer_desc().filename(), + layer.layer_desc().layer_name(), (&layer.metadata()).into(), last_activity_ts, ) @@ -3177,7 +3168,7 @@ impl Timeline { if let Some(open_layer) = &layers.open_layer { let start_lsn = open_layer.get_lsn_range().start; if cont_lsn > start_lsn { - //info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.filename().display()); + //info!("CHECKING for {} at {} on open layer {}", key, cont_lsn, open_layer.layer_name().display()); // Get all the data needed to reconstruct the page version from this layer. // But if we have an older cached page image, no need to go past that. let lsn_floor = max(cached_lsn + 1, start_lsn); @@ -3206,7 +3197,7 @@ impl Timeline { for frozen_layer in layers.frozen_layers.iter().rev() { let start_lsn = frozen_layer.get_lsn_range().start; if cont_lsn > start_lsn { - //info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.filename().display()); + //info!("CHECKING for {} at {} on frozen layer {}", key, cont_lsn, frozen_layer.layer_name().display()); let lsn_floor = max(cached_lsn + 1, start_lsn); let frozen_layer = frozen_layer.clone(); @@ -4731,7 +4722,7 @@ impl Timeline { if l.get_lsn_range().end > horizon_cutoff { debug!( "keeping {} because it's newer than horizon_cutoff {}", - l.filename(), + l.layer_name(), horizon_cutoff, ); result.layers_needed_by_cutoff += 1; @@ -4742,7 +4733,7 @@ impl Timeline { if l.get_lsn_range().end > pitr_cutoff { debug!( "keeping {} because it's newer than pitr_cutoff {}", - l.filename(), + l.layer_name(), pitr_cutoff, ); result.layers_needed_by_pitr += 1; @@ -4761,7 +4752,7 @@ impl Timeline { if &l.get_lsn_range().start <= retain_lsn { debug!( "keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}", - l.filename(), + l.layer_name(), retain_lsn, l.is_incremental(), ); @@ -4792,7 +4783,7 @@ impl Timeline { if !layers .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff)) { - debug!("keeping {} because it is the latest layer", l.filename()); + debug!("keeping {} because it is the latest layer", l.layer_name()); result.layers_not_updated += 1; continue 'outer; } @@ -4800,7 +4791,7 @@ impl Timeline { // We didn't find any reason to keep this file, so remove it. debug!( "garbage collecting {} is_dropped: xx is_incremental: {}", - l.filename(), + l.layer_name(), l.is_incremental(), ); layers_to_remove.push(l); diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index 5c2b25da56..2641bf3d13 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -407,7 +407,7 @@ async fn remote_copy( let owned = crate::tenant::storage_layer::Layer::for_evicted( adoptee.conf, adoptee, - adopted.layer_desc().filename(), + adopted.layer_desc().layer_name(), metadata, ); diff --git a/pageserver/src/tenant/timeline/init.rs b/pageserver/src/tenant/timeline/init.rs index 9c33981807..66aa765015 100644 --- a/pageserver/src/tenant/timeline/init.rs +++ b/pageserver/src/tenant/timeline/init.rs @@ -6,7 +6,7 @@ use crate::{ self, index::{IndexPart, LayerFileMetadata}, }, - storage_layer::LayerFileName, + storage_layer::LayerName, Generation, }, METADATA_FILE_NAME, @@ -20,7 +20,7 @@ use utils::lsn::Lsn; /// Identified files in the timeline directory. pub(super) enum Discovered { /// The only one we care about - Layer(LayerFileName, Utf8PathBuf, u64), + Layer(LayerName, Utf8PathBuf, u64), /// Old ephmeral files from previous launches, should be removed Ephemeral(String), /// Old temporary timeline files, unsure what these really are, should be removed @@ -43,7 +43,7 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result { let file_size = direntry.metadata()?.len(); Discovered::Layer(file_name, direntry.path().to_owned(), file_size) @@ -72,6 +72,28 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result Self { + Self { + local_path, + metadata: LayerFileMetadata::new(file_size, generation, shard), + } + } +} + /// Decision on what to do with a layer file after considering its local and remote metadata. #[derive(Clone, Debug)] pub(super) enum Decision { @@ -80,11 +102,11 @@ pub(super) enum Decision { /// The layer is present locally, but local metadata does not match remote; we must /// delete it and treat it as evicted. UseRemote { - local: LayerFileMetadata, + local: LocalLayerFileMetadata, remote: LayerFileMetadata, }, /// The layer is present locally, and metadata matches. - UseLocal(LayerFileMetadata), + UseLocal(LocalLayerFileMetadata), } /// A layer needs to be left out of the layer map. @@ -92,39 +114,29 @@ pub(super) enum Decision { pub(super) enum DismissedLayer { /// The related layer is is in future compared to disk_consistent_lsn, it must not be loaded. Future { - /// The local metadata. `None` if the layer is only known through [`IndexPart`]. - local: Option, + /// `None` if the layer is only known through [`IndexPart`]. + local: Option, }, /// The layer only exists locally. /// /// In order to make crash safe updates to layer map, we must dismiss layers which are only /// found locally or not yet included in the remote `index_part.json`. - LocalOnly(LayerFileMetadata), + LocalOnly(LocalLayerFileMetadata), } /// Merges local discoveries and remote [`IndexPart`] to a collection of decisions. pub(super) fn reconcile( - discovered: Vec<(LayerFileName, Utf8PathBuf, u64)>, + discovered: Vec<(LayerName, Utf8PathBuf, u64)>, index_part: Option<&IndexPart>, disk_consistent_lsn: Lsn, generation: Generation, shard: ShardIndex, -) -> Vec<( - LayerFileName, - Option, - Result, -)> { +) -> Vec<(LayerName, Result)> { use Decision::*; - // name => (local_path, local_metadata, remote_metadata) - type Collected = HashMap< - LayerFileName, - ( - Option, - Option, - Option, - ), - >; + // name => (local_metadata, remote_metadata) + type Collected = + HashMap, Option)>; let mut discovered = discovered .into_iter() @@ -135,8 +147,9 @@ pub(super) fn reconcile( // it is not in IndexPart, in which case using our current generation makes sense // because it will be uploaded in this generation. ( - Some(local_path), - Some(LayerFileMetadata::new(file_size, generation, shard)), + Some(LocalLayerFileMetadata::new( + local_path, file_size, generation, shard, + )), None, ), ) @@ -152,20 +165,20 @@ pub(super) fn reconcile( .map(|(name, metadata)| (name, LayerFileMetadata::from(metadata))) .for_each(|(name, metadata)| { if let Some(existing) = discovered.get_mut(name) { - existing.2 = Some(metadata); + existing.1 = Some(metadata); } else { - discovered.insert(name.to_owned(), (None, None, Some(metadata))); + discovered.insert(name.to_owned(), (None, Some(metadata))); } }); discovered .into_iter() - .map(|(name, (local_path, local, remote))| { + .map(|(name, (local, remote))| { let decision = if name.is_in_future(disk_consistent_lsn) { Err(DismissedLayer::Future { local }) } else { match (local, remote) { - (Some(local), Some(remote)) if local != remote => { + (Some(local), Some(remote)) if local.metadata != remote => { Ok(UseRemote { local, remote }) } (Some(x), Some(_)) => Ok(UseLocal(x)), @@ -177,7 +190,7 @@ pub(super) fn reconcile( } }; - (name, local_path, decision) + (name, decision) }) .collect::>() } @@ -189,12 +202,12 @@ pub(super) fn cleanup(path: &Utf8Path, kind: &str) -> anyhow::Result<()> { } pub(super) fn cleanup_local_file_for_remote( - path: &Utf8Path, - local: &LayerFileMetadata, + local: &LocalLayerFileMetadata, remote: &LayerFileMetadata, ) -> anyhow::Result<()> { - let local_size = local.file_size(); + let local_size = local.metadata.file_size(); let remote_size = remote.file_size(); + let path = &local.local_path; let file_name = path.file_name().expect("must be file path"); tracing::warn!("removing local file {file_name:?} because it has unexpected length {local_size}; length in remote index is {remote_size}"); @@ -211,7 +224,7 @@ pub(super) fn cleanup_local_file_for_remote( pub(super) fn cleanup_future_layer( path: &Utf8Path, - name: &LayerFileName, + name: &LayerName, disk_consistent_lsn: Lsn, ) -> anyhow::Result<()> { // future image layers are allowed to be produced always for not yet flushed to disk @@ -223,12 +236,14 @@ pub(super) fn cleanup_future_layer( } pub(super) fn cleanup_local_only_file( - path: &Utf8Path, - name: &LayerFileName, - local: &LayerFileMetadata, + name: &LayerName, + local: &LocalLayerFileMetadata, ) -> anyhow::Result<()> { let kind = name.kind(); - tracing::info!("found local-only {kind} layer {name}, metadata {local:?}"); - std::fs::remove_file(path)?; + tracing::info!( + "found local-only {kind} layer {name}, metadata {:?}", + local.metadata + ); + std::fs::remove_file(&local.local_path)?; Ok(()) } diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index 8e8d64e0c6..a72eb1b3bf 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -294,7 +294,7 @@ impl LayerFileManager { // A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor. self.0 .get(&desc.key()) - .with_context(|| format!("get layer from desc: {}", desc.filename())) + .with_context(|| format!("get layer from desc: {}", desc.layer_name())) .expect("not found") .clone() } diff --git a/pageserver/src/tenant/upload_queue.rs b/pageserver/src/tenant/upload_queue.rs index 0bf4d1e599..7797117e0f 100644 --- a/pageserver/src/tenant/upload_queue.rs +++ b/pageserver/src/tenant/upload_queue.rs @@ -1,4 +1,4 @@ -use super::storage_layer::LayerFileName; +use super::storage_layer::LayerName; use super::storage_layer::ResidentLayer; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::remote_timeline_client::index::IndexPart; @@ -45,7 +45,7 @@ pub(crate) struct UploadQueueInitialized { /// All layer files stored in the remote storage, taking into account all /// in-progress and queued operations - pub(crate) latest_files: HashMap, + pub(crate) latest_files: HashMap, /// How many file uploads or deletions been scheduled, since the /// last (scheduling of) metadata index upload? @@ -89,7 +89,7 @@ pub(crate) struct UploadQueueInitialized { /// Putting this behind a testing feature to catch problems in tests, but assuming we could have a /// bug causing leaks, then it's better to not leave this enabled for production builds. #[cfg(feature = "testing")] - pub(crate) dangling_files: HashMap, + pub(crate) dangling_files: HashMap, /// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`. pub(crate) shutting_down: bool, @@ -281,7 +281,7 @@ pub(crate) struct UploadTask { /// for timeline deletion, which skips this queue and goes directly to DeletionQueue. #[derive(Debug)] pub(crate) struct Delete { - pub(crate) layers: Vec<(LayerFileName, LayerFileMetadata)>, + pub(crate) layers: Vec<(LayerName, LayerFileMetadata)>, } #[derive(Debug)] diff --git a/s3_scrubber/src/checks.rs b/s3_scrubber/src/checks.rs index 7c0f699958..68133fc0a9 100644 --- a/s3_scrubber/src/checks.rs +++ b/s3_scrubber/src/checks.rs @@ -13,7 +13,7 @@ use crate::metadata_stream::stream_listing; use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId}; use futures_util::StreamExt; use pageserver::tenant::remote_timeline_client::parse_remote_index_path; -use pageserver::tenant::storage_layer::LayerFileName; +use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::IndexPart; use remote_storage::RemotePath; @@ -110,7 +110,7 @@ pub(crate) fn branch_cleanup_and_check_errors( for (layer, metadata) in index_part.layer_metadata { if metadata.file_size == 0 { result.errors.push(format!( - "index_part.json contains a layer {} that has 0 size in its layer metadata", layer.file_name(), + "index_part.json contains a layer {} that has 0 size in its layer metadata", layer, )) } @@ -121,7 +121,7 @@ pub(crate) fn branch_cleanup_and_check_errors( // layer we think is missing. result.errors.push(format!( "index_part.json contains a layer {}{} (shard {}) that is not present in remote storage", - layer.file_name(), + layer, metadata.generation.get_suffix(), metadata.shard )) @@ -170,8 +170,7 @@ pub(crate) struct LayerRef { /// the tenant to query whether an object exists. #[derive(Default)] pub(crate) struct TenantObjectListing { - shard_timelines: - HashMap<(ShardIndex, TimelineId), HashMap<(LayerFileName, Generation), LayerRef>>, + shard_timelines: HashMap<(ShardIndex, TimelineId), HashMap<(LayerName, Generation), LayerRef>>, } impl TenantObjectListing { @@ -180,7 +179,7 @@ impl TenantObjectListing { pub(crate) fn push( &mut self, ttid: TenantShardTimelineId, - layers: HashSet<(LayerFileName, Generation)>, + layers: HashSet<(LayerName, Generation)>, ) { let shard_index = ShardIndex::new( ttid.tenant_shard_id.shard_number, @@ -208,7 +207,7 @@ impl TenantObjectListing { pub(crate) fn check_ref( &mut self, timeline_id: TimelineId, - layer_file: &LayerFileName, + layer_file: &LayerName, metadata: &IndexLayerMetadata, ) -> bool { let Some(shard_tl) = self.shard_timelines.get_mut(&(metadata.shard, timeline_id)) else { @@ -224,7 +223,7 @@ impl TenantObjectListing { true } - pub(crate) fn get_orphans(&self) -> Vec<(ShardIndex, TimelineId, LayerFileName, Generation)> { + pub(crate) fn get_orphans(&self) -> Vec<(ShardIndex, TimelineId, LayerName, Generation)> { let mut result = Vec::new(); for ((shard_index, timeline_id), layers) in &self.shard_timelines { for ((layer_file, generation), layer_ref) in layers { @@ -249,23 +248,23 @@ pub(crate) enum BlobDataParseResult { Parsed { index_part: IndexPart, index_part_generation: Generation, - s3_layers: HashSet<(LayerFileName, Generation)>, + s3_layers: HashSet<(LayerName, Generation)>, }, /// The remains of a deleted Timeline (i.e. an initdb archive only) Relic, Incorrect(Vec), } -fn parse_layer_object_name(name: &str) -> Result<(LayerFileName, Generation), String> { +fn parse_layer_object_name(name: &str) -> Result<(LayerName, Generation), String> { match name.rsplit_once('-') { // FIXME: this is gross, just use a regex? Some((layer_filename, gen)) if gen.len() == 8 => { - let layer = layer_filename.parse::()?; + let layer = layer_filename.parse::()?; let gen = Generation::parse_suffix(gen).ok_or("Malformed generation suffix".to_string())?; Ok((layer, gen)) } - _ => Ok((name.parse::()?, Generation::none())), + _ => Ok((name.parse::()?, Generation::none())), } } diff --git a/s3_scrubber/src/tenant_snapshot.rs b/s3_scrubber/src/tenant_snapshot.rs index 4eccad381b..2c93a8490a 100644 --- a/s3_scrubber/src/tenant_snapshot.rs +++ b/s3_scrubber/src/tenant_snapshot.rs @@ -12,7 +12,7 @@ use aws_sdk_s3::Client; use camino::Utf8PathBuf; use futures::{StreamExt, TryStreamExt}; use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata; -use pageserver::tenant::storage_layer::LayerFileName; +use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::IndexPart; use pageserver_api::shard::TenantShardId; use utils::generation::Generation; @@ -48,16 +48,16 @@ impl SnapshotDownloader { async fn download_layer( &self, ttid: TenantShardTimelineId, - layer_name: LayerFileName, + layer_name: LayerName, layer_metadata: IndexLayerMetadata, - ) -> anyhow::Result<(LayerFileName, IndexLayerMetadata)> { + ) -> anyhow::Result<(LayerName, IndexLayerMetadata)> { // Note this is local as in a local copy of S3 data, not local as in the pageserver's local format. They use // different layer names (remote-style has the generation suffix) let local_path = self.output_path.join(format!( "{}/timelines/{}/{}{}", ttid.tenant_shard_id, ttid.timeline_id, - layer_name.file_name(), + layer_name, layer_metadata.generation.get_suffix() )); @@ -76,7 +76,7 @@ impl SnapshotDownloader { let remote_layer_path = format!( "{}{}{}", timeline_root.prefix_in_bucket, - layer_name.file_name(), + layer_name, layer_metadata.generation.get_suffix() ); @@ -110,7 +110,7 @@ impl SnapshotDownloader { async fn download_layers( &self, ttid: TenantShardTimelineId, - layers: Vec<(LayerFileName, IndexLayerMetadata)>, + layers: Vec<(LayerName, IndexLayerMetadata)>, ) -> anyhow::Result<()> { let layer_count = layers.len(); tracing::info!("Downloading {} layers for timeline {ttid}...", layer_count); @@ -138,7 +138,7 @@ impl SnapshotDownloader { tracing::info!( "[{download_count}/{layer_count}] OK: {} bytes {ttid} {}", layer_metadata.file_size, - layer_name.file_name() + layer_name ); } Err(e) => { @@ -163,7 +163,7 @@ impl SnapshotDownloader { index_part_generation: Generation, ancestor_layers: &mut HashMap< TenantShardTimelineId, - HashMap, + HashMap, >, ) -> anyhow::Result<()> { let index_bytes = serde_json::to_string(&index_part).unwrap(); @@ -234,7 +234,7 @@ impl SnapshotDownloader { // happen if this tenant has been split at some point) let mut ancestor_layers: HashMap< TenantShardTimelineId, - HashMap, + HashMap, > = Default::default(); for shard in shards.into_iter().filter(|s| s.shard_count == shard_count) { diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index f618c508bc..390b94c2ea 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -54,7 +54,7 @@ from fixtures.pageserver.allowed_errors import ( DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS, ) from fixtures.pageserver.http import PageserverHttpClient -from fixtures.pageserver.types import IndexPartDump, LayerFileName, parse_layer_file_name +from fixtures.pageserver.types import IndexPartDump, LayerName, parse_layer_file_name from fixtures.pageserver.utils import ( wait_for_last_record_lsn, wait_for_upload, @@ -2664,7 +2664,7 @@ class NeonPageserver(PgProtocol, LogUtils): ) def layer_exists( - self, tenant_id: TenantId, timeline_id: TimelineId, layer_name: LayerFileName + self, tenant_id: TenantId, timeline_id: TimelineId, layer_name: LayerName ) -> bool: layers = self.list_layers(tenant_id, timeline_id) return layer_name in [parse_layer_file_name(p.name) for p in layers] diff --git a/test_runner/fixtures/pageserver/types.py b/test_runner/fixtures/pageserver/types.py index fd018cb778..1fb618f445 100644 --- a/test_runner/fixtures/pageserver/types.py +++ b/test_runner/fixtures/pageserver/types.py @@ -12,7 +12,7 @@ class IndexLayerMetadata: @dataclass(frozen=True) -class ImageLayerFileName: +class ImageLayerName: lsn: Lsn key_start: Key key_end: Key @@ -26,7 +26,7 @@ class ImageLayerFileName: @dataclass(frozen=True) -class DeltaLayerFileName: +class DeltaLayerName: lsn_start: Lsn lsn_end: Lsn key_start: Key @@ -41,14 +41,16 @@ class DeltaLayerFileName: return ret -LayerFileName = Union[ImageLayerFileName, DeltaLayerFileName] +LayerName = Union[ImageLayerName, DeltaLayerName] class InvalidFileName(Exception): pass -IMAGE_LAYER_FILE_NAME = re.compile("^([A-F0-9]{36})-([A-F0-9]{36})__([A-F0-9]{16})(-[a-f0-9]{8})?$") +IMAGE_LAYER_FILE_NAME = re.compile( + "^([A-F0-9]{36})-([A-F0-9]{36})__([A-F0-9]{16})(-v1-[a-f0-9]{8})?$" +) def parse_image_layer(f_name: str) -> Tuple[int, int, int]: @@ -62,7 +64,7 @@ def parse_image_layer(f_name: str) -> Tuple[int, int, int]: DELTA_LAYER_FILE_NAME = re.compile( - "^([A-F0-9]{36})-([A-F0-9]{36})__([A-F0-9]{16})-([A-F0-9]{16})(-[a-f0-9]{8})?$" + "^([A-F0-9]{36})-([A-F0-9]{36})__([A-F0-9]{16})-([A-F0-9]{16})(-v1-[a-f0-9]{8})?$" ) @@ -80,16 +82,16 @@ def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]: ) -def parse_layer_file_name(file_name: str) -> LayerFileName: +def parse_layer_file_name(file_name: str) -> LayerName: try: key_start, key_end, lsn = parse_image_layer(file_name) - return ImageLayerFileName(lsn=Lsn(lsn), key_start=Key(key_start), key_end=Key(key_end)) + return ImageLayerName(lsn=Lsn(lsn), key_start=Key(key_start), key_end=Key(key_end)) except InvalidFileName: pass try: key_start, key_end, lsn_start, lsn_end = parse_delta_layer(file_name) - return DeltaLayerFileName( + return DeltaLayerName( lsn_start=Lsn(lsn_start), lsn_end=Lsn(lsn_end), key_start=Key(key_start), @@ -101,18 +103,15 @@ def parse_layer_file_name(file_name: str) -> LayerFileName: raise InvalidFileName("neither image nor delta layer") -def is_future_layer(layer_file_name: LayerFileName, disk_consistent_lsn: Lsn): +def is_future_layer(layer_file_name: LayerName, disk_consistent_lsn: Lsn): """ Determines if this layer file is considered to be in future meaning we will discard these layers during timeline initialization from the given disk_consistent_lsn. """ - if ( - isinstance(layer_file_name, ImageLayerFileName) - and layer_file_name.lsn > disk_consistent_lsn - ): + if isinstance(layer_file_name, ImageLayerName) and layer_file_name.lsn > disk_consistent_lsn: return True elif ( - isinstance(layer_file_name, DeltaLayerFileName) + isinstance(layer_file_name, DeltaLayerName) and layer_file_name.lsn_end > disk_consistent_lsn + 1 ): return True @@ -122,7 +121,7 @@ def is_future_layer(layer_file_name: LayerFileName, disk_consistent_lsn: Lsn): @dataclass class IndexPartDump: - layer_metadata: Dict[LayerFileName, IndexLayerMetadata] + layer_metadata: Dict[LayerName, IndexLayerMetadata] disk_consistent_lsn: Lsn @classmethod diff --git a/test_runner/regress/test_layers_from_future.py b/test_runner/regress/test_layers_from_future.py index f311a8bf2c..cc34fd83e9 100644 --- a/test_runner/regress/test_layers_from_future.py +++ b/test_runner/regress/test_layers_from_future.py @@ -3,8 +3,8 @@ import time from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder, flush_ep_to_pageserver from fixtures.pageserver.types import ( - DeltaLayerFileName, - ImageLayerFileName, + DeltaLayerName, + ImageLayerName, is_future_layer, ) from fixtures.pageserver.utils import ( @@ -81,7 +81,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder): current = get_index_part() assert len(set(current.layer_metadata.keys())) == 1 layer_file_name = list(current.layer_metadata.keys())[0] - assert isinstance(layer_file_name, DeltaLayerFileName) + assert isinstance(layer_file_name, DeltaLayerName) assert layer_file_name.is_l0(), f"{layer_file_name}" log.info("force image layer creation in the future by writing some data into in-memory layer") @@ -146,7 +146,7 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder): future_layers = get_future_layers() assert len(future_layers) == 1 future_layer = future_layers[0] - assert isinstance(future_layer, ImageLayerFileName) + assert isinstance(future_layer, ImageLayerName) assert future_layer.lsn == last_record_lsn log.info( f"got layer from the future: lsn={future_layer.lsn} disk_consistent_lsn={ip.disk_consistent_lsn} last_record_lsn={last_record_lsn}" diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index adcf7de8d4..58eaf404d3 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -729,8 +729,8 @@ def test_upgrade_generationless_local_file_paths( for filename in os.listdir(timeline_dir): path = os.path.join(timeline_dir, filename) log.info(f"Found file {path}") - if path.endswith("-00000001"): - new_path = path[:-9] + if path.endswith("-v1-00000001"): + new_path = path[:-12] os.rename(path, new_path) log.info(f"Renamed {path} -> {new_path}") files_renamed += 1