diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index 6248424cee..400930245b 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -540,7 +540,12 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl( js.spawn(async move { layer .secondary_tenant - .evict_layer(tenant_manager.get_conf(), layer.timeline_id, layer.name) + .evict_layer( + tenant_manager.get_conf(), + layer.timeline_id, + layer.name, + layer.metadata, + ) .await; Ok(file_size) }); diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index ea4c7f1e3b..83b7b8a45e 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -63,6 +63,7 @@ use crate::tenant::remote_timeline_client::list_remote_timelines; use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::LayerAccessStatsReset; +use crate::tenant::storage_layer::LayerFileName; use crate::tenant::timeline::CompactFlags; use crate::tenant::timeline::Timeline; use crate::tenant::SpawnMode; @@ -1228,13 +1229,15 @@ async fn layer_download_handler( let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; let layer_file_name = get_request_param(&request, "layer_file_name")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; + let layer_name = LayerFileName::from_str(layer_file_name) + .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?; let state = get_state(&request); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; let downloaded = timeline - .download_layer(layer_file_name) + .download_layer(&layer_name) .await .map_err(ApiError::InternalServerError)?; @@ -1258,11 +1261,14 @@ async fn evict_timeline_layer_handler( let layer_file_name = get_request_param(&request, "layer_file_name")?; let state = get_state(&request); + let layer_name = LayerFileName::from_str(layer_file_name) + .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?; + let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; let evicted = timeline - .evict_layer(layer_file_name) + .evict_layer(&layer_name) .await .map_err(ApiError::InternalServerError)?; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 40712e4895..256f2f334c 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -2929,6 +2929,8 @@ pub fn preinitialize_metrics() { &WALRECEIVER_CANDIDATES_REMOVED, &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES, &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES, + &REMOTE_ONDEMAND_DOWNLOADED_LAYERS, + &REMOTE_ONDEMAND_DOWNLOADED_BYTES, ] .into_iter() .for_each(|c| { diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 255449c049..356a0dc51c 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1140,15 +1140,21 @@ impl RemoteTimelineClient { uploaded: &ResidentLayer, cancel: &CancellationToken, ) -> anyhow::Result<()> { + let remote_path = remote_layer_path( + &self.tenant_shard_id.tenant_id, + &self.timeline_id, + self.tenant_shard_id.to_index(), + &uploaded.layer_desc().filename(), + uploaded.metadata().generation, + ); + backoff::retry( || async { - let m = uploaded.metadata(); upload::upload_timeline_layer( - self.conf, &self.storage_impl, uploaded.local_path(), - 
&uploaded.metadata(), - m.generation, + &remote_path, + uploaded.metadata().file_size(), cancel, ) .await @@ -1173,15 +1179,30 @@ impl RemoteTimelineClient { adopted_as: &Layer, cancel: &CancellationToken, ) -> anyhow::Result<()> { + let source_remote_path = remote_layer_path( + &self.tenant_shard_id.tenant_id, + &adopted + .get_timeline_id() + .expect("Source timeline should be alive"), + self.tenant_shard_id.to_index(), + &adopted.layer_desc().filename(), + adopted.metadata().generation, + ); + + let target_remote_path = remote_layer_path( + &self.tenant_shard_id.tenant_id, + &self.timeline_id, + self.tenant_shard_id.to_index(), + &adopted_as.layer_desc().filename(), + adopted_as.metadata().generation, + ); + backoff::retry( || async { upload::copy_timeline_layer( - self.conf, &self.storage_impl, - adopted.local_path(), - &adopted.metadata(), - adopted_as.local_path(), - &adopted_as.metadata(), + &source_remote_path, + &target_remote_path, cancel, ) .await @@ -1496,13 +1517,25 @@ impl RemoteTimelineClient { let upload_result: anyhow::Result<()> = match &task.op { UploadOp::UploadLayer(ref layer, ref layer_metadata) => { - let path = layer.local_path(); + let local_path = layer.local_path(); + + // We should only be uploading layers created by this `Tenant`'s lifetime, so + // the metadata in the upload should always match our current generation. + assert_eq!(layer_metadata.generation, self.generation); + + let remote_path = remote_layer_path( + &self.tenant_shard_id.tenant_id, + &self.timeline_id, + layer_metadata.shard, + &layer.layer_desc().filename(), + layer_metadata.generation, + ); + upload::upload_timeline_layer( - self.conf, &self.storage_impl, - path, - layer_metadata, - self.generation, + local_path, + &remote_path, + layer_metadata.file_size(), &self.cancel, ) .measure_remote_op( @@ -1931,29 +1964,6 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option { } } -/// Files on the remote storage are stored with paths, relative to the workdir. -/// That path includes in itself both tenant and timeline ids, allowing to have a unique remote storage path. -/// -/// Errors if the path provided does not start from pageserver's workdir. -pub(crate) fn remote_path( - conf: &PageServerConf, - local_path: &Utf8Path, - generation: Generation, -) -> anyhow::Result { - let stripped = local_path - .strip_prefix(&conf.workdir) - .context("Failed to strip workdir prefix")?; - - let suffixed = format!("{0}{1}", stripped, generation.get_suffix()); - - RemotePath::new(Utf8Path::new(&suffixed)).with_context(|| { - format!( - "to resolve remote part of path {:?} for base {:?}", - local_path, conf.workdir - ) - }) -} - #[cfg(test)] mod tests { use super::*; @@ -1961,6 +1971,7 @@ mod tests { context::RequestContext, tenant::{ harness::{TenantHarness, TIMELINE_ID}, + storage_layer::layer::local_layer_path, Tenant, Timeline, }, DEFAULT_PG_VERSION, @@ -2143,11 +2154,20 @@ mod tests { ] .into_iter() .map(|(name, contents): (LayerFileName, Vec)| { - std::fs::write(timeline_path.join(name.file_name()), &contents).unwrap(); + + let local_path = local_layer_path( + harness.conf, + &timeline.tenant_shard_id, + &timeline.timeline_id, + &name, + &generation, + ); + std::fs::write(&local_path, &contents).unwrap(); Layer::for_resident( harness.conf, &timeline, + local_path, name, LayerFileMetadata::new(contents.len() as u64, generation, shard), ) @@ -2284,19 +2304,22 @@ mod tests { .. 
} = TestSetup::new("metrics").await.unwrap(); let client = timeline.remote_client.as_ref().unwrap(); - let timeline_path = harness.timeline_path(&TIMELINE_ID); let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); + let local_path = local_layer_path( + harness.conf, + &timeline.tenant_shard_id, + &timeline.timeline_id, + &layer_file_name_1, + &harness.generation, + ); let content_1 = dummy_contents("foo"); - std::fs::write( - timeline_path.join(layer_file_name_1.file_name()), - &content_1, - ) - .unwrap(); + std::fs::write(&local_path, &content_1).unwrap(); let layer_file_1 = Layer::for_resident( harness.conf, &timeline, + local_path, layer_file_name_1.clone(), LayerFileMetadata::new(content_1.len() as u64, harness.generation, harness.shard), ); diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index b038f264f5..c86b22d481 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -21,6 +21,7 @@ use crate::config::PageServerConf; use crate::context::RequestContext; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path}; +use crate::tenant::storage_layer::layer::local_layer_path; use crate::tenant::storage_layer::LayerFileName; use crate::tenant::Generation; use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}; @@ -55,7 +56,13 @@ pub async fn download_layer_file<'a>( debug_assert_current_span_has_tenant_and_timeline_id(); let timeline_path = conf.timeline_path(&tenant_shard_id, &timeline_id); - let local_path = timeline_path.join(layer_file_name.file_name()); + let local_path = local_layer_path( + conf, + &tenant_shard_id, + &timeline_id, + layer_file_name, + &layer_metadata.generation, + ); let remote_path = remote_layer_path( &tenant_shard_id.tenant_id, diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index a988369b6a..caa843316f 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -12,18 +12,13 @@ use tokio_util::sync::CancellationToken; use utils::backoff; use super::Generation; -use crate::{ - config::PageServerConf, - tenant::remote_timeline_client::{ - index::IndexPart, remote_index_path, remote_initdb_archive_path, - remote_initdb_preserved_archive_path, remote_path, - }, +use crate::tenant::remote_timeline_client::{ + index::IndexPart, remote_index_path, remote_initdb_archive_path, + remote_initdb_preserved_archive_path, }; -use remote_storage::{GenericRemoteStorage, TimeTravelError}; +use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError}; use utils::id::{TenantId, TimelineId}; -use super::index::LayerFileMetadata; - use tracing::info; /// Serializes and uploads the given index part data to the remote storage. @@ -65,11 +60,10 @@ pub(crate) async fn upload_index_part<'a>( /// /// On an error, bumps the retries count and reschedules the entire task. 
pub(super) async fn upload_timeline_layer<'a>( - conf: &'static PageServerConf, storage: &'a GenericRemoteStorage, - source_path: &'a Utf8Path, - known_metadata: &'a LayerFileMetadata, - generation: Generation, + local_path: &'a Utf8Path, + remote_path: &'a RemotePath, + metadata_size: u64, cancel: &CancellationToken, ) -> anyhow::Result<()> { fail_point!("before-upload-layer", |_| { @@ -78,8 +72,7 @@ pub(super) async fn upload_timeline_layer<'a>( pausable_failpoint!("before-upload-layer-pausable"); - let storage_path = remote_path(conf, source_path, generation)?; - let source_file_res = fs::File::open(&source_path).await; + let source_file_res = fs::File::open(&local_path).await; let source_file = match source_file_res { Ok(source_file) => source_file, Err(e) if e.kind() == ErrorKind::NotFound => { @@ -90,43 +83,37 @@ pub(super) async fn upload_timeline_layer<'a>( // it has been written to disk yet. // // This is tested against `test_compaction_delete_before_upload` - info!(path = %source_path, "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more."); + info!(path = %local_path, "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more."); return Ok(()); } - Err(e) => { - Err(e).with_context(|| format!("open a source file for layer {source_path:?}"))? - } + Err(e) => Err(e).with_context(|| format!("open a source file for layer {local_path:?}"))?, }; let fs_size = source_file .metadata() .await - .with_context(|| format!("get the source file metadata for layer {source_path:?}"))? + .with_context(|| format!("get the source file metadata for layer {local_path:?}"))? .len(); - let metadata_size = known_metadata.file_size(); if metadata_size != fs_size { - bail!("File {source_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"); + bail!("File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"); } let fs_size = usize::try_from(fs_size) - .with_context(|| format!("convert {source_path:?} size {fs_size} usize"))?; + .with_context(|| format!("convert {local_path:?} size {fs_size} usize"))?; let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE); storage - .upload(reader, fs_size, &storage_path, None, cancel) + .upload(reader, fs_size, remote_path, None, cancel) .await - .with_context(|| format!("upload layer from local path '{source_path}'")) + .with_context(|| format!("upload layer from local path '{local_path}'")) } pub(super) async fn copy_timeline_layer( - conf: &'static PageServerConf, storage: &GenericRemoteStorage, - source_path: &Utf8Path, - source_metadata: &LayerFileMetadata, - target_path: &Utf8Path, - target_metadata: &LayerFileMetadata, + source_path: &RemotePath, + target_path: &RemotePath, cancel: &CancellationToken, ) -> anyhow::Result<()> { fail_point!("before-copy-layer", |_| { @@ -135,11 +122,8 @@ pub(super) async fn copy_timeline_layer( pausable_failpoint!("before-copy-layer-pausable"); - let source_path = remote_path(conf, source_path, source_metadata.generation)?; - let target_path = remote_path(conf, target_path, target_metadata.generation)?; - storage - .copy_object(&source_path, &target_path, cancel) + .copy_object(source_path, target_path, cancel) .await .with_context(|| format!("copy layer {source_path} to {target_path}")) } diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs index 5c46df268a..0bb25f0ace 100644 --- 
a/pageserver/src/tenant/secondary.rs +++ b/pageserver/src/tenant/secondary.rs @@ -21,8 +21,9 @@ use self::{ use super::{ config::{SecondaryLocationConfig, TenantConfOpt}, mgr::TenantManager, + remote_timeline_client::LayerFileMetadata, span::debug_assert_current_span_has_tenant_id, - storage_layer::LayerFileName, + storage_layer::{layer::local_layer_path, LayerFileName}, }; use pageserver_api::{ @@ -182,6 +183,7 @@ impl SecondaryTenant { conf: &PageServerConf, timeline_id: TimelineId, name: LayerFileName, + metadata: LayerFileMetadata, ) { debug_assert_current_span_has_tenant_id(); @@ -195,9 +197,13 @@ impl SecondaryTenant { let now = SystemTime::now(); - let path = conf - .timeline_path(&self.tenant_shard_id, &timeline_id) - .join(name.file_name()); + let local_path = local_layer_path( + conf, + &self.tenant_shard_id, + &timeline_id, + &name, + &metadata.generation, + ); let this = self.clone(); @@ -208,7 +214,7 @@ impl SecondaryTenant { // it, the secondary downloader could have seen an updated heatmap that // resulted in a layer being deleted. // Other local I/O errors are process-fatal: these should never happen. - let deleted = std::fs::remove_file(path); + let deleted = std::fs::remove_file(local_path); let not_found = deleted .as_ref() diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index fb8907b5a8..092630e74d 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -22,7 +22,7 @@ use crate::{ FAILED_REMOTE_OP_RETRIES, }, span::debug_assert_current_span_has_tenant_id, - storage_layer::LayerFileName, + storage_layer::{layer::local_layer_path, LayerFileName}, tasks::{warn_when_period_overrun, BackgroundLoopKind}, }, virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}, @@ -621,12 +621,12 @@ impl<'a> TenantDownloader<'a> { let layers_in_heatmap = heatmap_timeline .layers .iter() - .map(|l| &l.name) + .map(|l| (&l.name, l.metadata.generation)) .collect::>(); let layers_on_disk = timeline_state .on_disk_layers .iter() - .map(|l| l.0) + .map(|l| (l.0, l.1.metadata.generation)) .collect::>(); let mut layer_count = layers_on_disk.len(); @@ -637,16 +637,24 @@ impl<'a> TenantDownloader<'a> { .sum(); // Remove on-disk layers that are no longer present in heatmap - for layer in layers_on_disk.difference(&layers_in_heatmap) { + for (layer_file_name, generation) in layers_on_disk.difference(&layers_in_heatmap) { layer_count -= 1; layer_byte_count -= timeline_state .on_disk_layers - .get(layer) + .get(layer_file_name) .unwrap() .metadata .file_size(); - delete_layers.push((*timeline_id, (*layer).clone())); + let local_path = local_layer_path( + self.conf, + self.secondary_state.get_tenant_shard_id(), + timeline_id, + layer_file_name, + generation, + ); + + delete_layers.push((*timeline_id, (*layer_file_name).clone(), local_path)); } progress.bytes_downloaded += layer_byte_count; @@ -661,11 +669,7 @@ impl<'a> TenantDownloader<'a> { } // Execute accumulated deletions - for (timeline_id, layer_name) in delete_layers { - let timeline_path = self - .conf - .timeline_path(self.secondary_state.get_tenant_shard_id(), &timeline_id); - let local_path = timeline_path.join(layer_name.to_string()); + for (timeline_id, layer_name, local_path) in delete_layers { tracing::info!(timeline_id=%timeline_id, "Removing secondary local layer {layer_name} because it's absent in heatmap",); tokio::fs::remove_file(&local_path) @@ -754,9 +758,6 @@ impl<'a> TenantDownloader<'a> { ) -> Result<(), 
UpdateError> { debug_assert_current_span_has_tenant_and_timeline_id(); let tenant_shard_id = self.secondary_state.get_tenant_shard_id(); - let timeline_path = self - .conf - .timeline_path(tenant_shard_id, &timeline.timeline_id); // Accumulate updates to the state let mut touched = Vec::new(); @@ -806,10 +807,14 @@ impl<'a> TenantDownloader<'a> { if cfg!(debug_assertions) { // Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think // are already present on disk are really there. - let local_path = self - .conf - .timeline_path(tenant_shard_id, &timeline.timeline_id) - .join(layer.name.file_name()); + let local_path = local_layer_path( + self.conf, + tenant_shard_id, + &timeline.timeline_id, + &layer.name, + &layer.metadata.generation, + ); + match tokio::fs::metadata(&local_path).await { Ok(meta) => { tracing::debug!( @@ -903,7 +908,13 @@ impl<'a> TenantDownloader<'a> { }; if downloaded_bytes != layer.metadata.file_size { - let local_path = timeline_path.join(layer.name.to_string()); + let local_path = local_layer_path( + self.conf, + tenant_shard_id, + &timeline.timeline_id, + &layer.name, + &layer.metadata.generation, + ); tracing::warn!( "Downloaded layer {} with unexpected size {} != {}. Removing download.", diff --git a/pageserver/src/tenant/storage_layer/filename.rs b/pageserver/src/tenant/storage_layer/filename.rs index a98be0842b..fff66a9d07 100644 --- a/pageserver/src/tenant/storage_layer/filename.rs +++ b/pageserver/src/tenant/storage_layer/filename.rs @@ -2,11 +2,13 @@ //! Helper functions for dealing with filenames of the image and delta layer files. //! use crate::repository::Key; +use std::borrow::Cow; use std::cmp::Ordering; use std::fmt; use std::ops::Range; use std::str::FromStr; +use regex::Regex; use utils::lsn::Lsn; use super::PersistentLayerDesc; @@ -74,10 +76,19 @@ impl DeltaFileName { let key_end_str = key_parts.next()?; let lsn_start_str = lsn_parts.next()?; let lsn_end_str = lsn_parts.next()?; + if parts.next().is_some() || key_parts.next().is_some() || key_parts.next().is_some() { return None; } + if key_start_str.len() != 36 + || key_end_str.len() != 36 + || lsn_start_str.len() != 16 + || lsn_end_str.len() != 16 + { + return None; + } + let key_start = Key::from_hex(key_start_str).ok()?; let key_end = Key::from_hex(key_end_str).ok()?; @@ -182,6 +193,10 @@ impl ImageFileName { return None; } + if key_start_str.len() != 36 || key_end_str.len() != 36 || lsn_str.len() != 16 { + return None; + } + let key_start = Key::from_hex(key_start_str).ok()?; let key_end = Key::from_hex(key_end_str).ok()?; @@ -259,9 +274,22 @@ impl From for LayerFileName { impl FromStr for LayerFileName { type Err = String; + /// Conversion from either a physical layer filename, or the string-ization of + /// Self. When loading a physical layer filename, we drop any extra information + /// not needed to build Self. 
fn from_str(value: &str) -> Result { - let delta = DeltaFileName::parse_str(value); - let image = ImageFileName::parse_str(value); + let gen_suffix_regex = Regex::new("^(?.+)-(?[0-9a-f]{8})$").unwrap(); + let file_name: Cow = match gen_suffix_regex.captures(value) { + Some(captures) => captures + .name("base") + .expect("Non-optional group") + .as_str() + .into(), + None => value.into(), + }; + + let delta = DeltaFileName::parse_str(&file_name); + let image = ImageFileName::parse_str(&file_name); let ok = match (delta, image) { (None, None) => { return Err(format!( @@ -315,3 +343,42 @@ impl<'de> serde::de::Visitor<'de> for LayerFileNameVisitor { v.parse().map_err(|e| E::custom(e)) } } + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn image_layer_parse() -> anyhow::Result<()> { + let expected = LayerFileName::Image(ImageFileName { + key_range: Key::from_i128(0) + ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(), + lsn: Lsn::from_hex("00000000014FED58").unwrap(), + }); + let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001").map_err(|s| anyhow::anyhow!(s))?; + assert_eq!(parsed, expected,); + + // Omitting generation suffix is valid + let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").map_err(|s| anyhow::anyhow!(s))?; + assert_eq!(parsed, expected,); + + Ok(()) + } + + #[test] + fn delta_layer_parse() -> anyhow::Result<()> { + let expected = LayerFileName::Delta(DeltaFileName { + key_range: Key::from_i128(0) + ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(), + lsn_range: Lsn::from_hex("00000000014FED58").unwrap() + ..Lsn::from_hex("000000000154C481").unwrap(), + }); + let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-00000001").map_err(|s| anyhow::anyhow!(s))?; + assert_eq!(parsed, expected); + + // Omitting generation suffix is valid + let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").map_err(|s| anyhow::anyhow!(s))?; + assert_eq!(parsed, expected); + + Ok(()) + } +} diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 27faa507ca..b5e69db7f4 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -4,12 +4,13 @@ use pageserver_api::keyspace::KeySpace; use pageserver_api::models::{ HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus, }; -use pageserver_api::shard::ShardIndex; +use pageserver_api::shard::{ShardIndex, TenantShardId}; use std::ops::Range; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::{Arc, Weak}; use std::time::{Duration, SystemTime}; use tracing::Instrument; +use utils::id::TimelineId; use utils::lsn::Lsn; use utils::sync::heavier_once_cell; @@ -123,6 +124,25 @@ impl PartialEq for Layer { } } +pub(crate) fn local_layer_path( + conf: &PageServerConf, + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, + layer_file_name: &LayerFileName, + _generation: &Generation, +) -> Utf8PathBuf { + let timeline_path = conf.timeline_path(tenant_shard_id, timeline_id); + + timeline_path.join(layer_file_name.file_name()) + + // TOOD: include generation in the name in now+1 releases. 
+ // timeline_path.join(format!( + // "{}{}", + // layer_file_name.file_name(), + // generation.get_suffix() + // )) +} + impl Layer { /// Creates a layer value for a file we know to not be resident. pub(crate) fn for_evicted( @@ -131,6 +151,14 @@ impl Layer { file_name: LayerFileName, metadata: LayerFileMetadata, ) -> Self { + let local_path = local_layer_path( + conf, + &timeline.tenant_shard_id, + &timeline.timeline_id, + &file_name, + &metadata.generation, + ); + let desc = PersistentLayerDesc::from_filename( timeline.tenant_shard_id, timeline.timeline_id, @@ -143,6 +171,7 @@ impl Layer { let owner = Layer(Arc::new(LayerInner::new( conf, timeline, + local_path, access_stats, desc, None, @@ -159,6 +188,7 @@ impl Layer { pub(crate) fn for_resident( conf: &'static PageServerConf, timeline: &Arc, + local_path: Utf8PathBuf, file_name: LayerFileName, metadata: LayerFileMetadata, ) -> ResidentLayer { @@ -184,6 +214,7 @@ impl Layer { LayerInner::new( conf, timeline, + local_path, access_stats, desc, Some(inner), @@ -225,9 +256,19 @@ impl Layer { LayerResidenceStatus::Resident, LayerResidenceEventReason::LayerCreate, ); + + let local_path = local_layer_path( + conf, + &timeline.tenant_shard_id, + &timeline.timeline_id, + &desc.filename(), + &timeline.generation, + ); + LayerInner::new( conf, timeline, + local_path, access_stats, desc, Some(inner), @@ -410,6 +451,13 @@ impl Layer { self.0.metadata() } + pub(crate) fn get_timeline_id(&self) -> Option { + self.0 + .timeline + .upgrade() + .map(|timeline| timeline.timeline_id) + } + /// Traditional debug dumping facility #[allow(unused)] pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> anyhow::Result<()> { @@ -709,19 +757,17 @@ impl Drop for LayerInner { } impl LayerInner { + #[allow(clippy::too_many_arguments)] fn new( conf: &'static PageServerConf, timeline: &Arc, + local_path: Utf8PathBuf, access_stats: LayerAccessStats, desc: PersistentLayerDesc, downloaded: Option>, generation: Generation, shard: ShardIndex, ) -> Self { - let path = conf - .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id) - .join(desc.filename().to_string()); - let (inner, version, init_status) = if let Some(inner) = downloaded { let version = inner.version; let resident = ResidentOrWantedEvicted::Resident(inner); @@ -737,7 +783,7 @@ impl LayerInner { LayerInner { conf, debug_str: { format!("timelines/{}/{}", timeline.timeline_id, desc.filename()).into() }, - path, + path: local_path, desc, timeline: Arc::downgrade(timeline), have_remote_client: timeline.remote_client.is_some(), diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 7213ff8f75..d6d012c70c 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -60,6 +60,7 @@ use std::{ ops::ControlFlow, }; +use crate::tenant::storage_layer::layer::local_layer_path; use crate::tenant::{ layer_map::{LayerMap, SearchResult}, metadata::TimelineMetadata, @@ -1904,7 +1905,7 @@ impl Timeline { #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))] pub(crate) async fn download_layer( &self, - layer_file_name: &str, + layer_file_name: &LayerFileName, ) -> anyhow::Result> { let Some(layer) = self.find_layer(layer_file_name).await else { return Ok(None); @@ -1922,7 +1923,10 @@ impl Timeline { /// Evict just one layer. /// /// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`. 
- pub(crate) async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result> { + pub(crate) async fn evict_layer( + &self, + layer_file_name: &LayerFileName, + ) -> anyhow::Result> { let _gate = self .gate .enter() @@ -2413,8 +2417,8 @@ impl Timeline { for discovered in discovered { let (name, kind) = match discovered { - Discovered::Layer(file_name, file_size) => { - discovered_layers.push((file_name, file_size)); + Discovered::Layer(layer_file_name, local_path, file_size) => { + discovered_layers.push((layer_file_name, local_path, file_size)); continue; } Discovered::Metadata => { @@ -2459,7 +2463,7 @@ impl Timeline { let mut needs_cleanup = Vec::new(); let mut total_physical_size = 0; - for (name, decision) in decided { + for (name, local_path, decision) in decided { let decision = match decision { Ok(UseRemote { local, remote }) => { // Remote is authoritative, but we may still choose to retain @@ -2469,26 +2473,23 @@ impl Timeline { // the correct generation. UseLocal(remote) } else { - path.push(name.file_name()); - init::cleanup_local_file_for_remote(&path, &local, &remote)?; - path.pop(); + let local_path = local_path.as_ref().expect("Locally found layer must have path"); + init::cleanup_local_file_for_remote(local_path, &local, &remote)?; UseRemote { local, remote } } } Ok(decision) => decision, Err(DismissedLayer::Future { local }) => { if local.is_some() { - path.push(name.file_name()); - init::cleanup_future_layer(&path, &name, disk_consistent_lsn)?; - path.pop(); + let local_path = local_path.expect("Locally found layer must have path"); + init::cleanup_future_layer(&local_path, &name, disk_consistent_lsn)?; } needs_cleanup.push(name); continue; } Err(DismissedLayer::LocalOnly(local)) => { - path.push(name.file_name()); - init::cleanup_local_only_file(&path, &name, &local)?; - path.pop(); + let local_path = local_path.expect("Locally found layer must have path"); + init::cleanup_local_only_file(&local_path, &name, &local)?; // this file never existed remotely, we will have to do rework continue; } @@ -2504,7 +2505,18 @@ impl Timeline { let layer = match decision { UseLocal(m) => { total_physical_size += m.file_size(); - Layer::for_resident(conf, &this, name, m).drop_eviction_guard() + + let local_path = local_path.unwrap_or_else(|| { + local_layer_path( + conf, + &this.tenant_shard_id, + &this.timeline_id, + &name, + &m.generation, + ) + }); + + Layer::for_resident(conf, &this, local_path, name, m).drop_eviction_guard() } Evicted(remote) | UseRemote { remote, .. 
} => { Layer::for_evicted(conf, &this, name, remote) @@ -2985,11 +2997,11 @@ impl Timeline { } } - async fn find_layer(&self, layer_file_name: &str) -> Option { + async fn find_layer(&self, layer_name: &LayerFileName) -> Option { let guard = self.layers.read().await; for historic_layer in guard.layer_map().iter_historic_layers() { - let historic_layer_name = historic_layer.filename().file_name(); - if layer_file_name == historic_layer_name { + let historic_layer_name = historic_layer.filename(); + if layer_name == &historic_layer_name { return Some(guard.get_from_desc(&historic_layer)); } } diff --git a/pageserver/src/tenant/timeline/init.rs b/pageserver/src/tenant/timeline/init.rs index 916ebfc6d9..9c33981807 100644 --- a/pageserver/src/tenant/timeline/init.rs +++ b/pageserver/src/tenant/timeline/init.rs @@ -12,7 +12,7 @@ use crate::{ METADATA_FILE_NAME, }; use anyhow::Context; -use camino::Utf8Path; +use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::shard::ShardIndex; use std::{collections::HashMap, str::FromStr}; use utils::lsn::Lsn; @@ -20,7 +20,7 @@ use utils::lsn::Lsn; /// Identified files in the timeline directory. pub(super) enum Discovered { /// The only one we care about - Layer(LayerFileName, u64), + Layer(LayerFileName, Utf8PathBuf, u64), /// Old ephmeral files from previous launches, should be removed Ephemeral(String), /// Old temporary timeline files, unsure what these really are, should be removed @@ -46,7 +46,7 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result { let file_size = direntry.metadata()?.len(); - Discovered::Layer(file_name, file_size) + Discovered::Layer(file_name, direntry.path().to_owned(), file_size) } Err(_) => { if file_name == METADATA_FILE_NAME { @@ -104,26 +104,38 @@ pub(super) enum DismissedLayer { /// Merges local discoveries and remote [`IndexPart`] to a collection of decisions. pub(super) fn reconcile( - discovered: Vec<(LayerFileName, u64)>, + discovered: Vec<(LayerFileName, Utf8PathBuf, u64)>, index_part: Option<&IndexPart>, disk_consistent_lsn: Lsn, generation: Generation, shard: ShardIndex, -) -> Vec<(LayerFileName, Result)> { +) -> Vec<( + LayerFileName, + Option, + Result, +)> { use Decision::*; - // name => (local, remote) - type Collected = HashMap, Option)>; + // name => (local_path, local_metadata, remote_metadata) + type Collected = HashMap< + LayerFileName, + ( + Option, + Option, + Option, + ), + >; let mut discovered = discovered .into_iter() - .map(|(name, file_size)| { + .map(|(layer_name, local_path, file_size)| { ( - name, + layer_name, // The generation and shard here will be corrected to match IndexPart in the merge below, unless // it is not in IndexPart, in which case using our current generation makes sense // because it will be uploaded in this generation. 
( + Some(local_path), Some(LayerFileMetadata::new(file_size, generation, shard)), None, ), @@ -140,15 +152,15 @@ pub(super) fn reconcile( .map(|(name, metadata)| (name, LayerFileMetadata::from(metadata))) .for_each(|(name, metadata)| { if let Some(existing) = discovered.get_mut(name) { - existing.1 = Some(metadata); + existing.2 = Some(metadata); } else { - discovered.insert(name.to_owned(), (None, Some(metadata))); + discovered.insert(name.to_owned(), (None, None, Some(metadata))); } }); discovered .into_iter() - .map(|(name, (local, remote))| { + .map(|(name, (local_path, local, remote))| { let decision = if name.is_in_future(disk_consistent_lsn) { Err(DismissedLayer::Future { local }) } else { @@ -165,7 +177,7 @@ pub(super) fn reconcile( } }; - (name, decision) + (name, local_path, decision) }) .collect::>() } diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index fc66822eb9..30cec4c726 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -54,7 +54,7 @@ from fixtures.pageserver.allowed_errors import ( DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS, ) from fixtures.pageserver.http import PageserverHttpClient -from fixtures.pageserver.types import IndexPartDump +from fixtures.pageserver.types import IndexPartDump, LayerFileName, parse_layer_file_name from fixtures.pageserver.utils import ( wait_for_last_record_lsn, wait_for_upload, @@ -2652,6 +2652,37 @@ class NeonPageserver(PgProtocol, LogUtils): tenant_id, generation=self.env.storage_controller.attach_hook_issue(tenant_id, self.id) ) + def list_layers(self, tenant_id: TenantId, timeline_id: TimelineId) -> list[Path]: + """ + Inspect local storage on a pageserver to discover which layer files are present. + + :return: list of relative paths to layers, from the timeline root. + """ + timeline_path = self.timeline_dir(tenant_id, timeline_id) + + def relative(p: Path) -> Path: + return p.relative_to(timeline_path) + + return sorted( + list( + map( + relative, + filter( + lambda path: path.name != "metadata" + and "ephemeral" not in path.name + and "temp" not in path.name, + timeline_path.glob("*"), + ), + ) + ) + ) + + def layer_exists( + self, tenant_id: TenantId, timeline_id: TimelineId, layer_name: LayerFileName + ) -> bool: + layers = self.list_layers(tenant_id, timeline_id) + return layer_name in [parse_layer_file_name(p.name) for p in layers] + class PgBin: """A helper class for executing postgres binaries""" diff --git a/test_runner/fixtures/pageserver/types.py b/test_runner/fixtures/pageserver/types.py index 72fa30a2f2..fd018cb778 100644 --- a/test_runner/fixtures/pageserver/types.py +++ b/test_runner/fixtures/pageserver/types.py @@ -1,3 +1,4 @@ +import re from dataclasses import dataclass from typing import Any, Dict, Tuple, Union @@ -47,46 +48,36 @@ class InvalidFileName(Exception): pass +IMAGE_LAYER_FILE_NAME = re.compile("^([A-F0-9]{36})-([A-F0-9]{36})__([A-F0-9]{16})(-[a-f0-9]{8})?$") + + def parse_image_layer(f_name: str) -> Tuple[int, int, int]: """Parse an image layer file name. 
Return key start, key end, and snapshot lsn""" - parts = f_name.split("__") - if len(parts) != 2: - raise InvalidFileName(f"expecting two parts separated by '__', got: {parts}") - key_parts = parts[0].split("-") - if len(key_parts) != 2: - raise InvalidFileName( - f"expecting two key parts separated by '--' in parts[0], got: {key_parts}" - ) - try: - return int(key_parts[0], 16), int(key_parts[1], 16), int(parts[1], 16) - except ValueError as e: - raise InvalidFileName(f"conversion error: {f_name}") from e + + match = IMAGE_LAYER_FILE_NAME.match(f_name) + if match is None: + raise InvalidFileName(f"'{f_name}' is not an image layer filename") + + return int(match.group(1), 16), int(match.group(2), 16), int(match.group(3), 16) + + +DELTA_LAYER_FILE_NAME = re.compile( + "^([A-F0-9]{36})-([A-F0-9]{36})__([A-F0-9]{16})-([A-F0-9]{16})(-[a-f0-9]{8})?$" +) def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]: """Parse a delta layer file name. Return key start, key end, lsn start, and lsn end""" - parts = f_name.split("__") - if len(parts) != 2: - raise InvalidFileName(f"expecting two parts separated by '__', got: {parts}") - key_parts = parts[0].split("-") - if len(key_parts) != 2: - raise InvalidFileName( - f"expecting two key parts separated by '--' in parts[0], got: {key_parts}" - ) - lsn_parts = parts[1].split("-") - if len(lsn_parts) != 2: - raise InvalidFileName( - f"expecting two lsn parts separated by '--' in parts[1], got: {lsn_parts}" - ) - try: - return ( - int(key_parts[0], 16), - int(key_parts[1], 16), - int(lsn_parts[0], 16), - int(lsn_parts[1], 16), - ) - except ValueError as e: - raise InvalidFileName(f"conversion error: {f_name}") from e + match = DELTA_LAYER_FILE_NAME.match(f_name) + if match is None: + raise InvalidFileName(f"'{f_name}' is not an delta layer filename") + + return ( + int(match.group(1), 16), + int(match.group(2), 16), + int(match.group(3), 16), + int(match.group(4), 16), + ) def parse_layer_file_name(file_name: str) -> LayerFileName: diff --git a/test_runner/regress/test_duplicate_layers.py b/test_runner/regress/test_duplicate_layers.py index cb4fa43be7..7471338ce5 100644 --- a/test_runner/regress/test_duplicate_layers.py +++ b/test_runner/regress/test_duplicate_layers.py @@ -2,6 +2,7 @@ import time import pytest from fixtures.neon_fixtures import NeonEnvBuilder, PgBin, wait_for_last_flush_lsn +from fixtures.pageserver.types import parse_layer_file_name from fixtures.pageserver.utils import ( wait_for_last_record_lsn, wait_for_upload_queue_empty, @@ -86,14 +87,7 @@ def test_actually_duplicated_l1(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) # path = env.remote_storage.timeline_path(tenant_id, timeline_id) l1_found = None - for path in env.pageserver.timeline_dir(tenant_id, timeline_id).iterdir(): - if path.name == "metadata" or path.name.startswith("ephemeral-"): - continue - - if len(path.suffixes) > 0: - # temp files - continue - + for path in env.pageserver.list_layers(tenant_id, timeline_id): [key_range, lsn_range] = path.name.split("__", maxsplit=1) if "-" not in lsn_range: @@ -108,19 +102,21 @@ def test_actually_duplicated_l1(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) if l1_found is not None: raise RuntimeError(f"found multiple L1: {l1_found.name} and {path.name}") - l1_found = path + l1_found = parse_layer_file_name(path.name) assert l1_found is not None, "failed to find L1 locally" uploaded = env.pageserver_remote_storage.remote_layer_path( - tenant_id, timeline_id, l1_found.name + tenant_id, timeline_id, l1_found.to_str() ) assert 
not uploaded.exists(), "to-be-overwritten should not yet be uploaded" env.pageserver.start() wait_until_tenant_active(pageserver_http, tenant_id) - assert not l1_found.exists(), "partial compaction result should had been removed during startup" + assert not env.pageserver.layer_exists( + tenant_id, timeline_id, l1_found + ), "partial compaction result should had been removed during startup" # wait for us to catch up again wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, lsn) @@ -130,18 +126,18 @@ def test_actually_duplicated_l1(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) # give time for log flush time.sleep(1) - message = f".*duplicated L1 layer layer={l1_found.name}" + message = f".*duplicated L1 layer layer={l1_found}" found_msg = env.pageserver.log_contains(message) # resident or evicted, it should not be overwritten, however it should had been non-existing at startup assert ( found_msg is None ), "layer should had been removed during startup, did it live on as evicted?" - assert l1_found.exists(), "the L1 reappears" + assert env.pageserver.layer_exists(tenant_id, timeline_id, l1_found), "the L1 reappears" wait_for_upload_queue_empty(pageserver_http, tenant_id, timeline_id) uploaded = env.pageserver_remote_storage.remote_layer_path( - tenant_id, timeline_id, l1_found.name + tenant_id, timeline_id, l1_found.to_str() ) assert uploaded.exists(), "the L1 is uploaded" diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py index fefb30bbdd..5c967fd72e 100644 --- a/test_runner/regress/test_layer_eviction.py +++ b/test_runner/regress/test_layer_eviction.py @@ -7,6 +7,7 @@ from fixtures.neon_fixtures import ( flush_ep_to_pageserver, wait_for_last_flush_lsn, ) +from fixtures.pageserver.types import parse_layer_file_name from fixtures.pageserver.utils import wait_for_upload from fixtures.remote_storage import RemoteStorageKind @@ -57,9 +58,9 @@ def test_basic_eviction( for sk in env.safekeepers: sk.stop() - timeline_path = env.pageserver.timeline_dir(tenant_id, timeline_id) - initial_local_layers = sorted( - list(filter(lambda path: path.name != "metadata", timeline_path.glob("*"))) + initial_local_layers = dict( + (parse_layer_file_name(path.name), path) + for path in env.pageserver.list_layers(tenant_id, timeline_id) ) assert ( len(initial_local_layers) > 1 @@ -73,6 +74,7 @@ def test_basic_eviction( assert len(initial_local_layers) == len( initial_layer_map_info.historic_layers ), "Should have the same layers in memory and on disk" + for returned_layer in initial_layer_map_info.historic_layers: assert ( returned_layer.kind == "Delta" @@ -81,27 +83,29 @@ def test_basic_eviction( not returned_layer.remote ), f"All created layers should be present locally, but got {returned_layer}" - local_layers = list( - filter(lambda layer: layer.name == returned_layer.layer_file_name, initial_local_layers) + returned_layer_name = parse_layer_file_name(returned_layer.layer_file_name) + assert ( + returned_layer_name in initial_local_layers + ), f"Did not find returned layer {returned_layer_name} in local layers {list(initial_local_layers.keys())}" + + local_layer_path = ( + env.pageserver.timeline_dir(tenant_id, timeline_id) + / initial_local_layers[returned_layer_name] ) assert ( - len(local_layers) == 1 - ), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}" - local_layer = local_layers[0] - assert ( - returned_layer.layer_file_size == local_layer.stat().st_size - ), f"Returned layer {returned_layer} has a 
different file size than local layer {local_layer}" + returned_layer.layer_file_size == local_layer_path.stat().st_size + ), f"Returned layer {returned_layer} has a different file size than local layer {local_layer_path}" # Detach all layers, ensre they are not in the local FS, but are still dumped as part of the layer map - for local_layer in initial_local_layers: + for local_layer_name, local_layer_path in initial_local_layers.items(): client.evict_layer( - tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name + tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer_path.name ) - assert not any( - new_local_layer.name == local_layer.name for new_local_layer in timeline_path.glob("*") - ), f"Did not expect to find {local_layer} layer after evicting" + assert not env.pageserver.layer_exists( + tenant_id, timeline_id, local_layer_name + ), f"Did not expect to find {local_layer_name} layer after evicting" - empty_layers = list(filter(lambda path: path.name != "metadata", timeline_path.glob("*"))) + empty_layers = env.pageserver.list_layers(tenant_id, timeline_id) assert not empty_layers, f"After evicting all layers, timeline {tenant_id}/{timeline_id} should have no layers locally, but got: {empty_layers}" evicted_layer_map_info = client.layer_map_info(tenant_id=tenant_id, timeline_id=timeline_id) @@ -118,15 +122,15 @@ def test_basic_eviction( assert ( returned_layer.remote ), f"All layers should be evicted and not present locally, but got {returned_layer}" - assert any( - local_layer.name == returned_layer.layer_file_name - for local_layer in initial_local_layers + returned_layer_name = parse_layer_file_name(returned_layer.layer_file_name) + assert ( + returned_layer_name in initial_local_layers ), f"Did not find returned layer {returned_layer} in local layers {initial_local_layers}" # redownload all evicted layers and ensure the initial state is restored - for local_layer in initial_local_layers: + for local_layer_name, _local_layer_path in initial_local_layers.items(): client.download_layer( - tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer.name + tenant_id=tenant_id, timeline_id=timeline_id, layer_name=local_layer_name.to_str() ) client.timeline_download_remote_layers( tenant_id, @@ -137,8 +141,9 @@ def test_basic_eviction( at_least_one_download=False, ) - redownloaded_layers = sorted( - list(filter(lambda path: path.name != "metadata", timeline_path.glob("*"))) + redownloaded_layers = dict( + (parse_layer_file_name(path.name), path) + for path in env.pageserver.list_layers(tenant_id, timeline_id) ) assert ( redownloaded_layers == initial_local_layers diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py index f957bea156..adcf7de8d4 100644 --- a/test_runner/regress/test_pageserver_generations.py +++ b/test_runner/regress/test_pageserver_generations.py @@ -10,6 +10,7 @@ of the pageserver are: """ import enum +import os import re import time from typing import Optional @@ -700,3 +701,50 @@ def test_multi_attach( # All data we wrote while multi-attached remains readable workload.validate(pageservers[2].id) + + +@pytest.mark.skip(reason="To be enabled after release with new local path style") +def test_upgrade_generationless_local_file_paths( + neon_env_builder: NeonEnvBuilder, +): + """ + Test pageserver behavior when startup up with local layer paths without + generation numbers: it should accept these layer files, and avoid doing + a delete/download cycle on them. 
+ """ + env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF) + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + workload = Workload(env, tenant_id, timeline_id) + workload.init() + workload.write_rows(1000) + + env.pageserver.stop() + + # Rename the local paths to legacy format, to simulate what + # we would see when upgrading + timeline_dir = env.pageserver.timeline_dir(tenant_id, timeline_id) + files_renamed = 0 + for filename in os.listdir(timeline_dir): + path = os.path.join(timeline_dir, filename) + log.info(f"Found file {path}") + if path.endswith("-00000001"): + new_path = path[:-9] + os.rename(path, new_path) + log.info(f"Renamed {path} -> {new_path}") + files_renamed += 1 + + assert files_renamed > 0 + + env.pageserver.start() + + workload.validate() + + # Assert that there were no on-demand downloads + assert ( + env.pageserver.http_client().get_metric_value( + "pageserver_remote_ondemand_downloaded_layers_total" + ) + == 0 + ) diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 8f194e5dda..c40bb962f2 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -2,12 +2,12 @@ import json import os import random import time -from pathlib import Path from typing import Any, Dict, Optional import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver, S3Scrubber +from fixtures.pageserver.types import parse_layer_file_name from fixtures.pageserver.utils import ( assert_prefix_empty, poll_for_remote_storage_iterations, @@ -51,9 +51,13 @@ def evict_random_layers( if "ephemeral" in layer.name or "temp_download" in layer.name: continue + layer_name = parse_layer_file_name(layer.name) + if rng.choice([True, False]): - log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}") - client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name) + log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer_name.to_str()}") + client.evict_layer( + tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer_name.to_str() + ) @pytest.mark.parametrize("seed", [1, 2, 3]) @@ -402,32 +406,6 @@ def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder): validate_heatmap(heatmap_second) -def list_layers(pageserver, tenant_id: TenantId, timeline_id: TimelineId) -> list[Path]: - """ - Inspect local storage on a pageserver to discover which layer files are present. - - :return: list of relative paths to layers, from the timeline root. 
- """ - timeline_path = pageserver.timeline_dir(tenant_id, timeline_id) - - def relative(p: Path) -> Path: - return p.relative_to(timeline_path) - - return sorted( - list( - map( - relative, - filter( - lambda path: path.name != "metadata" - and "ephemeral" not in path.name - and "temp" not in path.name, - timeline_path.glob("*"), - ), - ) - ) - ) - - def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): """ Test the overall data flow in secondary mode: @@ -482,8 +460,8 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): ps_secondary.http_client().tenant_secondary_download(tenant_id) - assert list_layers(ps_attached, tenant_id, timeline_id) == list_layers( - ps_secondary, tenant_id, timeline_id + assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers( + tenant_id, timeline_id ) # Make changes on attached pageserver, check secondary downloads them @@ -500,8 +478,8 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): ps_secondary.http_client().tenant_secondary_download(tenant_id) try: - assert list_layers(ps_attached, tenant_id, timeline_id) == list_layers( - ps_secondary, tenant_id, timeline_id + assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers( + tenant_id, timeline_id ) except: # Do a full listing of the secondary location on errors, to help debug of @@ -523,8 +501,8 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): # ================================================================== try: log.info("Evicting a layer...") - layer_to_evict = list_layers(ps_attached, tenant_id, timeline_id)[0] - some_other_layer = list_layers(ps_attached, tenant_id, timeline_id)[1] + layer_to_evict = ps_attached.list_layers(tenant_id, timeline_id)[0] + some_other_layer = ps_attached.list_layers(tenant_id, timeline_id)[1] log.info(f"Victim layer: {layer_to_evict.name}") ps_attached.http_client().evict_layer( tenant_id, timeline_id, layer_name=layer_to_evict.name @@ -537,13 +515,13 @@ def test_secondary_downloads(neon_env_builder: NeonEnvBuilder): layer["name"] for layer in heatmap_after_eviction["timelines"][0]["layers"] ) assert layer_to_evict.name not in heatmap_layers - assert some_other_layer.name in heatmap_layers + assert parse_layer_file_name(some_other_layer.name).to_str() in heatmap_layers ps_secondary.http_client().tenant_secondary_download(tenant_id) - assert layer_to_evict not in list_layers(ps_attached, tenant_id, timeline_id) - assert list_layers(ps_attached, tenant_id, timeline_id) == list_layers( - ps_secondary, tenant_id, timeline_id + assert layer_to_evict not in ps_attached.list_layers(tenant_id, timeline_id) + assert ps_attached.list_layers(tenant_id, timeline_id) == ps_secondary.list_layers( + tenant_id, timeline_id ) except: # On assertion failures, log some details to help with debugging @@ -630,7 +608,7 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder): for timeline_id in timelines: log.info(f"Checking for secondary timeline {timeline_id} on node {ps_secondary.id}") # One or more layers should be present for all timelines - assert list_layers(ps_secondary, tenant_id, timeline_id) + assert ps_secondary.list_layers(tenant_id, timeline_id) # Delete the second timeline: this should be reflected later on the secondary env.storage_controller.pageserver_api().timeline_delete(tenant_id, timelines[1]) @@ -645,10 +623,10 @@ def test_secondary_background_downloads(neon_env_builder: NeonEnvBuilder): ps_secondary = next(p for p in env.pageservers if p != 
ps_attached) # This one was not deleted - assert list_layers(ps_secondary, tenant_id, timelines[0]) + assert ps_secondary.list_layers(tenant_id, timelines[0]) # This one was deleted - assert not list_layers(ps_secondary, tenant_id, timelines[1]) + assert not ps_secondary.list_layers(tenant_id, timelines[1]) t_end = time.time() @@ -708,7 +686,7 @@ def test_slow_secondary_downloads(neon_env_builder: NeonEnvBuilder, via_controll ps_attached.http_client().timeline_checkpoint(tenant_id, timeline_id) # Expect lots of layers - assert len(list_layers(ps_attached, tenant_id, timeline_id)) > 10 + assert len(ps_attached.list_layers(tenant_id, timeline_id)) > 10 # Simulate large data by making layer downloads artifically slow for ps in env.pageservers: diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index ad4b4a42f1..70c025c225 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -12,6 +12,7 @@ from fixtures.neon_fixtures import ( wait_for_last_flush_lsn, ) from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient +from fixtures.pageserver.types import parse_layer_file_name from fixtures.pageserver.utils import ( timeline_delete_wait_completed, wait_for_last_record_lsn, @@ -829,8 +830,9 @@ def test_compaction_waits_for_upload( assert len(upload_stuck_layers) > 0 for name in upload_stuck_layers: - path = env.pageserver.timeline_dir(tenant_id, timeline_id) / name - assert path.exists(), "while uploads are stuck the layers should be present on disk" + assert env.pageserver.layer_exists( + tenant_id, timeline_id, parse_layer_file_name(name) + ), "while uploads are stuck the layers should be present on disk" # now this will do the L0 => L1 compaction and want to remove # upload_stuck_layers and the original initdb L0 @@ -838,8 +840,9 @@ def test_compaction_waits_for_upload( # as uploads are paused, the upload_stuck_layers should still be with us for name in upload_stuck_layers: - path = env.pageserver.timeline_dir(tenant_id, timeline_id) / name - assert path.exists(), "uploads are stuck still over compaction" + assert env.pageserver.layer_exists( + tenant_id, timeline_id, parse_layer_file_name(name) + ), "uploads are stuck still over compaction" compacted_layers = client.layer_map_info(tenant_id, timeline_id).historic_by_name() overlap = compacted_layers.intersection(upload_stuck_layers) @@ -873,9 +876,8 @@ def test_compaction_waits_for_upload( wait_until(10, 1, until_layer_deletes_completed) for name in upload_stuck_layers: - path = env.pageserver.timeline_dir(tenant_id, timeline_id) / name - assert ( - not path.exists() + assert not env.pageserver.layer_exists( + tenant_id, timeline_id, parse_layer_file_name(name) ), "l0 should now be removed because of L0 => L1 compaction and completed uploads" # We should not have hit the error handling path in uploads where a uploaded file is gone diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py index d16978d02a..a1e96928bf 100644 --- a/test_runner/regress/test_tenants_with_remote_storage.py +++ b/test_runner/regress/test_tenants_with_remote_storage.py @@ -18,6 +18,7 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, last_flush_lsn_upload, ) +from fixtures.pageserver.types import parse_layer_file_name from fixtures.pageserver.utils import ( assert_tenant_state, wait_for_last_record_lsn, @@ -246,7 +247,10 @@ def 
test_tenant_redownloads_truncated_file_on_startup( # ensure the same size is found from the index_part.json index_part = env.pageserver_remote_storage.index_content(tenant_id, timeline_id) - assert index_part["layer_metadata"][path.name]["file_size"] == expected_size + assert ( + index_part["layer_metadata"][parse_layer_file_name(path.name).to_str()]["file_size"] + == expected_size + ) ## Start the pageserver. It will notice that the file size doesn't match, and ## rename away the local file. It will be re-downloaded when it's needed. @@ -276,7 +280,7 @@ def test_tenant_redownloads_truncated_file_on_startup( # the remote side of local_layer_truncated remote_layer_path = env.pageserver_remote_storage.remote_layer_path( - tenant_id, timeline_id, path.name + tenant_id, timeline_id, parse_layer_file_name(path.name).to_str() ) # if the upload ever was ongoing, this check would be racy, but at least one diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index bc983c36ee..5abb3e28e4 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -63,7 +63,7 @@ def test_ancestor_detach_branched_from( env.pageserver.allowed_errors.extend( [ - ".*initial size calculation failed: downloading failed, possibly for shutdown" + ".*initial size calculation failed: downloading failed, possibly for shutdown", ".*failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited", ] )
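For reference, a minimal standalone sketch (not part of the patch) of the generation-suffix handling that `LayerFileName::from_str` introduces above: a layer file name may carry an optional trailing `-{8 hex digits}` generation suffix, which is stripped before the delta/image name is parsed. This assumes the `regex` crate; `strip_generation_suffix` is a hypothetical helper name used only for illustration.

use regex::Regex;

/// Illustrative only: drop an optional 8-hex-digit generation suffix from a
/// layer file name, mirroring the approach `LayerFileName::from_str` takes in
/// this patch.
fn strip_generation_suffix(file_name: &str) -> &str {
    // Generation suffixes are written as lowercase hex (e.g. "-00000001"),
    // while the key and LSN components of these names use uppercase hex, so
    // this pattern does not match the tail of an LSN range.
    let re = Regex::new(r"^(?P<base>.+)-(?P<generation>[0-9a-f]{8})$").unwrap();
    match re.captures(file_name) {
        Some(caps) => caps.name("base").expect("non-optional group").as_str(),
        None => file_name,
    }
}

fn main() {
    // Image layer name with a generation suffix: the suffix is removed.
    let with_suffix = "000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001";
    assert!(strip_generation_suffix(with_suffix).ends_with("__00000000014FED58"));

    // Delta layer name without a suffix is returned unchanged.
    let without_suffix = "000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481";
    assert_eq!(strip_generation_suffix(without_suffix), without_suffix);
}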