diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs
index 1ddd156a08..8075cc38cf 100644
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -88,6 +88,11 @@ impl RemotePath {
     pub fn extension(&self) -> Option<&str> {
         self.0.extension()?.to_str()
     }
+
+    /// Consume this `RemotePath` and return the `PathBuf` it wraps.
+    pub fn take(self) -> PathBuf {
+        self.0
+    }
 }
 
 /// Storage (potentially remote) API to manage its state.
diff --git a/libs/utils/src/generation.rs b/libs/utils/src/generation.rs
index e3943c4122..b5b373ea34 100644
--- a/libs/utils/src/generation.rs
+++ b/libs/utils/src/generation.rs
@@ -59,6 +59,15 @@ impl Generation {
             }
         }
     }
+
+    /// Return the generation immediately before this one.
+    ///
+    /// Saturates at zero instead of underflowing, so generation 0 is reported
+    /// as its own predecessor. No special handling is applied to the value
+    /// produced by `Generation::none()`; check `is_none()` before calling.
+    pub fn previous(&self) -> Generation {
+        Generation::new(self.0.saturating_sub(1))
+    }
 }
 
 impl Serialize for Generation {
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index babf047b0a..cbcd05cae7 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -497,7 +497,7 @@ async fn tenant_attach_handler(
             "generation attribute missing"
         )))?
     } else {
-        Generation::placeholder()
+        Generation::none()
     };
 
     if let Some(remote_storage) = &state.remote_storage {
diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs
index 8f8dba2863..42721fadfb 100644
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -19,7 +19,7 @@ use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_
 use crate::tenant::storage_layer::LayerFileName;
 use crate::tenant::timeline::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::Generation;
-use remote_storage::{DownloadError, GenericRemoteStorage};
+use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
 use utils::crashsafe::path_with_suffix_extension;
 use utils::id::{TenantId, TimelineId};
 
@@ -219,18 +219,17 @@ pub async fn list_remote_timelines(
     Ok(timeline_ids)
 }
 
-pub(super) async fn download_index_part(
-    conf: &'static PageServerConf,
+/// Download the index_part stored under one specific generation.
+///
+/// `index_generation` is handed to `remote_index_path` to select the remote key.
+async fn do_download_index_part(
+    local_path: &Path,
     storage: &GenericRemoteStorage,
     tenant_id: &TenantId,
     timeline_id: &TimelineId,
-    generation: Generation,
+    index_generation: Generation,
 ) -> Result<IndexPart, DownloadError> {
-    let local_path = conf
-        .metadata_path(tenant_id, timeline_id)
-        .with_file_name(IndexPart::FILE_NAME);
-
-    let remote_path = remote_index_path(tenant_id, timeline_id, generation);
+    let remote_path = remote_index_path(tenant_id, timeline_id, index_generation);
 
     let index_part_bytes = download_retry(
         || async {
@@ -257,6 +256,120 @@ pub(super) async fn download_index_part(
     Ok(index_part)
 }
 
+/// Locate and download the index_part to load when running in `my_generation`.
+///
+/// Lookup order:
+/// 1. If `my_generation.is_none()`, fetch the generation-less path and stop.
+/// 2. Try the generation returned by `my_generation.previous()`.
+/// 3. If that is `DownloadError::NotFound`, list the index objects and fetch the
+///    highest parsed generation that is `<= my_generation`.
+/// 4. If no listed key yields such a generation, fetch the un-suffixed
+///    index_part.json.
+///
+/// Any other error from step 2 is returned unchanged; a listing failure is
+/// returned as `DownloadError::Other`.
+pub(super) async fn download_index_part(
+    conf: &'static PageServerConf,
+    storage: &GenericRemoteStorage,
+    tenant_id: &TenantId,
+    timeline_id: &TimelineId,
+    my_generation: Generation,
+) -> Result<IndexPart, DownloadError> {
+    let local_path = conf
+        .metadata_path(tenant_id, timeline_id)
+        .with_file_name(IndexPart::FILE_NAME);
+
+    if my_generation.is_none() {
+        // Operating without generations: just fetch the generation-less path
+        return do_download_index_part(&local_path, storage, tenant_id, timeline_id, my_generation)
+            .await;
+    }
+
+    let previous_gen = my_generation.previous();
+    let r_previous =
+        do_download_index_part(&local_path, storage, tenant_id, timeline_id, previous_gen).await;
+
+    match r_previous {
+        Ok(index_part) => {
+            tracing::debug!("Found index_part from previous generation {previous_gen}");
+            return Ok(index_part);
+        }
+        Err(DownloadError::NotFound) => {
+            tracing::debug!("No index_part found from previous generation {previous_gen}, falling back to listing");
+        }
+        Err(e) => return Err(e),
+    }
+
+    /// Given the key of an index, parse out the generation part of the name
+    fn parse_generation(path: RemotePath) -> Option<Generation> {
+        let path = path.take();
+        // A missing file name and a non-UTF-8 file name are reported identically.
+        let file_name = match path.file_name().and_then(|name| name.to_str()) {
+            Some(name) => name,
+            None => {
+                // Unexpected: we should be seeing index_part.json paths only
+                tracing::warn!("Malformed index key {0}", path.display());
+                return None;
+            }
+        };
+
+        // The generation is the hex number after the first '-'; names without a
+        // suffix, or with a suffix that is not valid hex, are skipped silently.
+        let (_, gen_suffix) = file_name.split_once('-')?;
+        u32::from_str_radix(gen_suffix, 16).ok().map(Generation::new)
+    }
+
+    // Fallback: we did not find an index_part.json from the previous generation, so
+    // we will list all the index_part objects and pick the most recent.
+    let index_prefix = remote_index_path(tenant_id, timeline_id, Generation::none());
+    let indices = backoff::retry(
+        || async { storage.list_files(Some(&index_prefix)).await },
+        |_| false,
+        FAILED_DOWNLOAD_WARN_THRESHOLD,
+        FAILED_REMOTE_OP_RETRIES,
+        "listing index_part files",
+        // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
+        backoff::Cancel::new(CancellationToken::new(), || -> anyhow::Error {
+            unreachable!()
+        }),
+    )
+    .await
+    .map_err(DownloadError::Other)?;
+
+    // Only generations at or before our own are candidates; take the newest.
+    let latest_generation = indices
+        .into_iter()
+        .filter_map(parse_generation)
+        .filter(|g| g <= &my_generation)
+        .max();
+
+    match latest_generation {
+        Some(g) => {
+            tracing::debug!("Found index_part in generation {g} (my generation {my_generation})");
+            do_download_index_part(&local_path, storage, tenant_id, timeline_id, g).await
+        }
+        None => {
+            // This is not an error: the timeline may be newly created, or we may be
+            // upgrading and have no historical index_part with a generation suffix.
+            // Fall back to trying to load the un-suffixed index_part.json.
+            tracing::info!(
+                "No index_part.json-* found when loading {}/{} in generation {}",
+                tenant_id,
+                timeline_id,
+                my_generation
+            );
+            do_download_index_part(
+                &local_path,
+                storage,
+                tenant_id,
+                timeline_id,
+                Generation::none(),
+            )
+            .await
+        }
+    }
+}
+
 /// Helper function to handle retries for a download operation.
 ///
 /// Remote operations can fail due to rate limits (IAM, S3), spurious network