From 4ef74215e1174186c7ab8cdb41d98cb9a327d07d Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 29 Oct 2024 13:00:03 +0000 Subject: [PATCH] pageserver: refactor generation-aware loading code into generic (#9545) ## Problem Indices used to be the only kind of object where we had to search across generations to find the most recent one. As of https://github.com/neondatabase/neon/issues/9543, manifests will need the same treatment. ## Summary of changes - Refactor download_index_part to a generic download_generation_object function, which will be usable for downloading manifest objects as well. --- .../tenant/remote_timeline_client/download.rs | 139 ++++++++++++------ 1 file changed, 91 insertions(+), 48 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 95f8f026d4..8679c68a27 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -403,59 +403,79 @@ async fn do_download_index_part( Ok((index_part, index_generation, index_part_mtime)) } -/// index_part.json objects are suffixed with a generation number, so we cannot -/// directly GET the latest index part without doing some probing. +/// Metadata objects are "generationed", meaning that they include a generation suffix. This +/// function downloads the object with the highest generation <= `my_generation`. /// -/// In this function we probe for the most recent index in a generation <= our current generation. -/// See "Finding the remote indices for timelines" in docs/rfcs/025-generation-numbers.md +/// Data objects (layer files) also include a generation in their path, but there is no equivalent +/// search process, because their reference from an index includes the generation. +/// +/// An expensive object listing operation is only done if necessary: the typical fast path is to issue two +/// GET operations, one to our own generation (stale attachment case), and one to the immediately preceding +/// generation (normal case when migrating/restarting). Only if both of these return 404 do we fall back +/// to listing objects. +/// +/// * `my_generation`: the value of `[crate::tenant::Tenant::generation]` +/// * `what`: for logging, what object are we downloading +/// * `prefix`: when listing objects, use this prefix (i.e. the part of the object path before the generation) +/// * `do_download`: a GET of the object in a particular generation, which should **retry indefinitely** unless +/// `cancel`` has fired. This function does not do its own retries of GET operations, and relies +/// on the function passed in to do so. +/// * `parse_path`: parse a fully qualified remote storage path to get the generation of the object. +#[allow(clippy::too_many_arguments)] #[tracing::instrument(skip_all, fields(generation=?my_generation))] -pub(crate) async fn download_index_part( - storage: &GenericRemoteStorage, - tenant_shard_id: &TenantShardId, - timeline_id: &TimelineId, +pub(crate) async fn download_generation_object<'a, T, DF, DFF, PF>( + storage: &'a GenericRemoteStorage, + tenant_shard_id: &'a TenantShardId, + timeline_id: &'a TimelineId, my_generation: Generation, - cancel: &CancellationToken, -) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { + what: &str, + prefix: RemotePath, + do_download: DF, + parse_path: PF, + cancel: &'a CancellationToken, +) -> Result<(T, Generation, SystemTime), DownloadError> +where + DF: Fn( + &'a GenericRemoteStorage, + &'a TenantShardId, + &'a TimelineId, + Generation, + &'a CancellationToken, + ) -> DFF, + DFF: Future>, + PF: Fn(RemotePath) -> Option, + T: 'static, +{ debug_assert_current_span_has_tenant_and_timeline_id(); if my_generation.is_none() { // Operating without generations: just fetch the generation-less path - return do_download_index_part( - storage, - tenant_shard_id, - timeline_id, - my_generation, - cancel, - ) - .await; + return do_download(storage, tenant_shard_id, timeline_id, my_generation, cancel).await; } - // Stale case: If we were intentionally attached in a stale generation, there may already be a remote - // index in our generation. + // Stale case: If we were intentionally attached in a stale generation, the remote object may already + // exist in our generation. // // This is an optimization to avoid doing the listing for the general case below. - let res = - do_download_index_part(storage, tenant_shard_id, timeline_id, my_generation, cancel).await; + let res = do_download(storage, tenant_shard_id, timeline_id, my_generation, cancel).await; match res { - Ok(index_part) => { - tracing::debug!( - "Found index_part from current generation (this is a stale attachment)" - ); - return Ok(index_part); + Ok(decoded) => { + tracing::debug!("Found {what} from current generation (this is a stale attachment)"); + return Ok(decoded); } Err(DownloadError::NotFound) => {} Err(e) => return Err(e), }; - // Typical case: the previous generation of this tenant was running healthily, and had uploaded - // and index part. We may safely start from this index without doing a listing, because: + // Typical case: the previous generation of this tenant was running healthily, and had uploaded the object + // we are seeking in that generation. We may safely start from this index without doing a listing, because: // - We checked for current generation case above // - generations > my_generation are to be ignored - // - any other indices that exist would have an older generation than `previous_gen`, and - // we want to find the most recent index from a previous generation. + // - any other objects that exist would have an older generation than `previous_gen`, and + // we want to find the most recent object from a previous generation. // // This is an optimization to avoid doing the listing for the general case below. - let res = do_download_index_part( + let res = do_download( storage, tenant_shard_id, timeline_id, @@ -464,14 +484,12 @@ pub(crate) async fn download_index_part( ) .await; match res { - Ok(index_part) => { - tracing::debug!("Found index_part from previous generation"); - return Ok(index_part); + Ok(decoded) => { + tracing::debug!("Found {what} from previous generation"); + return Ok(decoded); } Err(DownloadError::NotFound) => { - tracing::debug!( - "No index_part found from previous generation, falling back to listing" - ); + tracing::debug!("No {what} found from previous generation, falling back to listing"); } Err(e) => { return Err(e); @@ -481,12 +499,10 @@ pub(crate) async fn download_index_part( // General case/fallback: if there is no index at my_generation or prev_generation, then list all index_part.json // objects, and select the highest one with a generation <= my_generation. Constructing the prefix is equivalent // to constructing a full index path with no generation, because the generation is a suffix. - let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none()); - - let indices = download_retry( + let paths = download_retry( || async { storage - .list(Some(&index_prefix), ListingMode::NoDelimiter, None, cancel) + .list(Some(&prefix), ListingMode::NoDelimiter, None, cancel) .await }, "list index_part files", @@ -497,22 +513,22 @@ pub(crate) async fn download_index_part( // General case logic for which index to use: the latest index whose generation // is <= our own. See "Finding the remote indices for timelines" in docs/rfcs/025-generation-numbers.md - let max_previous_generation = indices + let max_previous_generation = paths .into_iter() - .filter_map(|o| parse_remote_index_path(o.key)) + .filter_map(|o| parse_path(o.key)) .filter(|g| g <= &my_generation) .max(); match max_previous_generation { Some(g) => { - tracing::debug!("Found index_part in generation {g:?}"); - do_download_index_part(storage, tenant_shard_id, timeline_id, g, cancel).await + tracing::debug!("Found {what} in generation {g:?}"); + do_download(storage, tenant_shard_id, timeline_id, g, cancel).await } None => { // Migration from legacy pre-generation state: we have a generation but no prior // attached pageservers did. Try to load from a no-generation path. - tracing::debug!("No index_part.json* found"); - do_download_index_part( + tracing::debug!("No {what}* found"); + do_download( storage, tenant_shard_id, timeline_id, @@ -524,6 +540,33 @@ pub(crate) async fn download_index_part( } } +/// index_part.json objects are suffixed with a generation number, so we cannot +/// directly GET the latest index part without doing some probing. +/// +/// In this function we probe for the most recent index in a generation <= our current generation. +/// See "Finding the remote indices for timelines" in docs/rfcs/025-generation-numbers.md +pub(crate) async fn download_index_part( + storage: &GenericRemoteStorage, + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, + my_generation: Generation, + cancel: &CancellationToken, +) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { + let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none()); + download_generation_object( + storage, + tenant_shard_id, + timeline_id, + my_generation, + "index_part", + index_prefix, + do_download_index_part, + parse_remote_index_path, + cancel, + ) + .await +} + pub(crate) async fn download_initdb_tar_zst( conf: &'static PageServerConf, storage: &GenericRemoteStorage,