mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-28 02:20:42 +00:00
pageserver: generation-aware storage for TenantManifest (#9555)
## Problem When tenant manifest objects are written without a generation suffix, concurrently attached pageservers may stamp on each others writes of the manifest and cause undefined behavior. Closes: #9543 ## Summary of changes - Use download_generation_object helper when reading manifests, to search for the most recent generation - Use Tenant::generation as the generation suffix when writing manifests.
This commit is contained in:
@@ -1352,14 +1352,15 @@ impl Tenant {
|
||||
)
|
||||
.await?;
|
||||
let (offloaded_add, tenant_manifest) =
|
||||
match remote_timeline_client::do_download_tenant_manifest(
|
||||
match remote_timeline_client::download_tenant_manifest(
|
||||
remote_storage,
|
||||
&self.tenant_shard_id,
|
||||
self.generation,
|
||||
&cancel,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok((tenant_manifest, _generation)) => (
|
||||
Ok((tenant_manifest, _generation, _manifest_mtime)) => (
|
||||
format!("{} offloaded", tenant_manifest.offloaded_timelines.len()),
|
||||
tenant_manifest,
|
||||
),
|
||||
@@ -3130,8 +3131,6 @@ impl Tenant {
|
||||
}
|
||||
|
||||
let tenant_manifest = self.build_tenant_manifest();
|
||||
// TODO: generation support
|
||||
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
|
||||
for child_shard in child_shards {
|
||||
tracing::info!(
|
||||
"Uploading tenant manifest for child {}",
|
||||
@@ -3140,7 +3139,7 @@ impl Tenant {
|
||||
upload_tenant_manifest(
|
||||
&self.remote_storage,
|
||||
child_shard,
|
||||
generation,
|
||||
self.generation,
|
||||
&tenant_manifest,
|
||||
&self.cancel,
|
||||
)
|
||||
|
||||
@@ -190,6 +190,7 @@ use chrono::{NaiveDateTime, Utc};
|
||||
pub(crate) use download::download_initdb_tar_zst;
|
||||
use pageserver_api::models::TimelineArchivalState;
|
||||
use pageserver_api::shard::{ShardIndex, TenantShardId};
|
||||
use regex::Regex;
|
||||
use scopeguard::ScopeGuard;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::backoff::{
|
||||
@@ -199,7 +200,7 @@ use utils::pausable_failpoint;
|
||||
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::{Arc, Mutex, OnceLock};
|
||||
use std::time::Duration;
|
||||
|
||||
use remote_storage::{
|
||||
@@ -245,7 +246,7 @@ use super::upload_queue::{NotInitialized, SetDeletedFlagProgress};
|
||||
use super::Generation;
|
||||
|
||||
pub(crate) use download::{
|
||||
do_download_tenant_manifest, download_index_part, is_temp_download_file,
|
||||
download_index_part, download_tenant_manifest, is_temp_download_file,
|
||||
list_remote_tenant_shards, list_remote_timelines,
|
||||
};
|
||||
pub(crate) use index::LayerFileMetadata;
|
||||
@@ -274,12 +275,6 @@ pub(crate) const BUFFER_SIZE: usize = 32 * 1024;
|
||||
/// which we warn and skip.
|
||||
const DELETION_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Hardcode a generation for the tenant manifest for now so that we don't
|
||||
/// need to deal with generation-less manifests in the future.
|
||||
///
|
||||
/// TODO: add proper generation support to all the places that use this.
|
||||
pub(crate) const TENANT_MANIFEST_GENERATION: Generation = Generation::new(1);
|
||||
|
||||
pub enum MaybeDeletedIndexPart {
|
||||
IndexPart(IndexPart),
|
||||
Deleted(IndexPart),
|
||||
@@ -2239,6 +2234,12 @@ pub fn remote_tenant_manifest_path(
|
||||
RemotePath::from_string(&path).expect("Failed to construct path")
|
||||
}
|
||||
|
||||
/// Prefix to all generations' manifest objects in a tenant shard
|
||||
pub fn remote_tenant_manifest_prefix(tenant_shard_id: &TenantShardId) -> RemotePath {
|
||||
let path = format!("tenants/{tenant_shard_id}/tenant-manifest",);
|
||||
RemotePath::from_string(&path).expect("Failed to construct path")
|
||||
}
|
||||
|
||||
pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {
|
||||
let path = format!("tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}");
|
||||
RemotePath::from_string(&path).expect("Failed to construct path")
|
||||
@@ -2333,6 +2334,15 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Given the key of a tenant manifest, parse out the generation number
|
||||
pub(crate) fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option<Generation> {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let re = RE.get_or_init(|| Regex::new(r".+tenant-manifest-([0-9a-f]{8}).json").unwrap());
|
||||
re.captures(path.get_path().as_str())
|
||||
.and_then(|c| c.get(1))
|
||||
.and_then(|m| Generation::parse_suffix(m.as_str()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -20,7 +20,9 @@ use utils::backoff;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::RequestContext;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::span::{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id,
|
||||
};
|
||||
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::Generation;
|
||||
@@ -36,9 +38,10 @@ use utils::pausable_failpoint;
|
||||
use super::index::{IndexPart, LayerFileMetadata};
|
||||
use super::manifest::TenantManifest;
|
||||
use super::{
|
||||
parse_remote_index_path, remote_index_path, remote_initdb_archive_path,
|
||||
remote_initdb_preserved_archive_path, remote_tenant_manifest_path, remote_tenant_path,
|
||||
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
|
||||
parse_remote_index_path, parse_remote_tenant_manifest_path, remote_index_path,
|
||||
remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path,
|
||||
remote_tenant_manifest_prefix, remote_tenant_path, FAILED_DOWNLOAD_WARN_THRESHOLD,
|
||||
FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
|
||||
};
|
||||
|
||||
///
|
||||
@@ -365,32 +368,34 @@ async fn do_download_remote_path_retry_forever(
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn do_download_tenant_manifest(
|
||||
async fn do_download_tenant_manifest(
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
_timeline_id: Option<&TimelineId>,
|
||||
generation: Generation,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(TenantManifest, Generation), DownloadError> {
|
||||
// TODO: generation support
|
||||
let generation = super::TENANT_MANIFEST_GENERATION;
|
||||
) -> Result<(TenantManifest, Generation, SystemTime), DownloadError> {
|
||||
let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation);
|
||||
|
||||
let (manifest_bytes, _manifest_bytes_mtime) =
|
||||
let (manifest_bytes, manifest_bytes_mtime) =
|
||||
do_download_remote_path_retry_forever(storage, &remote_path, cancel).await?;
|
||||
|
||||
let tenant_manifest = TenantManifest::from_json_bytes(&manifest_bytes)
|
||||
.with_context(|| format!("deserialize tenant manifest file at {remote_path:?}"))
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
Ok((tenant_manifest, generation))
|
||||
Ok((tenant_manifest, generation, manifest_bytes_mtime))
|
||||
}
|
||||
|
||||
async fn do_download_index_part(
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
timeline_id: Option<&TimelineId>,
|
||||
index_generation: Generation,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {
|
||||
let timeline_id =
|
||||
timeline_id.expect("A timeline ID is always provided when downloading an index");
|
||||
let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);
|
||||
|
||||
let (index_part_bytes, index_part_mtime) =
|
||||
@@ -426,7 +431,7 @@ async fn do_download_index_part(
|
||||
pub(crate) async fn download_generation_object<'a, T, DF, DFF, PF>(
|
||||
storage: &'a GenericRemoteStorage,
|
||||
tenant_shard_id: &'a TenantShardId,
|
||||
timeline_id: &'a TimelineId,
|
||||
timeline_id: Option<&'a TimelineId>,
|
||||
my_generation: Generation,
|
||||
what: &str,
|
||||
prefix: RemotePath,
|
||||
@@ -438,7 +443,7 @@ where
|
||||
DF: Fn(
|
||||
&'a GenericRemoteStorage,
|
||||
&'a TenantShardId,
|
||||
&'a TimelineId,
|
||||
Option<&'a TimelineId>,
|
||||
Generation,
|
||||
&'a CancellationToken,
|
||||
) -> DFF,
|
||||
@@ -446,7 +451,7 @@ where
|
||||
PF: Fn(RemotePath) -> Option<Generation>,
|
||||
T: 'static,
|
||||
{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
debug_assert_current_span_has_tenant_id();
|
||||
|
||||
if my_generation.is_none() {
|
||||
// Operating without generations: just fetch the generation-less path
|
||||
@@ -552,11 +557,13 @@ pub(crate) async fn download_index_part(
|
||||
my_generation: Generation,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none());
|
||||
download_generation_object(
|
||||
storage,
|
||||
tenant_shard_id,
|
||||
timeline_id,
|
||||
Some(timeline_id),
|
||||
my_generation,
|
||||
"index_part",
|
||||
index_prefix,
|
||||
@@ -567,6 +574,28 @@ pub(crate) async fn download_index_part(
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn download_tenant_manifest(
|
||||
storage: &GenericRemoteStorage,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
my_generation: Generation,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(TenantManifest, Generation, SystemTime), DownloadError> {
|
||||
let manifest_prefix = remote_tenant_manifest_prefix(tenant_shard_id);
|
||||
|
||||
download_generation_object(
|
||||
storage,
|
||||
tenant_shard_id,
|
||||
None,
|
||||
my_generation,
|
||||
"tenant-manifest",
|
||||
manifest_prefix,
|
||||
do_download_tenant_manifest,
|
||||
parse_remote_tenant_manifest_path,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn download_initdb_tar_zst(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &GenericRemoteStorage,
|
||||
|
||||
Reference in New Issue
Block a user